1 // SPDX-License-Identifier: MIT
2 //
3 // Copyright 2024 Advanced Micro Devices, Inc.
4
5
6 #include "dml2_internal_shared_types.h"
7 #include "dml2_core_dcn4_calcs.h"
8 #include "dml2_debug.h"
9 #include "lib_float_math.h"
10 #include "dml_top_types.h"
11
// Build-time limits and feature switches for the DCN4 core calculations.
// NOTE(review): none of these names is referenced in the portion of the file
// that was visible when these notes were written, so their exact units and
// users are not documented here — confirm against the code that consumes them.
#define DML2_MAX_FMT_420_BUFFER_WIDTH 4096
#define DML_MAX_NUM_OF_SLICES_PER_DSC 4
#define DML_MAX_COMPRESSION_RATIO 4
// The two switches below are currently disabled (commented out).
//#define DML_MODE_SUPPORT_USE_DPM_DRAM_BW
//#define DML_GLOBAL_PREFETCH_CHECK
// Enabled switch: defined without a value, so it can only be tested with #ifdef/#if defined().
#define ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE
#define DML_MAX_VSTARTUP_START 1023
19
dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type)20 const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type)
21 {
22 switch (bw_type) {
23 case (dml2_core_internal_bw_sdp):
24 return("dml2_core_internal_bw_sdp");
25 case (dml2_core_internal_bw_dram):
26 return("dml2_core_internal_bw_dram");
27 case (dml2_core_internal_bw_max):
28 return("dml2_core_internal_bw_max");
29 default:
30 return("dml2_core_internal_bw_unknown");
31 }
32 }
33
dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type)34 const char *dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type)
35 {
36 switch (dml2_core_internal_soc_state_type) {
37 case (dml2_core_internal_soc_state_sys_idle):
38 return("dml2_core_internal_soc_state_sys_idle");
39 case (dml2_core_internal_soc_state_sys_active):
40 return("dml2_core_internal_soc_state_sys_active");
41 case (dml2_core_internal_soc_state_svp_prefetch):
42 return("dml2_core_internal_soc_state_svp_prefetch");
43 case dml2_core_internal_soc_state_max:
44 default:
45 return("dml2_core_internal_soc_state_unknown");
46 }
47 }
48
// Divides dividend by divisor in floating point.
//
// dividend:  numerator.
// divisor:   denominator (converted to double for the division).
// remainder: output; despite the name it receives a flag, not a modulo value:
//            1 when the quotient minus its int-truncated value is greater
//            than zero, 0 otherwise.
// Returns:   the (non-truncated) quotient.
static double dml2_core_div_rem(double dividend, unsigned int divisor, unsigned int *remainder)
{
	const double quotient = dividend / divisor;
	// Truncation goes through int, exactly as the flag has always been computed.
	const double fraction = quotient - (int)quotient;

	*remainder = (fraction > 0) ? 1 : 0;
	return quotient;
}
54
dml2_print_mode_support_info(const struct dml2_core_internal_mode_support_info * support,bool fail_only)55 static void dml2_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only)
56 {
57 DML_LOG_VERBOSE("DML: ===================================== \n");
58 DML_LOG_VERBOSE("DML: DML_MODE_SUPPORT_INFO_ST\n");
59 if (!fail_only || support->ScaleRatioAndTapsSupport == 0)
60 DML_LOG_VERBOSE("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport);
61 if (!fail_only || support->SourceFormatPixelAndScanSupport == 0)
62 DML_LOG_VERBOSE("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport);
63 if (!fail_only || support->ViewportSizeSupport == 0)
64 DML_LOG_VERBOSE("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport);
65 if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1)
66 DML_LOG_VERBOSE("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion);
67 if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1)
68 DML_LOG_VERBOSE("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated);
69 if (!fail_only || support->BPPForMultistreamNotIndicated == 1)
70 DML_LOG_VERBOSE("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated);
71 if (!fail_only || support->MultistreamWithHDMIOreDP == 1)
72 DML_LOG_VERBOSE("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP);
73 if (!fail_only || support->ExceededMultistreamSlots == 1)
74 DML_LOG_VERBOSE("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots);
75 if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1)
76 DML_LOG_VERBOSE("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink);
77 if (!fail_only || support->NotEnoughLanesForMSO == 1)
78 DML_LOG_VERBOSE("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO);
79 if (!fail_only || support->P2IWith420 == 1)
80 DML_LOG_VERBOSE("DML: support: P2IWith420 = %d\n", support->P2IWith420);
81 if (!fail_only || support->DSC422NativeNotSupported == 1)
82 DML_LOG_VERBOSE("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported);
83 if (!fail_only || support->DSCSlicesODMModeSupported == 0)
84 DML_LOG_VERBOSE("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported);
85 if (!fail_only || support->NotEnoughDSCUnits == 1)
86 DML_LOG_VERBOSE("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits);
87 if (!fail_only || support->NotEnoughDSCSlices == 1)
88 DML_LOG_VERBOSE("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices);
89 if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1)
90 DML_LOG_VERBOSE("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe);
91 if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1)
92 DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen);
93 if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1)
94 DML_LOG_VERBOSE("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported);
95 if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0)
96 DML_LOG_VERBOSE("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport);
97 if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1)
98 DML_LOG_VERBOSE("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported);
99 if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1)
100 DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState);
101 if (!fail_only || support->ROBSupport == 0)
102 DML_LOG_VERBOSE("DML: support: ROBSupport = %d\n", support->ROBSupport);
103 if (!fail_only || support->OutstandingRequestsSupport == 0)
104 DML_LOG_VERBOSE("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport);
105 if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0)
106 DML_LOG_VERBOSE("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance);
107 if (!fail_only || support->DISPCLK_DPPCLK_Support == 0)
108 DML_LOG_VERBOSE("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support);
109 if (!fail_only || support->TotalAvailablePipesSupport == 0)
110 DML_LOG_VERBOSE("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport);
111 if (!fail_only || support->NumberOfOTGSupport == 0)
112 DML_LOG_VERBOSE("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport);
113 if (!fail_only || support->NumberOfHDMIFRLSupport == 0)
114 DML_LOG_VERBOSE("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport);
115 if (!fail_only || support->NumberOfDP2p0Support == 0)
116 DML_LOG_VERBOSE("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support);
117 if (!fail_only || support->EnoughWritebackUnits == 0)
118 DML_LOG_VERBOSE("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits);
119 if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0)
120 DML_LOG_VERBOSE("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport);
121 if (!fail_only || support->WritebackLatencySupport == 0)
122 DML_LOG_VERBOSE("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport);
123 if (!fail_only || support->CursorSupport == 0)
124 DML_LOG_VERBOSE("DML: support: CursorSupport = %d\n", support->CursorSupport);
125 if (!fail_only || support->PitchSupport == 0)
126 DML_LOG_VERBOSE("DML: support: PitchSupport = %d\n", support->PitchSupport);
127 if (!fail_only || support->ViewportExceedsSurface == 1)
128 DML_LOG_VERBOSE("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface);
129 if (!fail_only || support->PrefetchSupported == 0)
130 DML_LOG_VERBOSE("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported);
131 if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0)
132 DML_LOG_VERBOSE("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport);
133 if (!fail_only || support->AvgBandwidthSupport == 0)
134 DML_LOG_VERBOSE("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport);
135 if (!fail_only || support->DynamicMetadataSupported == 0)
136 DML_LOG_VERBOSE("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported);
137 if (!fail_only || support->VRatioInPrefetchSupported == 0)
138 DML_LOG_VERBOSE("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported);
139 if (!fail_only || support->PTEBufferSizeNotExceeded == 0)
140 DML_LOG_VERBOSE("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded);
141 if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 0)
142 DML_LOG_VERBOSE("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded);
143 if (!fail_only || support->ExceededMALLSize == 1)
144 DML_LOG_VERBOSE("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize);
145 if (!fail_only || support->g6_temp_read_support == 0)
146 DML_LOG_VERBOSE("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support);
147 if (!fail_only || support->ImmediateFlipSupport == 0)
148 DML_LOG_VERBOSE("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport);
149 if (!fail_only || support->LinkCapacitySupport == 0)
150 DML_LOG_VERBOSE("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport);
151
152 if (!fail_only || support->ModeSupport == 0)
153 DML_LOG_VERBOSE("DML: support: ModeSupport = %d\n", support->ModeSupport);
154 DML_LOG_VERBOSE("DML: ===================================== \n");
155 }
156
get_stream_output_bpp(double * out_bpp,const struct dml2_display_cfg * display_cfg)157 static void get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg)
158 {
159 for (unsigned int k = 0; k < display_cfg->num_planes; k++) {
160 double bpc = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.bpc;
161 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_disable) {
162 switch (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format) {
163 case dml2_444:
164 out_bpp[k] = bpc * 3;
165 break;
166 case dml2_s422:
167 out_bpp[k] = bpc * 2;
168 break;
169 case dml2_n422:
170 out_bpp[k] = bpc * 2;
171 break;
172 case dml2_420:
173 default:
174 out_bpp[k] = bpc * 1.5;
175 break;
176 }
177 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable) {
178 out_bpp[k] = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.dsc_compressed_bpp_x16 / 16;
179 } else {
180 out_bpp[k] = 0;
181 }
182 DML_LOG_VERBOSE("DML::%s: k=%d bpc=%f\n", __func__, k, bpc);
183 DML_LOG_VERBOSE("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable);
184 DML_LOG_VERBOSE("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]);
185 }
186 }
187
// Rounds num to a multiple of `multiple`.
//
// num:      value to round.
// multiple: step to round to; 0 returns num unchanged.
// up:       true rounds towards the next multiple, false towards the previous one.
// Returns:  num itself when it already is a multiple (or multiple is 0),
//           otherwise the neighbouring multiple in the requested direction.
static unsigned int dml_round_to_multiple(unsigned int num, unsigned int multiple, bool up)
{
	unsigned int excess;

	if (multiple == 0)
		return num;

	excess = num % multiple;
	if (excess != 0) {
		// Drop to the multiple below, then step up once if asked to.
		num -= excess;
		if (up)
			num += multiple;
	}

	return num;
}
204
dml_get_num_active_pipes(unsigned int num_planes,const struct core_display_cfg_support_info * cfg_support_info)205 static unsigned int dml_get_num_active_pipes(unsigned int num_planes, const struct core_display_cfg_support_info *cfg_support_info)
206 {
207 unsigned int num_active_pipes = 0;
208
209 for (unsigned int k = 0; k < num_planes; k++) {
210 num_active_pipes = num_active_pipes + (unsigned int)cfg_support_info->plane_support_info[k].dpps_used;
211 }
212
213 DML_LOG_VERBOSE("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes);
214 return num_active_pipes;
215 }
216
dml_calc_pipe_plane_mapping(const struct core_display_cfg_support_info * cfg_support_info,unsigned int * pipe_plane)217 static void dml_calc_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane)
218 {
219 unsigned int pipe_idx = 0;
220
221 for (unsigned int k = 0; k < DML2_MAX_PLANES; ++k) {
222 pipe_plane[k] = __DML2_CALCS_PIPE_NO_PLANE__;
223 }
224
225 for (unsigned int plane_idx = 0; plane_idx < DML2_MAX_PLANES; plane_idx++) {
226 for (int i = 0; i < cfg_support_info->plane_support_info[plane_idx].dpps_used; i++) {
227 pipe_plane[pipe_idx] = plane_idx;
228 pipe_idx++;
229 }
230 }
231 }
232
dml_is_phantom_pipe(const struct dml2_plane_parameters * plane_cfg)233 static bool dml_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg)
234 {
235 bool is_phantom = false;
236
237 if (plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe ||
238 plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) {
239 is_phantom = true;
240 }
241
242 return is_phantom;
243 }
244
dml_get_is_phantom_pipe(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,unsigned int pipe_idx)245 static bool dml_get_is_phantom_pipe(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx)
246 {
247 unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx];
248
249 bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_idx]);
250 DML_LOG_VERBOSE("DML::%s: pipe_idx=%d legacy_svp_config=%0d is_phantom=%d\n", __func__, pipe_idx, display_cfg->plane_descriptors[plane_idx].overrides.legacy_svp_config, is_phantom);
251 return is_phantom;
252 }
253
254 #define dml_get_per_pipe_var_func(variable, type, interval_var) static type dml_get_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx) \
255 { \
256 unsigned int plane_idx; \
257 plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; \
258 return (type) interval_var[plane_idx]; \
259 }
260
261 dml_get_per_pipe_var_func(dpte_group_size_in_bytes, unsigned int, mode_lib->mp.dpte_group_bytes);
262 dml_get_per_pipe_var_func(vm_group_size_in_bytes, unsigned int, mode_lib->mp.vm_group_bytes);
263 dml_get_per_pipe_var_func(swath_height_l, unsigned int, mode_lib->mp.SwathHeightY);
264 dml_get_per_pipe_var_func(swath_height_c, unsigned int, mode_lib->mp.SwathHeightC);
265 dml_get_per_pipe_var_func(dpte_row_height_linear_l, unsigned int, mode_lib->mp.dpte_row_height_linear);
266 dml_get_per_pipe_var_func(dpte_row_height_linear_c, unsigned int, mode_lib->mp.dpte_row_height_linear_chroma);
267
268 dml_get_per_pipe_var_func(vstartup_calculated, unsigned int, mode_lib->mp.VStartup);
269 dml_get_per_pipe_var_func(vupdate_offset, unsigned int, mode_lib->mp.VUpdateOffsetPix);
270 dml_get_per_pipe_var_func(vupdate_width, unsigned int, mode_lib->mp.VUpdateWidthPix);
271 dml_get_per_pipe_var_func(vready_offset, unsigned int, mode_lib->mp.VReadyOffsetPix);
272 dml_get_per_pipe_var_func(pstate_keepout_dst_lines, unsigned int, mode_lib->mp.pstate_keepout_dst_lines);
273 dml_get_per_pipe_var_func(det_stored_buffer_size_l_bytes, unsigned int, mode_lib->mp.DETBufferSizeY);
274 dml_get_per_pipe_var_func(det_stored_buffer_size_c_bytes, unsigned int, mode_lib->mp.DETBufferSizeC);
275 dml_get_per_pipe_var_func(det_buffer_size_kbytes, unsigned int, mode_lib->mp.DETBufferSizeInKByte);
276 dml_get_per_pipe_var_func(surface_size_in_mall_bytes, unsigned int, mode_lib->mp.SurfaceSizeInTheMALL);
277
278 #define dml_get_per_plane_var_func(variable, type, interval_var) static type dml_get_plane_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int plane_idx) \
279 { \
280 return (type) interval_var[plane_idx]; \
281 }
282
283 dml_get_per_plane_var_func(num_mcaches_plane0, unsigned int, mode_lib->ms.num_mcaches_l);
284 dml_get_per_plane_var_func(mcache_row_bytes_plane0, unsigned int, mode_lib->ms.mcache_row_bytes_l);
285 dml_get_per_plane_var_func(mcache_shift_granularity_plane0, unsigned int, mode_lib->ms.mcache_shift_granularity_l);
286 dml_get_per_plane_var_func(num_mcaches_plane1, unsigned int, mode_lib->ms.num_mcaches_c);
287 dml_get_per_plane_var_func(mcache_row_bytes_plane1, unsigned int, mode_lib->ms.mcache_row_bytes_c);
288 dml_get_per_plane_var_func(mcache_shift_granularity_plane1, unsigned int, mode_lib->ms.mcache_shift_granularity_c);
289 dml_get_per_plane_var_func(mall_comb_mcache_l, unsigned int, mode_lib->ms.mall_comb_mcache_l);
290 dml_get_per_plane_var_func(mall_comb_mcache_c, unsigned int, mode_lib->ms.mall_comb_mcache_c);
291 dml_get_per_plane_var_func(lc_comb_mcache, unsigned int, mode_lib->ms.lc_comb_mcache);
292 dml_get_per_plane_var_func(subviewport_lines_needed_in_mall, unsigned int, mode_lib->ms.SubViewportLinesNeededInMALL);
293 dml_get_per_plane_var_func(max_vstartup_lines, unsigned int, mode_lib->ms.MaxVStartupLines);
294
295 #define dml_get_per_plane_array_var_func(variable, type, interval_var) static type dml_get_plane_array_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int plane_idx, unsigned int array_idx) \
296 { \
297 return (type) interval_var[plane_idx][array_idx]; \
298 }
299
300 dml_get_per_plane_array_var_func(mcache_offsets_plane0, unsigned int, mode_lib->ms.mcache_offsets_l);
301 dml_get_per_plane_array_var_func(mcache_offsets_plane1, unsigned int, mode_lib->ms.mcache_offsets_c);
302
303 #define dml_get_var_func(var, type, internal_var) static type dml_get_##var(const struct dml2_core_internal_display_mode_lib *mode_lib) \
304 { \
305 return (type) internal_var; \
306 }
307
308 dml_get_var_func(wm_urgent, double, mode_lib->mp.Watermark.UrgentWatermark);
309 dml_get_var_func(wm_stutter_exit, double, mode_lib->mp.Watermark.StutterExitWatermark);
310 dml_get_var_func(wm_stutter_enter_exit, double, mode_lib->mp.Watermark.StutterEnterPlusExitWatermark);
311 dml_get_var_func(wm_z8_stutter_exit, double, mode_lib->mp.Watermark.Z8StutterExitWatermark);
312 dml_get_var_func(wm_z8_stutter_enter_exit, double, mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark);
313 dml_get_var_func(wm_memory_trip, double, mode_lib->mp.UrgentLatency);
314 dml_get_var_func(meta_trip_memory_us, double, mode_lib->mp.MetaTripToMemory);
315
316 dml_get_var_func(wm_fclk_change, double, mode_lib->mp.Watermark.FCLKChangeWatermark);
317 dml_get_var_func(wm_usr_retraining, double, mode_lib->mp.Watermark.USRRetrainingWatermark);
318 dml_get_var_func(wm_temp_read_or_ppt, double, mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us);
319 dml_get_var_func(wm_dram_clock_change, double, mode_lib->mp.Watermark.DRAMClockChangeWatermark);
320 dml_get_var_func(fraction_of_urgent_bandwidth, double, mode_lib->mp.FractionOfUrgentBandwidth);
321 dml_get_var_func(fraction_of_urgent_bandwidth_imm_flip, double, mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip);
322 dml_get_var_func(fraction_of_urgent_bandwidth_mall, double, mode_lib->mp.FractionOfUrgentBandwidthMALL);
323 dml_get_var_func(wm_writeback_dram_clock_change, double, mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark);
324 dml_get_var_func(wm_writeback_fclk_change, double, mode_lib->mp.Watermark.WritebackFCLKChangeWatermark);
325 dml_get_var_func(stutter_efficiency, double, mode_lib->mp.StutterEfficiency);
326 dml_get_var_func(stutter_efficiency_no_vblank, double, mode_lib->mp.StutterEfficiencyNotIncludingVBlank);
327 dml_get_var_func(stutter_num_bursts, double, mode_lib->mp.NumberOfStutterBurstsPerFrame);
328 dml_get_var_func(stutter_efficiency_z8, double, mode_lib->mp.Z8StutterEfficiency);
329 dml_get_var_func(stutter_efficiency_no_vblank_z8, double, mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank);
330 dml_get_var_func(stutter_num_bursts_z8, double, mode_lib->mp.Z8NumberOfStutterBurstsPerFrame);
331 dml_get_var_func(stutter_period, double, mode_lib->mp.StutterPeriod);
332 dml_get_var_func(stutter_efficiency_z8_bestcase, double, mode_lib->mp.Z8StutterEfficiencyBestCase);
333 dml_get_var_func(stutter_num_bursts_z8_bestcase, double, mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase);
334 dml_get_var_func(stutter_period_bestcase, double, mode_lib->mp.StutterPeriodBestCase);
335 dml_get_var_func(fclk_change_latency, double, mode_lib->mp.MaxActiveFCLKChangeLatencySupported);
336 dml_get_var_func(global_dppclk_khz, double, mode_lib->mp.GlobalDPPCLK * 1000.0);
337
338 dml_get_var_func(sys_active_avg_bw_required_sdp, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
339 dml_get_var_func(sys_active_avg_bw_required_dram, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
340
341 dml_get_var_func(svp_prefetch_avg_bw_required_sdp, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
342 dml_get_var_func(svp_prefetch_avg_bw_required_dram, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
343
344 dml_get_var_func(sys_active_avg_bw_available_sdp, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
345 dml_get_var_func(sys_active_avg_bw_available_dram, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
346
347 dml_get_var_func(svp_prefetch_avg_bw_available_sdp, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
348 dml_get_var_func(svp_prefetch_avg_bw_available_dram, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
349
350 dml_get_var_func(sys_active_urg_bw_available_sdp, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
351 dml_get_var_func(sys_active_urg_bw_available_dram, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
352 dml_get_var_func(sys_active_urg_bw_available_dram_vm_only, double, mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);
353
354 dml_get_var_func(svp_prefetch_urg_bw_available_sdp, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
355 dml_get_var_func(svp_prefetch_urg_bw_available_dram, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
356 dml_get_var_func(svp_prefetch_urg_bw_available_dram_vm_only, double, mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_svp_prefetch]);
357
358 dml_get_var_func(urgent_latency, double, mode_lib->mp.UrgentLatency);
359 dml_get_var_func(max_urgent_latency_us, double, mode_lib->ms.support.max_urgent_latency_us);
360 dml_get_var_func(max_non_urgent_latency_us, double, mode_lib->ms.support.max_non_urgent_latency_us);
361 dml_get_var_func(avg_non_urgent_latency_us, double, mode_lib->ms.support.avg_non_urgent_latency_us);
362 dml_get_var_func(avg_urgent_latency_us, double, mode_lib->ms.support.avg_urgent_latency_us);
363
364 dml_get_var_func(sys_active_urg_bw_required_sdp, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
365 dml_get_var_func(sys_active_urg_bw_required_dram, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
366 dml_get_var_func(svp_prefetch_urg_bw_required_sdp, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
367 dml_get_var_func(svp_prefetch_urg_bw_required_dram, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
368
369 dml_get_var_func(sys_active_non_urg_required_sdp, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
370 dml_get_var_func(sys_active_non_urg_required_dram, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
371 dml_get_var_func(svp_prefetch_non_urg_bw_required_sdp, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
372 dml_get_var_func(svp_prefetch_non_urg_bw_required_dram, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
373
374 dml_get_var_func(sys_active_urg_bw_required_sdp_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
375 dml_get_var_func(sys_active_urg_bw_required_dram_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
376 dml_get_var_func(svp_prefetch_urg_bw_required_sdp_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
377 dml_get_var_func(svp_prefetch_urg_bw_required_dram_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
378
379 dml_get_var_func(sys_active_non_urg_required_sdp_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
380 dml_get_var_func(sys_active_non_urg_required_dram_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
381 dml_get_var_func(svp_prefetch_non_urg_bw_required_sdp_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
382 dml_get_var_func(svp_prefetch_non_urg_bw_required_dram_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
383
384 dml_get_var_func(comp_buffer_size_kbytes, unsigned int, mode_lib->mp.CompressedBufferSizeInkByte);
385
386 dml_get_var_func(unbounded_request_enabled, bool, mode_lib->mp.UnboundedRequestEnabled);
387 dml_get_var_func(wm_writeback_urgent, double, mode_lib->mp.Watermark.WritebackUrgentWatermark);
388 dml_get_var_func(cstate_max_cap_mode, bool, mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
389 dml_get_var_func(compbuf_reserved_space_64b, unsigned int, mode_lib->mp.compbuf_reserved_space_64b);
390 dml_get_var_func(hw_debug5, bool, mode_lib->mp.hw_debug5);
391 dml_get_var_func(dcfclk_deep_sleep_hysteresis, unsigned int, mode_lib->mp.dcfclk_deep_sleep_hysteresis);
392
// Derives the DET budget and the minimum compressed buffer size from the
// return-buffer configuration.
//
// Outputs:
//   *MaxTotalDETInKByte             - with is_mrq_present:
//                                     math_ceil2((ConfigReturnBufferSizeInKByte + ROBBufferSizeInKByte) * 4 / 5, 64);
//                                     otherwise ConfigReturnBufferSizeInKByte - ConfigReturnBufferSegmentSizeInKByte.
//   *nomDETInKByte                  - MaxTotalDETInKByte / MaxNumDPP passed through math_floor2() with
//                                     ConfigReturnBufferSegmentSizeInKByte, or nomDETInKByteOverrideValue
//                                     when nomDETInKByteOverrideEnable is non-zero.
//   *MinCompressedBufferSizeInKByte - ConfigReturnBufferSizeInKByte - MaxTotalDETInKByte.
//
// The override is applied after the computed values have been logged, so the
// log shows both the computed and the overriding nomDETInKByte.
static void CalculateMaxDETAndMinCompressedBufferSize(
	unsigned int ConfigReturnBufferSizeInKByte,
	unsigned int ConfigReturnBufferSegmentSizeInKByte,
	unsigned int ROBBufferSizeInKByte,
	unsigned int MaxNumDPP,
	unsigned int nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
	unsigned int nomDETInKByteOverrideValue, // VBA_DELTA
	bool is_mrq_present,

	// Output
	unsigned int *MaxTotalDETInKByte,
	unsigned int *nomDETInKByte,
	unsigned int *MinCompressedBufferSizeInKByte)
{
	unsigned int max_total_det_kbytes;

	if (!is_mrq_present) {
		max_total_det_kbytes = ConfigReturnBufferSizeInKByte - ConfigReturnBufferSegmentSizeInKByte;
	} else {
		max_total_det_kbytes = (unsigned int) math_ceil2((double)(ConfigReturnBufferSizeInKByte + ROBBufferSizeInKByte)*4/5, 64);
	}

	*MaxTotalDETInKByte = max_total_det_kbytes;
	*nomDETInKByte = (unsigned int)(math_floor2((double)max_total_det_kbytes / (double)MaxNumDPP, ConfigReturnBufferSegmentSizeInKByte));
	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - max_total_det_kbytes;

	DML_LOG_VERBOSE("DML::%s: is_mrq_present = %u\n", __func__, is_mrq_present);
	DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
	DML_LOG_VERBOSE("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte);
	DML_LOG_VERBOSE("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP);
	DML_LOG_VERBOSE("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte);
	DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, *nomDETInKByte);
	DML_LOG_VERBOSE("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte);

	// Caller-supplied override replaces the computed nominal DET size.
	if (nomDETInKByteOverrideEnable != 0) {
		*nomDETInKByte = nomDETInKByteOverrideValue;
		DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte);
	}
}
428
PixelClockAdjustmentForProgressiveToInterlaceUnit(const struct dml2_display_cfg * display_cfg,bool ptoi_supported,double * PixelClockBackEnd)429 static void PixelClockAdjustmentForProgressiveToInterlaceUnit(const struct dml2_display_cfg *display_cfg, bool ptoi_supported, double *PixelClockBackEnd)
430 {
431 //unsigned int num_active_planes = display_cfg->num_planes;
432
433 //Progressive To Interlace Unit Effect
434 for (unsigned int k = 0; k < display_cfg->num_planes; ++k) {
435 PixelClockBackEnd[k] = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
436 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && ptoi_supported == true) {
437 // FIXME_STAGE2... can sw pass the pixel rate for interlaced directly
438 //display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz = 2 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz;
439 }
440 }
441 }
442
dml_is_420(enum dml2_source_format_class source_format)443 static bool dml_is_420(enum dml2_source_format_class source_format)
444 {
445 bool val = false;
446
447 switch (source_format) {
448 case dml2_444_8:
449 val = 0;
450 break;
451 case dml2_444_16:
452 val = 0;
453 break;
454 case dml2_444_32:
455 val = 0;
456 break;
457 case dml2_444_64:
458 val = 0;
459 break;
460 case dml2_420_8:
461 val = 1;
462 break;
463 case dml2_420_10:
464 val = 1;
465 break;
466 case dml2_420_12:
467 val = 1;
468 break;
469 case dml2_422_planar_8:
470 val = 0;
471 break;
472 case dml2_422_planar_10:
473 val = 0;
474 break;
475 case dml2_422_planar_12:
476 val = 0;
477 break;
478 case dml2_422_packed_8:
479 val = 0;
480 break;
481 case dml2_422_packed_10:
482 val = 0;
483 break;
484 case dml2_422_packed_12:
485 val = 0;
486 break;
487 case dml2_rgbe_alpha:
488 val = 0;
489 break;
490 case dml2_rgbe:
491 val = 0;
492 break;
493 case dml2_mono_8:
494 val = 0;
495 break;
496 case dml2_mono_16:
497 val = 0;
498 break;
499 default:
500 DML_ASSERT(0);
501 break;
502 }
503 return val;
504 }
505
dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode)506 static unsigned int dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode)
507 {
508 if (sw_mode == dml2_sw_linear)
509 return 256;
510 else if (sw_mode == dml2_sw_256b_2d)
511 return 256;
512 else if (sw_mode == dml2_sw_4kb_2d)
513 return 4096;
514 else if (sw_mode == dml2_sw_64kb_2d)
515 return 65536;
516 else if (sw_mode == dml2_sw_256kb_2d)
517 return 262144;
518 else if (sw_mode == dml2_gfx11_sw_linear)
519 return 256;
520 else if (sw_mode == dml2_gfx11_sw_64kb_d)
521 return 65536;
522 else if (sw_mode == dml2_gfx11_sw_64kb_d_t)
523 return 65536;
524 else if (sw_mode == dml2_gfx11_sw_64kb_d_x)
525 return 65536;
526 else if (sw_mode == dml2_gfx11_sw_64kb_r_x)
527 return 65536;
528 else if (sw_mode == dml2_gfx11_sw_256kb_d_x)
529 return 262144;
530 else if (sw_mode == dml2_gfx11_sw_256kb_r_x)
531 return 262144;
532 else {
533 DML_ASSERT(0);
534 return 256;
535 }
536 }
537
dml_is_vertical_rotation(enum dml2_rotation_angle Scan)538 static bool dml_is_vertical_rotation(enum dml2_rotation_angle Scan)
539 {
540 bool is_vert = false;
541 if (Scan == dml2_rotation_90 || Scan == dml2_rotation_270) {
542 is_vert = true;
543 } else {
544 is_vert = false;
545 }
546 return is_vert;
547 }
548
dml_get_gfx_version(enum dml2_swizzle_mode sw_mode)549 static unsigned int dml_get_gfx_version(enum dml2_swizzle_mode sw_mode)
550 {
551 unsigned int version = 0;
552
553 if (sw_mode == dml2_sw_linear ||
554 sw_mode == dml2_sw_256b_2d ||
555 sw_mode == dml2_sw_4kb_2d ||
556 sw_mode == dml2_sw_64kb_2d ||
557 sw_mode == dml2_sw_256kb_2d) {
558 version = 12;
559 } else if (sw_mode == dml2_gfx11_sw_linear ||
560 sw_mode == dml2_gfx11_sw_64kb_d ||
561 sw_mode == dml2_gfx11_sw_64kb_d_t ||
562 sw_mode == dml2_gfx11_sw_64kb_d_x ||
563 sw_mode == dml2_gfx11_sw_64kb_r_x ||
564 sw_mode == dml2_gfx11_sw_256kb_d_x ||
565 sw_mode == dml2_gfx11_sw_256kb_r_x) {
566 version = 11;
567 } else {
568 DML_LOG_VERBOSE("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode);
569 DML_ASSERT(0);
570 }
571
572 return version;
573 }
574
CalculateBytePerPixelAndBlockSizes(enum dml2_source_format_class SourcePixelFormat,enum dml2_swizzle_mode SurfaceTiling,unsigned int pitch_y,unsigned int pitch_c,unsigned int * BytePerPixelY,unsigned int * BytePerPixelC,double * BytePerPixelDETY,double * BytePerPixelDETC,unsigned int * BlockHeight256BytesY,unsigned int * BlockHeight256BytesC,unsigned int * BlockWidth256BytesY,unsigned int * BlockWidth256BytesC,unsigned int * MacroTileHeightY,unsigned int * MacroTileHeightC,unsigned int * MacroTileWidthY,unsigned int * MacroTileWidthC,bool * surf_linear128_l,bool * surf_linear128_c)575 static void CalculateBytePerPixelAndBlockSizes(
576 enum dml2_source_format_class SourcePixelFormat,
577 enum dml2_swizzle_mode SurfaceTiling,
578 unsigned int pitch_y,
579 unsigned int pitch_c,
580
581 // Output
582 unsigned int *BytePerPixelY,
583 unsigned int *BytePerPixelC,
584 double *BytePerPixelDETY,
585 double *BytePerPixelDETC,
586 unsigned int *BlockHeight256BytesY,
587 unsigned int *BlockHeight256BytesC,
588 unsigned int *BlockWidth256BytesY,
589 unsigned int *BlockWidth256BytesC,
590 unsigned int *MacroTileHeightY,
591 unsigned int *MacroTileHeightC,
592 unsigned int *MacroTileWidthY,
593 unsigned int *MacroTileWidthC,
594 bool *surf_linear128_l,
595 bool *surf_linear128_c)
596 {
597 *BytePerPixelDETY = 0;
598 *BytePerPixelDETC = 0;
599 *BytePerPixelY = 1;
600 *BytePerPixelC = 1;
601
602 if (SourcePixelFormat == dml2_444_64) {
603 *BytePerPixelDETY = 8;
604 *BytePerPixelDETC = 0;
605 *BytePerPixelY = 8;
606 *BytePerPixelC = 0;
607 } else if (SourcePixelFormat == dml2_444_32 || SourcePixelFormat == dml2_rgbe) {
608 *BytePerPixelDETY = 4;
609 *BytePerPixelDETC = 0;
610 *BytePerPixelY = 4;
611 *BytePerPixelC = 0;
612 } else if (SourcePixelFormat == dml2_444_16 || SourcePixelFormat == dml2_mono_16) {
613 *BytePerPixelDETY = 2;
614 *BytePerPixelDETC = 0;
615 *BytePerPixelY = 2;
616 *BytePerPixelC = 0;
617 } else if (SourcePixelFormat == dml2_444_8 || SourcePixelFormat == dml2_mono_8) {
618 *BytePerPixelDETY = 1;
619 *BytePerPixelDETC = 0;
620 *BytePerPixelY = 1;
621 *BytePerPixelC = 0;
622 } else if (SourcePixelFormat == dml2_rgbe_alpha) {
623 *BytePerPixelDETY = 4;
624 *BytePerPixelDETC = 1;
625 *BytePerPixelY = 4;
626 *BytePerPixelC = 1;
627 } else if (SourcePixelFormat == dml2_420_8) {
628 *BytePerPixelDETY = 1;
629 *BytePerPixelDETC = 2;
630 *BytePerPixelY = 1;
631 *BytePerPixelC = 2;
632 } else if (SourcePixelFormat == dml2_420_12) {
633 *BytePerPixelDETY = 2;
634 *BytePerPixelDETC = 4;
635 *BytePerPixelY = 2;
636 *BytePerPixelC = 4;
637 } else if (SourcePixelFormat == dml2_420_10) {
638 *BytePerPixelDETY = (double)(4.0 / 3);
639 *BytePerPixelDETC = (double)(8.0 / 3);
640 *BytePerPixelY = 2;
641 *BytePerPixelC = 4;
642 } else {
643 DML_LOG_VERBOSE("ERROR: DML::%s: SourcePixelFormat = %u not supported!\n", __func__, SourcePixelFormat);
644 DML_ASSERT(0);
645 }
646
647 DML_LOG_VERBOSE("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat);
648 DML_LOG_VERBOSE("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
649 DML_LOG_VERBOSE("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
650 DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY);
651 DML_LOG_VERBOSE("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC);
652 DML_LOG_VERBOSE("DML::%s: pitch_y = %u\n", __func__, pitch_y);
653 DML_LOG_VERBOSE("DML::%s: pitch_c = %u\n", __func__, pitch_c);
654 DML_LOG_VERBOSE("DML::%s: surf_linear128_l = %u\n", __func__, *surf_linear128_l);
655 DML_LOG_VERBOSE("DML::%s: surf_linear128_c = %u\n", __func__, *surf_linear128_c);
656
657 if (dml_get_gfx_version(SurfaceTiling) == 11) {
658 *surf_linear128_l = 0;
659 *surf_linear128_c = 0;
660 } else {
661 if (SurfaceTiling == dml2_sw_linear) {
662 *surf_linear128_l = (((pitch_y * *BytePerPixelY) % 256) != 0);
663
664 if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha)
665 *surf_linear128_c = (((pitch_c * *BytePerPixelC) % 256) != 0);
666 }
667 }
668
669 if (!(dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha)) {
670 if (SurfaceTiling == dml2_sw_linear) {
671 *BlockHeight256BytesY = 1;
672 } else if (SourcePixelFormat == dml2_444_64) {
673 *BlockHeight256BytesY = 4;
674 } else if (SourcePixelFormat == dml2_444_8) {
675 *BlockHeight256BytesY = 16;
676 } else {
677 *BlockHeight256BytesY = 8;
678 }
679 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
680 *BlockHeight256BytesC = 0;
681 *BlockWidth256BytesC = 0;
682 } else { // dual plane
683 if (SurfaceTiling == dml2_sw_linear) {
684 *BlockHeight256BytesY = 1;
685 *BlockHeight256BytesC = 1;
686 } else if (SourcePixelFormat == dml2_rgbe_alpha) {
687 *BlockHeight256BytesY = 8;
688 *BlockHeight256BytesC = 16;
689 } else if (SourcePixelFormat == dml2_420_8) {
690 *BlockHeight256BytesY = 16;
691 *BlockHeight256BytesC = 8;
692 } else {
693 *BlockHeight256BytesY = 8;
694 *BlockHeight256BytesC = 8;
695 }
696 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
697 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
698 }
699 DML_LOG_VERBOSE("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY);
700 DML_LOG_VERBOSE("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY);
701 DML_LOG_VERBOSE("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC);
702 DML_LOG_VERBOSE("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC);
703
704 if (dml_get_gfx_version(SurfaceTiling) == 11) {
705 if (SurfaceTiling == dml2_gfx11_sw_linear) {
706 *MacroTileHeightY = *BlockHeight256BytesY;
707 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
708 *MacroTileHeightC = *BlockHeight256BytesC;
709 if (*MacroTileHeightC == 0) {
710 *MacroTileWidthC = 0;
711 } else {
712 *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
713 }
714 } else if (SurfaceTiling == dml2_gfx11_sw_64kb_d || SurfaceTiling == dml2_gfx11_sw_64kb_d_t || SurfaceTiling == dml2_gfx11_sw_64kb_d_x || SurfaceTiling == dml2_gfx11_sw_64kb_r_x) {
715 *MacroTileHeightY = 16 * *BlockHeight256BytesY;
716 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
717 *MacroTileHeightC = 16 * *BlockHeight256BytesC;
718 if (*MacroTileHeightC == 0) {
719 *MacroTileWidthC = 0;
720 } else {
721 *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
722 }
723 } else {
724 *MacroTileHeightY = 32 * *BlockHeight256BytesY;
725 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
726 *MacroTileHeightC = 32 * *BlockHeight256BytesC;
727 if (*MacroTileHeightC == 0) {
728 *MacroTileWidthC = 0;
729 } else {
730 *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
731 }
732 }
733 } else {
734 unsigned int macro_tile_size_bytes = dml_get_tile_block_size_bytes(SurfaceTiling);
735 unsigned int macro_tile_scale = 1; // macro tile to 256B req scaling
736
737 if (SurfaceTiling == dml2_sw_linear) {
738 macro_tile_scale = 1;
739 } else if (SurfaceTiling == dml2_sw_4kb_2d) {
740 macro_tile_scale = 4;
741 } else if (SurfaceTiling == dml2_sw_64kb_2d) {
742 macro_tile_scale = 16;
743 } else if (SurfaceTiling == dml2_sw_256kb_2d) {
744 macro_tile_scale = 32;
745 } else {
746 DML_LOG_VERBOSE("ERROR: Invalid SurfaceTiling setting! val=%u\n", SurfaceTiling);
747 DML_ASSERT(0);
748 }
749
750 *MacroTileHeightY = macro_tile_scale * *BlockHeight256BytesY;
751 *MacroTileWidthY = macro_tile_size_bytes / *BytePerPixelY / *MacroTileHeightY;
752 *MacroTileHeightC = macro_tile_scale * *BlockHeight256BytesC;
753 if (*MacroTileHeightC == 0) {
754 *MacroTileWidthC = 0;
755 } else {
756 *MacroTileWidthC = macro_tile_size_bytes / *BytePerPixelC / *MacroTileHeightC;
757 }
758 }
759
760 DML_LOG_VERBOSE("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY);
761 DML_LOG_VERBOSE("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY);
762 DML_LOG_VERBOSE("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC);
763 DML_LOG_VERBOSE("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC);
764 }
765
CalculateSinglePipeDPPCLKAndSCLThroughput(double HRatio,double HRatioChroma,double VRatio,double VRatioChroma,double MaxDCHUBToPSCLThroughput,double MaxPSCLToLBThroughput,double PixelClock,enum dml2_source_format_class SourcePixelFormat,unsigned int HTaps,unsigned int HTapsChroma,unsigned int VTaps,unsigned int VTapsChroma,double * PSCL_THROUGHPUT,double * PSCL_THROUGHPUT_CHROMA,double * DPPCLKUsingSingleDPP)766 static void CalculateSinglePipeDPPCLKAndSCLThroughput(
767 double HRatio,
768 double HRatioChroma,
769 double VRatio,
770 double VRatioChroma,
771 double MaxDCHUBToPSCLThroughput,
772 double MaxPSCLToLBThroughput,
773 double PixelClock,
774 enum dml2_source_format_class SourcePixelFormat,
775 unsigned int HTaps,
776 unsigned int HTapsChroma,
777 unsigned int VTaps,
778 unsigned int VTapsChroma,
779
780 // Output
781 double *PSCL_THROUGHPUT,
782 double *PSCL_THROUGHPUT_CHROMA,
783 double *DPPCLKUsingSingleDPP)
784 {
785 double DPPCLKUsingSingleDPPLuma;
786 double DPPCLKUsingSingleDPPChroma;
787
788 if (HRatio > 1) {
789 *PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / math_ceil2((double)HTaps / 6.0, 1.0));
790 } else {
791 *PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
792 }
793
794 DPPCLKUsingSingleDPPLuma = PixelClock * math_max3(VTaps / 6 * math_min2(1, HRatio), HRatio * VRatio / *PSCL_THROUGHPUT, 1);
795
796 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
797 DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
798
799 if (!dml_is_420(SourcePixelFormat) && SourcePixelFormat != dml2_rgbe_alpha) {
800 *PSCL_THROUGHPUT_CHROMA = 0;
801 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
802 } else {
803 if (HRatioChroma > 1) {
804 *PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatioChroma / math_ceil2((double)HTapsChroma / 6.0, 1.0));
805 } else {
806 *PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
807 }
808 DPPCLKUsingSingleDPPChroma = PixelClock * math_max3(VTapsChroma / 6 * math_min2(1, HRatioChroma),
809 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
810 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
811 DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
812 *DPPCLKUsingSingleDPP = math_max2(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
813 }
814 }
815
CalculateSwathWidth(const struct dml2_display_cfg * display_cfg,bool ForceSingleDPP,unsigned int NumberOfActiveSurfaces,enum dml2_odm_mode ODMMode[],unsigned int BytePerPixY[],unsigned int BytePerPixC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],bool surf_linear128_l[],bool surf_linear128_c[],unsigned int DPPPerSurface[],unsigned int req_per_swath_ub_l[],unsigned int req_per_swath_ub_c[],unsigned int SwathWidthSingleDPPY[],unsigned int SwathWidthSingleDPPC[],unsigned int SwathWidthY[],unsigned int SwathWidthC[],unsigned int MaximumSwathHeightY[],unsigned int MaximumSwathHeightC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[])816 static void CalculateSwathWidth(
817 const struct dml2_display_cfg *display_cfg,
818 bool ForceSingleDPP,
819 unsigned int NumberOfActiveSurfaces,
820 enum dml2_odm_mode ODMMode[],
821 unsigned int BytePerPixY[],
822 unsigned int BytePerPixC[],
823 unsigned int Read256BytesBlockHeightY[],
824 unsigned int Read256BytesBlockHeightC[],
825 unsigned int Read256BytesBlockWidthY[],
826 unsigned int Read256BytesBlockWidthC[],
827 bool surf_linear128_l[],
828 bool surf_linear128_c[],
829 unsigned int DPPPerSurface[],
830
831 // Output
832 unsigned int req_per_swath_ub_l[],
833 unsigned int req_per_swath_ub_c[],
834 unsigned int SwathWidthSingleDPPY[], // post-rotated plane width
835 unsigned int SwathWidthSingleDPPC[],
836 unsigned int SwathWidthY[], // per-pipe
837 unsigned int SwathWidthC[], // per-pipe
838 unsigned int MaximumSwathHeightY[],
839 unsigned int MaximumSwathHeightC[],
840 unsigned int swath_width_luma_ub[], // per-pipe
841 unsigned int swath_width_chroma_ub[]) // per-pipe
842 {
843 (void)BytePerPixY;
844 enum dml2_odm_mode MainSurfaceODMMode;
845 double odm_hactive_factor = 1.0;
846 unsigned int req_width_horz_y;
847 unsigned int req_width_horz_c;
848 unsigned int surface_width_ub_l;
849 unsigned int surface_height_ub_l;
850 unsigned int surface_width_ub_c;
851 unsigned int surface_height_ub_c;
852
853 DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
854 DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
855
856 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
857 if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
858 SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
859 } else {
860 SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
861 }
862
863 DML_LOG_VERBOSE("DML::%s: k=%u ViewportWidth=%lu\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width);
864 DML_LOG_VERBOSE("DML::%s: k=%u ViewportHeight=%lu\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height);
865 DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
866
867 MainSurfaceODMMode = ODMMode[k];
868
869 if (ForceSingleDPP) {
870 SwathWidthY[k] = SwathWidthSingleDPPY[k];
871 } else {
872 if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1)
873 odm_hactive_factor = 4.0;
874 else if (MainSurfaceODMMode == dml2_odm_mode_combine_3to1)
875 odm_hactive_factor = 3.0;
876 else if (MainSurfaceODMMode == dml2_odm_mode_combine_2to1)
877 odm_hactive_factor = 2.0;
878
879 if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1 || MainSurfaceODMMode == dml2_odm_mode_combine_3to1 || MainSurfaceODMMode == dml2_odm_mode_combine_2to1) {
880 SwathWidthY[k] = (unsigned int)(math_min2((double)SwathWidthSingleDPPY[k], math_round((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active / odm_hactive_factor * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio)));
881 } else if (DPPPerSurface[k] == 2) {
882 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
883 } else {
884 SwathWidthY[k] = SwathWidthSingleDPPY[k];
885 }
886 }
887
888 DML_LOG_VERBOSE("DML::%s: k=%u HActive=%lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active);
889 DML_LOG_VERBOSE("DML::%s: k=%u HRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
890 DML_LOG_VERBOSE("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode);
891 DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", __func__, k, SwathWidthSingleDPPY[k]);
892 DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]);
893
894 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) {
895 SwathWidthC[k] = SwathWidthY[k] / 2;
896 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
897 } else {
898 SwathWidthC[k] = SwathWidthY[k];
899 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
900 }
901
902 if (ForceSingleDPP == true) {
903 SwathWidthY[k] = SwathWidthSingleDPPY[k];
904 SwathWidthC[k] = SwathWidthSingleDPPC[k];
905 }
906
907 req_width_horz_y = Read256BytesBlockWidthY[k];
908 req_width_horz_c = Read256BytesBlockWidthC[k];
909
910 if (surf_linear128_l[k])
911 req_width_horz_y = req_width_horz_y / 2;
912
913 if (surf_linear128_c[k])
914 req_width_horz_c = req_width_horz_c / 2;
915
916 surface_width_ub_l = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.width, req_width_horz_y);
917 surface_height_ub_l = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.height, Read256BytesBlockHeightY[k]);
918 surface_width_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.width, req_width_horz_c);
919 surface_height_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.height, Read256BytesBlockHeightC[k]);
920
921 DML_LOG_VERBOSE("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l);
922 DML_LOG_VERBOSE("DML::%s: k=%u surface_height_ub_l=%u\n", __func__, k, surface_height_ub_l);
923 DML_LOG_VERBOSE("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c);
924 DML_LOG_VERBOSE("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c);
925 DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y);
926 DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c);
927 DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]);
928 DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]);
929 DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]);
930 DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]);
931 DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y);
932 DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c);
933 DML_LOG_VERBOSE("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.stationary);
934 DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
935
936 req_per_swath_ub_l[k] = 0;
937 req_per_swath_ub_c[k] = 0;
938 if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
939 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
940 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
941 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
942 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start + SwathWidthY[k] + req_width_horz_y - 1, req_width_horz_y) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start, req_width_horz_y)));
943 } else {
944 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, math_ceil2((double)SwathWidthY[k] - 1, req_width_horz_y) + req_width_horz_y));
945 }
946 req_per_swath_ub_l[k] = swath_width_luma_ub[k] / req_width_horz_y;
947
948 if (BytePerPixC[k] > 0) {
949 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
950 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start + SwathWidthC[k] + req_width_horz_c - 1, req_width_horz_c) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start, req_width_horz_c)));
951 } else {
952 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_ceil2((double)SwathWidthC[k] - 1, req_width_horz_c) + req_width_horz_c));
953 }
954 req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / req_width_horz_c;
955 } else {
956 swath_width_chroma_ub[k] = 0;
957 }
958 } else {
959 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
960 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
961
962 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
963 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start + SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, Read256BytesBlockHeightY[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start, Read256BytesBlockHeightY[k])));
964 } else {
965 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_ceil2((double)SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]));
966 }
967 req_per_swath_ub_l[k] = swath_width_luma_ub[k] / Read256BytesBlockHeightY[k];
968 if (BytePerPixC[k] > 0) {
969 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
970 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start + SwathWidthC[k] + Read256BytesBlockHeightC[k] - 1, Read256BytesBlockHeightC[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start, Read256BytesBlockHeightC[k])));
971 } else {
972 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_ceil2((double)SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]));
973 }
974 req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / Read256BytesBlockHeightC[k];
975 } else {
976 swath_width_chroma_ub[k] = 0;
977 }
978 }
979
980 DML_LOG_VERBOSE("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]);
981 DML_LOG_VERBOSE("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]);
982 DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, MaximumSwathHeightY[k]);
983 DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightC=%u\n", __func__, k, MaximumSwathHeightC[k]);
984 DML_LOG_VERBOSE("DML::%s: k=%u req_per_swath_ub_l=%u\n", __func__, k, req_per_swath_ub_l[k]);
985 DML_LOG_VERBOSE("DML::%s: k=%u req_per_swath_ub_c=%u\n", __func__, k, req_per_swath_ub_c[k]);
986 }
987 }
988
/*
 * Decide whether unbounded requesting is enabled.
 *
 * It is only permitted with exactly one active DPP and NoChromaOrLinear set.
 * When unb_req_force_en is set, unb_req_force_val must also be true for the
 * feature to be enabled; the force can disable it but never enable it when
 * the basic condition fails.
 *
 * Returns the resulting enable state.
 */
static bool UnboundedRequest(bool unb_req_force_en, bool unb_req_force_val, unsigned int TotalNumberOfActiveDPP, bool NoChromaOrLinear)
{
	const bool unb_req_ok = (TotalNumberOfActiveDPP == 1) && NoChromaOrLinear;
	const bool unb_req_en = unb_req_ok && (!unb_req_force_en || unb_req_force_val);

	DML_LOG_VERBOSE("DML::%s: unb_req_force_en = %u\n", __func__, unb_req_force_en);
	DML_LOG_VERBOSE("DML::%s: unb_req_force_val = %u\n", __func__, unb_req_force_val);
	DML_LOG_VERBOSE("DML::%s: unb_req_ok = %u\n", __func__, unb_req_ok);
	DML_LOG_VERBOSE("DML::%s: unb_req_en = %u\n", __func__, unb_req_en);

	return unb_req_en;
}
1006
CalculateDETBufferSize(struct dml2_core_shared_CalculateDETBufferSize_locals * l,const struct dml2_display_cfg * display_cfg,bool ForceSingleDPP,unsigned int NumberOfActiveSurfaces,bool UnboundedRequestEnabled,unsigned int nomDETInKByte,unsigned int MaxTotalDETInKByte,unsigned int ConfigReturnBufferSizeInKByte,unsigned int MinCompressedBufferSizeInKByte,unsigned int ConfigReturnBufferSegmentSizeInkByte,unsigned int CompressedBufferSegmentSizeInkByte,double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int full_swath_bytes_l[],unsigned int full_swath_bytes_c[],unsigned int DPPPerSurface[],unsigned int DETBufferSizeInKByte[],unsigned int * CompressedBufferSizeInkByte)1007 static void CalculateDETBufferSize(
1008 struct dml2_core_shared_CalculateDETBufferSize_locals *l,
1009 const struct dml2_display_cfg *display_cfg,
1010 bool ForceSingleDPP,
1011 unsigned int NumberOfActiveSurfaces,
1012 bool UnboundedRequestEnabled,
1013 unsigned int nomDETInKByte,
1014 unsigned int MaxTotalDETInKByte,
1015 unsigned int ConfigReturnBufferSizeInKByte,
1016 unsigned int MinCompressedBufferSizeInKByte,
1017 unsigned int ConfigReturnBufferSegmentSizeInkByte,
1018 unsigned int CompressedBufferSegmentSizeInkByte,
1019 double ReadBandwidthLuma[],
1020 double ReadBandwidthChroma[],
1021 unsigned int full_swath_bytes_l[],
1022 unsigned int full_swath_bytes_c[],
1023 unsigned int DPPPerSurface[],
1024 // Output
1025 unsigned int DETBufferSizeInKByte[],
1026 unsigned int *CompressedBufferSizeInkByte)
1027 {
1028 memset(l, 0, sizeof(struct dml2_core_shared_CalculateDETBufferSize_locals));
1029
1030 bool DETPieceAssignedToThisSurfaceAlready[DML2_MAX_PLANES];
1031 bool NextPotentialSurfaceToAssignDETPieceFound;
1032 bool MinimizeReallocationSuccess = false;
1033
1034 DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
1035 DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
1036 DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
1037 DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, UnboundedRequestEnabled);
1038 DML_LOG_VERBOSE("DML::%s: MaxTotalDETInKByte = %u\n", __func__, MaxTotalDETInKByte);
1039 DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
1040 DML_LOG_VERBOSE("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, MinCompressedBufferSizeInKByte);
1041 DML_LOG_VERBOSE("DML::%s: CompressedBufferSegmentSizeInkByte = %u\n", __func__, CompressedBufferSegmentSizeInkByte);
1042
1043 // Note: Will use default det size if that fits 2 swaths
1044 if (UnboundedRequestEnabled) {
1045 if (display_cfg->plane_descriptors[0].overrides.det_size_override_kb > 0) {
1046 DETBufferSizeInKByte[0] = display_cfg->plane_descriptors[0].overrides.det_size_override_kb;
1047 } else {
1048 DETBufferSizeInKByte[0] = (unsigned int)math_max2(128.0, math_ceil2(2.0 * ((double)full_swath_bytes_l[0] + (double)full_swath_bytes_c[0]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte));
1049 }
1050 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
1051 } else {
1052 l->DETBufferSizePoolInKByte = MaxTotalDETInKByte;
1053 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1054 DETBufferSizeInKByte[k] = 0;
1055 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) {
1056 l->max_minDET = nomDETInKByte - ConfigReturnBufferSegmentSizeInkByte;
1057 } else {
1058 l->max_minDET = nomDETInKByte;
1059 }
1060 l->minDET = 128;
1061 l->minDET_pipe = 0;
1062
1063 // add DET resource until can hold 2 full swaths
1064 while (l->minDET <= l->max_minDET && l->minDET_pipe == 0) {
1065 if (2.0 * ((double)full_swath_bytes_l[k] + (double)full_swath_bytes_c[k]) / 1024.0 <= l->minDET)
1066 l->minDET_pipe = l->minDET;
1067 l->minDET = l->minDET + ConfigReturnBufferSegmentSizeInkByte;
1068 }
1069
1070 DML_LOG_VERBOSE("DML::%s: k=%u minDET = %u\n", __func__, k, l->minDET);
1071 DML_LOG_VERBOSE("DML::%s: k=%u max_minDET = %u\n", __func__, k, l->max_minDET);
1072 DML_LOG_VERBOSE("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, l->minDET_pipe);
1073 DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, full_swath_bytes_l[k]);
1074 DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, full_swath_bytes_c[k]);
1075
1076 if (l->minDET_pipe == 0) {
1077 l->minDET_pipe = (unsigned int)(math_max2(128, math_ceil2(((double)full_swath_bytes_l[k] + (double)full_swath_bytes_c[k]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte)));
1078 DML_LOG_VERBOSE("DML::%s: k=%u minDET_pipe = %u (assume each plane take half DET)\n", __func__, k, l->minDET_pipe);
1079 }
1080
1081 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
1082 DETBufferSizeInKByte[k] = 0;
1083 } else if (display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0) {
1084 DETBufferSizeInKByte[k] = display_cfg->plane_descriptors[k].overrides.det_size_override_kb;
1085 l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * display_cfg->plane_descriptors[k].overrides.det_size_override_kb;
1086 } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * l->minDET_pipe <= l->DETBufferSizePoolInKByte) {
1087 DETBufferSizeInKByte[k] = l->minDET_pipe;
1088 l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * l->minDET_pipe;
1089 }
1090
1091 DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]);
1092 DML_LOG_VERBOSE("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.det_size_override_kb);
1093 DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
1094 DML_LOG_VERBOSE("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, l->DETBufferSizePoolInKByte);
1095 }
1096
1097 if (display_cfg->minimize_det_reallocation) {
1098 MinimizeReallocationSuccess = true;
1099 // To minimize det reallocation, we don't distribute based on each surfaces bandwidth proportional to the global
1100 // but rather distribute DET across streams proportionally based on pixel rate, and only distribute based on
1101 // bandwidth between the planes on the same stream. This ensures that large scale re-distribution only on a
1102 // stream count and/or pixel rate change, which is must less likely then general bandwidth changes per plane.
1103
1104 // Calculate total pixel rate
1105 for (unsigned int k = 0; k < display_cfg->num_streams; ++k) {
1106 l->TotalPixelRate += display_cfg->stream_descriptors[k].timing.pixel_clock_khz;
1107 }
1108
1109 // Calculate per stream DET budget
1110 for (unsigned int k = 0; k < display_cfg->num_streams; ++k) {
1111 l->DETBudgetPerStream[k] = (unsigned int)((double) display_cfg->stream_descriptors[k].timing.pixel_clock_khz * MaxTotalDETInKByte / l->TotalPixelRate);
1112 l->RemainingDETBudgetPerStream[k] = l->DETBudgetPerStream[k];
1113 }
1114
1115 // Calculate the per stream total bandwidth
1116 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1117 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
1118 l->TotalBandwidthPerStream[display_cfg->plane_descriptors[k].stream_index] += (unsigned int)(ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1119
1120 // Check the minimum can be satisfied by budget
1121 if (l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] >= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1122 l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] -= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k]);
1123 } else {
1124 MinimizeReallocationSuccess = false;
1125 break;
1126 }
1127 }
1128 }
1129
1130 if (MinimizeReallocationSuccess) {
1131 // Since a fixed budget per stream is sufficient to satisfy the minimums, just re-distribute each streams
1132 // budget proportionally across its planes
1133 l->ResidualDETAfterRounding = MaxTotalDETInKByte;
1134
1135 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1136 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
1137 l->IdealDETBudget = (unsigned int)(((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / l->TotalBandwidthPerStream[display_cfg->plane_descriptors[k].stream_index])
1138 * l->DETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index]);
1139
1140 if (l->IdealDETBudget > DETBufferSizeInKByte[k]) {
1141 l->DeltaDETBudget = l->IdealDETBudget - DETBufferSizeInKByte[k];
1142 if (l->DeltaDETBudget > l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index])
1143 l->DeltaDETBudget = l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index];
1144
1145 /* split the additional budgeted DET among the pipes per plane */
1146 DETBufferSizeInKByte[k] += (unsigned int)((double)l->DeltaDETBudget / (ForceSingleDPP ? 1 : DPPPerSurface[k]));
1147 l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] -= l->DeltaDETBudget;
1148 }
1149
1150 // Round down to segment size
1151 DETBufferSizeInKByte[k] = (DETBufferSizeInKByte[k] / ConfigReturnBufferSegmentSizeInkByte) * ConfigReturnBufferSegmentSizeInkByte;
1152
1153 l->ResidualDETAfterRounding -= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k]);
1154 }
1155 }
1156 }
1157 }
1158
1159 if (!MinimizeReallocationSuccess) {
1160 l->TotalBandwidth = 0;
1161 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1162 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
1163 l->TotalBandwidth = l->TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
1164 }
1165 }
1166 DML_LOG_VERBOSE("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
1167 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1168 DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
1169 }
1170 DML_LOG_VERBOSE("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
1171 DML_LOG_VERBOSE("DML::%s: TotalBandwidth = %f\n", __func__, l->TotalBandwidth);
1172 l->BandwidthOfSurfacesNotAssignedDETPiece = l->TotalBandwidth;
1173 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1174
1175 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
1176 DETPieceAssignedToThisSurfaceAlready[k] = true;
1177 } else if (display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0 || (((double)(ForceSingleDPP ? 1 : DPPPerSurface[k]) * (double)DETBufferSizeInKByte[k] / (double)MaxTotalDETInKByte) >= ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / l->TotalBandwidth))) {
1178 DETPieceAssignedToThisSurfaceAlready[k] = true;
1179 l->BandwidthOfSurfacesNotAssignedDETPiece = l->BandwidthOfSurfacesNotAssignedDETPiece - ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1180 } else {
1181 DETPieceAssignedToThisSurfaceAlready[k] = false;
1182 }
1183 DML_LOG_VERBOSE("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, DETPieceAssignedToThisSurfaceAlready[k]);
1184 DML_LOG_VERBOSE("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, l->BandwidthOfSurfacesNotAssignedDETPiece);
1185 }
1186
1187 for (unsigned int j = 0; j < NumberOfActiveSurfaces; ++j) {
1188 NextPotentialSurfaceToAssignDETPieceFound = false;
1189 l->NextSurfaceToAssignDETPiece = 0;
1190
1191 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1192 DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, ReadBandwidthLuma[k]);
1193 DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, ReadBandwidthChroma[k]);
1194 DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]);
1195 DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]);
1196 DML_LOG_VERBOSE("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, l->NextSurfaceToAssignDETPiece);
1197 if (!DETPieceAssignedToThisSurfaceAlready[k] && (!NextPotentialSurfaceToAssignDETPieceFound ||
1198 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece])) {
1199 l->NextSurfaceToAssignDETPiece = k;
1200 NextPotentialSurfaceToAssignDETPieceFound = true;
1201 }
1202 DML_LOG_VERBOSE("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1203 DML_LOG_VERBOSE("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1204 }
1205
1206 if (NextPotentialSurfaceToAssignDETPieceFound) {
1207 l->NextDETBufferPieceInKByte = (unsigned int)(math_min2(
1208 math_round((double)l->DETBufferSizePoolInKByte * (ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]) / l->BandwidthOfSurfacesNotAssignedDETPiece /
1209 ((ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte))
1210 * (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte,
1211 math_floor2((double)l->DETBufferSizePoolInKByte, (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte)));
1212
1213 DML_LOG_VERBOSE("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, l->DETBufferSizePoolInKByte);
1214 DML_LOG_VERBOSE("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, l->NextSurfaceToAssignDETPiece);
1215 DML_LOG_VERBOSE("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]);
1216 DML_LOG_VERBOSE("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]);
1217 DML_LOG_VERBOSE("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, l->BandwidthOfSurfacesNotAssignedDETPiece);
1218 DML_LOG_VERBOSE("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, l->NextDETBufferPieceInKByte);
1219 DML_LOG_VERBOSE("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, l->NextSurfaceToAssignDETPiece, DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]);
1220
1221 DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] = DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] + l->NextDETBufferPieceInKByte / (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]);
1222 DML_LOG_VERBOSE("to %u\n", DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]);
1223
1224 l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - l->NextDETBufferPieceInKByte;
1225 DETPieceAssignedToThisSurfaceAlready[l->NextSurfaceToAssignDETPiece] = true;
1226 l->BandwidthOfSurfacesNotAssignedDETPiece = l->BandwidthOfSurfacesNotAssignedDETPiece - (ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]);
1227 }
1228 }
1229 }
1230 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
1231 }
1232 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByte / ConfigReturnBufferSegmentSizeInkByte;
1233
1234 DML_LOG_VERBOSE("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1235 DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *CompressedBufferSizeInkByte);
1236 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1237 DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1238 }
1239 }
1240
CalculateRequiredDispclk(enum dml2_odm_mode ODMMode,double PixelClock,bool isTMDS420)1241 static double CalculateRequiredDispclk(
1242 enum dml2_odm_mode ODMMode,
1243 double PixelClock,
1244 bool isTMDS420)
1245 {
1246 double DispClk;
1247
1248 if (ODMMode == dml2_odm_mode_combine_4to1) {
1249 DispClk = PixelClock / 4.0;
1250 } else if (ODMMode == dml2_odm_mode_combine_3to1) {
1251 DispClk = PixelClock / 3.0;
1252 } else if (ODMMode == dml2_odm_mode_combine_2to1) {
1253 DispClk = PixelClock / 2.0;
1254 } else {
1255 DispClk = PixelClock;
1256 }
1257
1258 if (isTMDS420) {
1259 double TMDS420MinPixClock = PixelClock / 2.0;
1260 DispClk = math_max2(DispClk, TMDS420MinPixClock);
1261 }
1262
1263 return DispClk;
1264 }
1265
TruncToValidBPP(struct dml2_core_shared_TruncToValidBPP_locals * l,double LinkBitRate,unsigned int Lanes,unsigned int HTotal,unsigned int HActive,double PixelClock,double DesiredBPP,bool DSCEnable,enum dml2_output_encoder_class Output,enum dml2_output_format_class Format,unsigned int DSCInputBitPerComponent,unsigned int DSCSlices,unsigned int AudioRate,unsigned int AudioLayout,enum dml2_odm_mode ODMModeNoDSC,enum dml2_odm_mode ODMModeDSC,unsigned int * RequiredSlots)1266 static double TruncToValidBPP(
1267 struct dml2_core_shared_TruncToValidBPP_locals *l,
1268 double LinkBitRate,
1269 unsigned int Lanes,
1270 unsigned int HTotal,
1271 unsigned int HActive,
1272 double PixelClock,
1273 double DesiredBPP,
1274 bool DSCEnable,
1275 enum dml2_output_encoder_class Output,
1276 enum dml2_output_format_class Format,
1277 unsigned int DSCInputBitPerComponent,
1278 unsigned int DSCSlices,
1279 unsigned int AudioRate,
1280 unsigned int AudioLayout,
1281 enum dml2_odm_mode ODMModeNoDSC,
1282 enum dml2_odm_mode ODMModeDSC,
1283
1284 // Output
1285 unsigned int *RequiredSlots)
1286 {
1287 (void)DSCInputBitPerComponent;
1288 (void)RequiredSlots;
1289 double MaxLinkBPP;
1290 unsigned int MinDSCBPP;
1291 double MaxDSCBPP;
1292 unsigned int NonDSCBPP0;
1293 unsigned int NonDSCBPP1;
1294 unsigned int NonDSCBPP2;
1295 enum dml2_odm_mode ODMMode;
1296
1297 if (Format == dml2_420) {
1298 NonDSCBPP0 = 12;
1299 NonDSCBPP1 = 15;
1300 NonDSCBPP2 = 18;
1301 MinDSCBPP = 6;
1302 MaxDSCBPP = 16;
1303 } else if (Format == dml2_444) {
1304 NonDSCBPP0 = 24;
1305 NonDSCBPP1 = 30;
1306 NonDSCBPP2 = 36;
1307 MinDSCBPP = 8;
1308 MaxDSCBPP = 16;
1309 } else {
1310
1311 if (Output == dml2_hdmi || Output == dml2_hdmifrl) {
1312 NonDSCBPP0 = 24;
1313 NonDSCBPP1 = 24;
1314 NonDSCBPP2 = 24;
1315 } else {
1316 NonDSCBPP0 = 16;
1317 NonDSCBPP1 = 20;
1318 NonDSCBPP2 = 24;
1319 }
1320 if (Format == dml2_n422 || Output == dml2_hdmifrl) {
1321 MinDSCBPP = 7;
1322 MaxDSCBPP = 16;
1323 } else {
1324 MinDSCBPP = 8;
1325 MaxDSCBPP = 16;
1326 }
1327 }
1328
1329 if (Output == dml2_dp2p0) {
1330 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128.0 / 132.0 * 383.0 / 384.0 * 65536.0 / 65540.0;
1331 } else if (DSCEnable && Output == dml2_dp) {
1332 MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock * (1 - 2.4 / 100);
1333 } else {
1334 MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock;
1335 }
1336
1337 ODMMode = DSCEnable ? ODMModeDSC : ODMModeNoDSC;
1338
1339 if (ODMMode == dml2_odm_mode_split_1to2) {
1340 MaxLinkBPP = 2 * MaxLinkBPP;
1341 }
1342
1343 if (DesiredBPP == 0) {
1344 if (DSCEnable) {
1345 if (MaxLinkBPP < MinDSCBPP) {
1346 return __DML2_CALCS_DPP_INVALID__;
1347 } else if (MaxLinkBPP >= MaxDSCBPP) {
1348 return MaxDSCBPP;
1349 } else {
1350 return math_floor2(16.0 * MaxLinkBPP, 1.0) / 16.0;
1351 }
1352 } else {
1353 if (MaxLinkBPP >= NonDSCBPP2) {
1354 return NonDSCBPP2;
1355 } else if (MaxLinkBPP >= NonDSCBPP1) {
1356 return NonDSCBPP1;
1357 } else if (MaxLinkBPP >= NonDSCBPP0) {
1358 return NonDSCBPP0;
1359 } else {
1360 return __DML2_CALCS_DPP_INVALID__;
1361 }
1362 }
1363 } else {
1364 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0)) ||
1365 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
1366 return __DML2_CALCS_DPP_INVALID__;
1367 } else {
1368 return DesiredBPP;
1369 }
1370 }
1371 }
1372
1373 // updated for dcn4
dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum dml2_output_format_class pixelFormat,enum dml2_output_encoder_class Output)1374 static unsigned int dscceComputeDelay(
1375 unsigned int bpc,
1376 double BPP,
1377 unsigned int sliceWidth,
1378 unsigned int numSlices,
1379 enum dml2_output_format_class pixelFormat,
1380 enum dml2_output_encoder_class Output)
1381 {
1382 // valid bpc = source bits per component in the set of {8, 10, 12}
1383 // valid bpp = increments of 1/16 of a bit
1384 // min = 6/7/8 in N420/N422/444, respectively
1385 // max = such that compression is 1:1
1386 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
1387 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
1388 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
1389
1390 // fixed value
1391 unsigned int rcModelSize = 8192;
1392
1393 // N422/N420 operate at 2 pixels per clock
1394 unsigned int pixelsPerClock, padding_pixels, ssm_group_priming_delay, ssm_pipeline_delay, obsm_pipeline_delay, slice_padded_pixels, ixd_plus_padding, ixd_plus_padding_groups, cycles_per_group, group_delay, pipeline_delay, pixels, additional_group_delay, lines_to_reach_ixd, groups_to_reach_ixd, slice_width_groups, initial_xmit_delay, number_of_lines_to_reach_ixd, slice_width_modified;
1395
1396 if (pixelFormat == dml2_420)
1397 pixelsPerClock = 2;
1398 // #all other modes operate at 1 pixel per clock
1399 else if (pixelFormat == dml2_444)
1400 pixelsPerClock = 1;
1401 else if (pixelFormat == dml2_n422 || Output == dml2_hdmifrl)
1402 pixelsPerClock = 2;
1403 else
1404 pixelsPerClock = 1;
1405
1406 //initial transmit delay as per PPS
1407 initial_xmit_delay = (unsigned int)(math_round(rcModelSize / 2.0 / BPP / pixelsPerClock));
1408
1409 //slice width as seen by dscc_bcl in pixels or pixels pairs (depending on number of pixels per pixel container based on pixel format)
1410 slice_width_modified = (pixelFormat == dml2_444 || pixelFormat == dml2_420 || Output == dml2_hdmifrl) ? sliceWidth / 2 : sliceWidth;
1411
1412 padding_pixels = ((slice_width_modified % 3) != 0) ? (3 - (slice_width_modified % 3)) * (initial_xmit_delay / slice_width_modified) : 0;
1413
1414 if ((3.0 * pixelsPerClock * BPP) >= ((double)((initial_xmit_delay + 2) / 3) * (double)(3 + (pixelFormat == dml2_n422)))) {
1415 if ((initial_xmit_delay + padding_pixels) % 3 == 1) {
1416 initial_xmit_delay++;
1417 }
1418 }
1419
1420 //sub-stream multiplexer balance fifo priming delay in groups as per dsc standard
1421 if (bpc == 8)
1422 ssm_group_priming_delay = 83;
1423 else if (bpc == 10)
1424 ssm_group_priming_delay = 91;
1425 else if (bpc == 12)
1426 ssm_group_priming_delay = 115;
1427 else if (bpc == 14)
1428 ssm_group_priming_delay = 123;
1429 else
1430 ssm_group_priming_delay = 128;
1431
1432 //slice width in groups is rounded up to the nearest group as DSC adds padded pixels such that there are an integer number of groups per slice
1433 slice_width_groups = (slice_width_modified + 2) / 3;
1434
1435 //determine number of padded pixels in the last group of a slice line, computed as
1436 slice_padded_pixels = 3 * slice_width_groups - slice_width_modified;
1437
1438 //determine integer number of complete slice lines required to reach initial transmit delay without ssm delay considered
1439 number_of_lines_to_reach_ixd = initial_xmit_delay / slice_width_modified;
1440
1441 //increase initial transmit delay by the number of padded pixels added to a slice line multipled by the integer number of complete lines to reach initial transmit delay
1442 //this step is necessary as each padded pixel added takes up a clock cycle and, therefore, adds to the overall delay
1443 ixd_plus_padding = initial_xmit_delay + slice_padded_pixels * number_of_lines_to_reach_ixd;
1444
1445 //convert the padded initial transmit delay from pixels to groups by rounding up to the nearest group as DSC processes in groups of pixels
1446 ixd_plus_padding_groups = (ixd_plus_padding + 2) / 3;
1447
1448 //number of groups required for a slice to reach initial transmit delay is the sum of the padded initial transmit delay plus the ssm group priming delay
1449 groups_to_reach_ixd = ixd_plus_padding_groups + ssm_group_priming_delay;
1450
1451 //number of lines required to reach padded initial transmit delay in groups in slices to the left of the last horizontal slice
1452 //needs to be rounded up as a complete slice lines are buffered prior to initial transmit delay being reached in the last horizontal slice
1453 lines_to_reach_ixd = (groups_to_reach_ixd + slice_width_groups - 1) / slice_width_groups; //round up lines to reach ixd to next
1454
1455 //determine if there are non-zero number of pixels reached in the group where initial transmit delay is reached
1456 //an additional group time (i.e., 3 pixel times) is required before the first output if there are no additional pixels beyond initial transmit delay
1457 additional_group_delay = ((initial_xmit_delay - number_of_lines_to_reach_ixd * slice_width_modified) % 3) == 0 ? 1 : 0;
1458
1459 //number of pipeline delay cycles in the ssm block (can be determined empirically or analytically by inspecting the ssm block)
1460 ssm_pipeline_delay = 2;
1461
1462 //number of pipe delay cycles in the obsm block (can be determined empirically or analytically by inspecting the obsm block)
1463 obsm_pipeline_delay = 1;
1464
1465 //a group of pixels is worth 6 pixels in N422/N420 mode or 3 pixels in all other modes
1466 if (pixelFormat == dml2_420 || pixelFormat == dml2_444 || pixelFormat == dml2_n422 || Output == dml2_hdmifrl)
1467 cycles_per_group = 6;
1468 else
1469 cycles_per_group = 3;
1470 //delay of the bit stream contruction layer in pixels is the sum of:
1471 //1. number of pixel containers in a slice line multipled by the number of lines required to reach initial transmit delay multipled by number of slices to the left of the last horizontal slice
1472 //2. number of pixel containers required to reach initial transmit delay (specifically, in the last horizontal slice)
1473 //3. additional group of delay if initial transmit delay is reached exactly in a group
1474 //4. ssm and obsm pipeline delay (i.e., clock cycles of delay)
1475 group_delay = (lines_to_reach_ixd * slice_width_groups * (numSlices - 1)) + groups_to_reach_ixd + additional_group_delay;
1476 pipeline_delay = ssm_pipeline_delay + obsm_pipeline_delay;
1477
1478 //pixel delay is group_delay (converted to pixels) + pipeline, however, first group is a special case since it is processed as soon as it arrives (i.e., in 3 cycles regardless of pixel format)
1479 pixels = (group_delay - 1) * cycles_per_group + 3 + pipeline_delay;
1480
1481 DML_LOG_VERBOSE("DML::%s: bpc: %u\n", __func__, bpc);
1482 DML_LOG_VERBOSE("DML::%s: BPP: %f\n", __func__, BPP);
1483 DML_LOG_VERBOSE("DML::%s: sliceWidth: %u\n", __func__, sliceWidth);
1484 DML_LOG_VERBOSE("DML::%s: numSlices: %u\n", __func__, numSlices);
1485 DML_LOG_VERBOSE("DML::%s: pixelFormat: %u\n", __func__, pixelFormat);
1486 DML_LOG_VERBOSE("DML::%s: Output: %u\n", __func__, Output);
1487 DML_LOG_VERBOSE("DML::%s: pixels: %u\n", __func__, pixels);
1488 return pixels;
1489 }
1490
1491 //updated in dcn4
dscComputeDelay(enum dml2_output_format_class pixelFormat,enum dml2_output_encoder_class Output)1492 static unsigned int dscComputeDelay(enum dml2_output_format_class pixelFormat, enum dml2_output_encoder_class Output)
1493 {
1494 unsigned int Delay = 0;
1495 unsigned int dispclk_per_dscclk = 3;
1496
1497 // sfr
1498 Delay = Delay + 2;
1499
1500 if (pixelFormat == dml2_420 || pixelFormat == dml2_n422 || (Output == dml2_hdmifrl && pixelFormat != dml2_444)) {
1501 dispclk_per_dscclk = 3 * 2;
1502 }
1503
1504 if (pixelFormat == dml2_420) {
1505 //dscc top delay for pixel compression layer
1506 Delay = Delay + 16 * dispclk_per_dscclk;
1507
1508 // dscc - input deserializer
1509 Delay = Delay + 5;
1510
1511 // dscc - input cdc fifo
1512 Delay = Delay + 1 + 4 * dispclk_per_dscclk;
1513
1514 // dscc - output cdc fifo
1515 Delay = Delay + 3 + 1 * dispclk_per_dscclk;
1516
1517 // dscc - cdc uncertainty
1518 Delay = Delay + 3 + 3 * dispclk_per_dscclk;
1519 } else if (pixelFormat == dml2_n422 || (Output == dml2_hdmifrl && pixelFormat != dml2_444)) {
1520 //dscc top delay for pixel compression layer
1521 Delay = Delay + 16 * dispclk_per_dscclk;
1522 // dsccif
1523 Delay = Delay + 1;
1524 // dscc - input deserializer
1525 Delay = Delay + 5;
1526 // dscc - input cdc fifo
1527 Delay = Delay + 1 + 4 * dispclk_per_dscclk;
1528
1529
1530 // dscc - output cdc fifo
1531 Delay = Delay + 3 + 1 * dispclk_per_dscclk;
1532 // dscc - cdc uncertainty
1533 Delay = Delay + 3 + 3 * dispclk_per_dscclk;
1534 } else if (pixelFormat == dml2_s422) {
1535 //dscc top delay for pixel compression layer
1536 Delay = Delay + 17 * dispclk_per_dscclk;
1537
1538 // dscc - input deserializer
1539 Delay = Delay + 3;
1540 // dscc - input cdc fifo
1541 Delay = Delay + 1 + 4 * dispclk_per_dscclk;
1542 // dscc - output cdc fifo
1543 Delay = Delay + 3 + 1 * dispclk_per_dscclk;
1544 // dscc - cdc uncertainty
1545 Delay = Delay + 3 + 3 * dispclk_per_dscclk;
1546 } else {
1547 //dscc top delay for pixel compression layer
1548 Delay = Delay + 16 * dispclk_per_dscclk;
1549 // dscc - input deserializer
1550 Delay = Delay + 3;
1551 // dscc - input cdc fifo
1552 Delay = Delay + 1 + 4 * dispclk_per_dscclk;
1553 // dscc - output cdc fifo
1554 Delay = Delay + 3 + 1 * dispclk_per_dscclk;
1555
1556 // dscc - cdc uncertainty
1557 Delay = Delay + 3 + 3 * dispclk_per_dscclk;
1558 }
1559
1560 // sft
1561 Delay = Delay + 1;
1562 DML_LOG_VERBOSE("DML::%s: pixelFormat = %u\n", __func__, pixelFormat);
1563 DML_LOG_VERBOSE("DML::%s: Delay = %u\n", __func__, Delay);
1564
1565 return Delay;
1566 }
1567
/*
 * Number of dynamic host-VM page table levels.
 *
 * GPUVMEnable, HostVMEnable          - both must be set, otherwise the result is 0.
 * HostVMMinPageSize                  - minimum host VM page size; thresholds are
 *                                      2048 and 1048576 (presumably KBytes, i.e.
 *                                      2MB and 1GB - TODO confirm unit with caller).
 * HostVMMaxNonCachedPageTableLevels  - upper bound on the number of levels.
 *
 * Returns HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2 depending on
 * which page-size band HostVMMinPageSize falls into, clamped at 0.
 *
 * The subtraction is done as a saturating unsigned operation; no floating
 * point is needed for it.
 */
static unsigned int CalculateHostVMDynamicLevels(
	bool GPUVMEnable,
	bool HostVMEnable,
	unsigned int HostVMMinPageSize,
	unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int levels_skipped;

	if (!GPUVMEnable || !HostVMEnable)
		return 0;

	/* Larger minimum page sizes remove levels from the walk. */
	if (HostVMMinPageSize < 2048)
		levels_skipped = 0;
	else if (HostVMMinPageSize < 1048576)
		levels_skipped = 1;
	else
		levels_skipped = 2;

	/* Clamp at zero instead of wrapping around. */
	if (HostVMMaxNonCachedPageTableLevels <= levels_skipped)
		return 0;

	return HostVMMaxNonCachedPageTableLevels - levels_skipped;
}
1588
CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_and_row_bytes_params * p)1589 static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_and_row_bytes_params *p)
1590 {
1591 unsigned int extra_dpde_bytes;
1592 unsigned int extra_mpde_bytes;
1593 unsigned int MacroTileSizeBytes;
1594 unsigned int vp_height_dpte_ub;
1595
1596 unsigned int meta_surface_bytes;
1597 unsigned int vm_bytes;
1598 unsigned int vp_height_meta_ub;
1599 unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
1600
1601 *p->MetaRequestHeight = 8 * p->BlockHeight256Bytes;
1602 *p->MetaRequestWidth = 8 * p->BlockWidth256Bytes;
1603 if (p->SurfaceTiling == dml2_sw_linear) {
1604 *p->meta_row_height = 32;
1605 *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth));
1606 *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0); // FIXME_DCN4SW missing in old code but no dcc for linear anyways?
1607 } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
1608 *p->meta_row_height = *p->MetaRequestHeight;
1609 if (p->ViewportStationary && p->NumberOfDPPs == 1) {
1610 *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth));
1611 } else {
1612 *p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestWidth) + *p->MetaRequestWidth);
1613 }
1614 *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0);
1615 } else {
1616 *p->meta_row_height = *p->MetaRequestWidth;
1617 if (p->ViewportStationary && p->NumberOfDPPs == 1) {
1618 *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->MetaRequestHeight - 1, *p->MetaRequestHeight) - math_floor2(p->ViewportYStart, *p->MetaRequestHeight));
1619 } else {
1620 *p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestHeight) + *p->MetaRequestHeight);
1621 }
1622 *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestWidth * p->BytePerPixel / 256.0);
1623 }
1624
1625 if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) {
1626 vp_height_meta_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + 64 * p->BlockHeight256Bytes - 1, 64 * p->BlockHeight256Bytes) - math_floor2(p->ViewportYStart, 64 * p->BlockHeight256Bytes));
1627 } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
1628 vp_height_meta_ub = (unsigned int)(math_ceil2(p->ViewportHeight - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes);
1629 } else {
1630 vp_height_meta_ub = (unsigned int)(math_ceil2(p->SwathWidth - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes);
1631 }
1632
1633 meta_surface_bytes = (unsigned int)(p->DCCMetaPitch * vp_height_meta_ub * p->BytePerPixel / 256.0);
1634 DML_LOG_VERBOSE("DML::%s: DCCMetaPitch = %u\n", __func__, p->DCCMetaPitch);
1635 DML_LOG_VERBOSE("DML::%s: meta_surface_bytes = %u\n", __func__, meta_surface_bytes);
1636 if (p->GPUVMEnable == true) {
1637 double meta_vmpg_bytes = 4.0 * 1024.0;
1638 *p->meta_pte_bytes_per_frame_ub = (unsigned int)((math_ceil2((double) (meta_surface_bytes - meta_vmpg_bytes) / (8 * meta_vmpg_bytes), 1) + 1) * 64);
1639 extra_mpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 1);
1640 } else {
1641 *p->meta_pte_bytes_per_frame_ub = 0;
1642 extra_mpde_bytes = 0;
1643 }
1644
1645 if (!p->DCCEnable || !p->mrq_present) {
1646 *p->meta_pte_bytes_per_frame_ub = 0;
1647 extra_mpde_bytes = 0;
1648 *p->meta_row_bytes = 0;
1649 }
1650
1651 if (!p->GPUVMEnable) {
1652 *p->PixelPTEBytesPerRow = 0;
1653 *p->PixelPTEBytesPerRowStorage = 0;
1654 *p->dpte_row_width_ub = 0;
1655 *p->dpte_row_height = 0;
1656 *p->dpte_row_height_linear = 0;
1657 *p->PixelPTEBytesPerRow_one_row_per_frame = 0;
1658 *p->dpte_row_width_ub_one_row_per_frame = 0;
1659 *p->dpte_row_height_one_row_per_frame = 0;
1660 *p->vmpg_width = 0;
1661 *p->vmpg_height = 0;
1662 *p->PixelPTEReqWidth = 0;
1663 *p->PixelPTEReqHeight = 0;
1664 *p->PTERequestSize = 0;
1665 *p->dpde0_bytes_per_frame_ub = 0;
1666 return 0;
1667 }
1668
1669 MacroTileSizeBytes = p->MacroTileWidth * p->BytePerPixel * p->MacroTileHeight;
1670
1671 if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) {
1672 vp_height_dpte_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + p->MacroTileHeight - 1, p->MacroTileHeight) - math_floor2(p->ViewportYStart, p->MacroTileHeight));
1673 } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
1674 vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->ViewportHeight - 1, p->MacroTileHeight) + p->MacroTileHeight);
1675 } else {
1676 vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->SwathWidth - 1, p->MacroTileHeight) + p->MacroTileHeight);
1677 }
1678
1679 if (p->GPUVMEnable == true && p->GPUVMMaxPageTableLevels > 1) {
1680 *p->dpde0_bytes_per_frame_ub = (unsigned int)(64 * (math_ceil2((double)(p->Pitch * vp_height_dpte_ub * p->BytePerPixel - MacroTileSizeBytes) / (double)(8 * 2097152), 1) + 1));
1681 extra_dpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 2);
1682 } else {
1683 *p->dpde0_bytes_per_frame_ub = 0;
1684 extra_dpde_bytes = 0;
1685 }
1686
1687 vm_bytes = *p->meta_pte_bytes_per_frame_ub + extra_mpde_bytes + *p->dpde0_bytes_per_frame_ub + extra_dpde_bytes;
1688
1689 DML_LOG_VERBOSE("DML::%s: DCCEnable = %u\n", __func__, p->DCCEnable);
1690 DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable);
1691 DML_LOG_VERBOSE("DML::%s: SwModeLinear = %u\n", __func__, p->SurfaceTiling == dml2_sw_linear);
1692 DML_LOG_VERBOSE("DML::%s: BytePerPixel = %u\n", __func__, p->BytePerPixel);
1693 DML_LOG_VERBOSE("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, p->GPUVMMaxPageTableLevels);
1694 DML_LOG_VERBOSE("DML::%s: BlockHeight256Bytes = %u\n", __func__, p->BlockHeight256Bytes);
1695 DML_LOG_VERBOSE("DML::%s: BlockWidth256Bytes = %u\n", __func__, p->BlockWidth256Bytes);
1696 DML_LOG_VERBOSE("DML::%s: MacroTileHeight = %u\n", __func__, p->MacroTileHeight);
1697 DML_LOG_VERBOSE("DML::%s: MacroTileWidth = %u\n", __func__, p->MacroTileWidth);
1698 DML_LOG_VERBOSE("DML::%s: meta_pte_bytes_per_frame_ub = %u\n", __func__, *p->meta_pte_bytes_per_frame_ub);
1699 DML_LOG_VERBOSE("DML::%s: dpde0_bytes_per_frame_ub = %u\n", __func__, *p->dpde0_bytes_per_frame_ub);
1700 DML_LOG_VERBOSE("DML::%s: extra_mpde_bytes = %u\n", __func__, extra_mpde_bytes);
1701 DML_LOG_VERBOSE("DML::%s: extra_dpde_bytes = %u\n", __func__, extra_dpde_bytes);
1702 DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, vm_bytes);
1703 DML_LOG_VERBOSE("DML::%s: ViewportHeight = %u\n", __func__, p->ViewportHeight);
1704 DML_LOG_VERBOSE("DML::%s: SwathWidth = %u\n", __func__, p->SwathWidth);
1705 DML_LOG_VERBOSE("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub);
1706
1707 if (p->SurfaceTiling == dml2_sw_linear) {
1708 *p->PixelPTEReqHeight = 1;
1709 *p->PixelPTEReqWidth = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel;
1710 PixelPTEReqWidth_linear = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel;
1711 *p->PTERequestSize = 64;
1712
1713 *p->vmpg_height = 1;
1714 *p->vmpg_width = p->GPUVMMinPageSizeKBytes * 1024 / p->BytePerPixel;
1715 } else if (p->GPUVMMinPageSizeKBytes * 1024 >= dml_get_tile_block_size_bytes(p->SurfaceTiling)) { // 1 64B 8x1 PTE
1716 *p->PixelPTEReqHeight = p->MacroTileHeight;
1717 *p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
1718 *p->PTERequestSize = 64;
1719
1720 *p->vmpg_height = p->MacroTileHeight;
1721 *p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
1722
1723 } else if (p->GPUVMMinPageSizeKBytes == 4 && dml_get_tile_block_size_bytes(p->SurfaceTiling) == 65536) { // 2 64B PTE requests to get 16 PTEs to cover the 64K tile
1724 // one 64KB tile, is 16x16x256B req
1725 *p->PixelPTEReqHeight = 16 * p->BlockHeight256Bytes;
1726 *p->PixelPTEReqWidth = 16 * p->BlockWidth256Bytes;
1727 *p->PTERequestSize = 128;
1728
1729 *p->vmpg_height = *p->PixelPTEReqHeight;
1730 *p->vmpg_width = *p->PixelPTEReqWidth;
1731 } else {
1732 // default for rest of calculation to go through, when vm is disable, the calulated pte related values shouldnt be used anyways
1733 *p->PixelPTEReqHeight = p->MacroTileHeight;
1734 *p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
1735 *p->PTERequestSize = 64;
1736
1737 *p->vmpg_height = p->MacroTileHeight;
1738 *p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
1739
1740 if (p->GPUVMEnable == true) {
1741 DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes=%u and sw_mode=%u (tile_size=%d) not supported!\n",
1742 __func__, p->GPUVMMinPageSizeKBytes, p->SurfaceTiling, dml_get_tile_block_size_bytes(p->SurfaceTiling));
1743 DML_ASSERT(0);
1744 }
1745 }
1746
1747 DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes);
1748 DML_LOG_VERBOSE("DML::%s: PixelPTEReqHeight = %u\n", __func__, *p->PixelPTEReqHeight);
1749 DML_LOG_VERBOSE("DML::%s: PixelPTEReqWidth = %u\n", __func__, *p->PixelPTEReqWidth);
1750 DML_LOG_VERBOSE("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear);
1751 DML_LOG_VERBOSE("DML::%s: PTERequestSize = %u\n", __func__, *p->PTERequestSize);
1752 DML_LOG_VERBOSE("DML::%s: Pitch = %u\n", __func__, p->Pitch);
1753 DML_LOG_VERBOSE("DML::%s: vmpg_width = %u\n", __func__, *p->vmpg_width);
1754 DML_LOG_VERBOSE("DML::%s: vmpg_height = %u\n", __func__, *p->vmpg_height);
1755
1756 *p->dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
1757 *p->dpte_row_width_ub_one_row_per_frame = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height_one_row_per_frame / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * (double)*p->PixelPTEReqWidth);
1758 *p->PixelPTEBytesPerRow_one_row_per_frame = (unsigned int)((double)*p->dpte_row_width_ub_one_row_per_frame / (double)*p->PixelPTEReqWidth * *p->PTERequestSize);
1759 *p->dpte_row_height_linear = 0;
1760
1761 if (p->SurfaceTiling == dml2_sw_linear) {
1762 *p->dpte_row_height = (unsigned int)(math_min2(128, (double)(1ULL << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * *p->PixelPTEReqWidth / p->Pitch), 2.0), 1))));
1763 *p->dpte_row_width_ub = (unsigned int)(math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height - 1), (double)*p->PixelPTEReqWidth) + *p->PixelPTEReqWidth);
1764 *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqWidth * *p->PTERequestSize);
1765
1766 // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
1767 *p->dpte_row_height_linear = 1U << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * PixelPTEReqWidth_linear / p->Pitch), 2.0), 1);
1768 if (*p->dpte_row_height_linear > 128)
1769 *p->dpte_row_height_linear = 128;
1770
1771 #ifdef __DML_VBA_DEBUG__
1772 DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *p->dpte_row_width_ub);
1773 #endif
1774
1775 } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
1776 *p->dpte_row_height = *p->PixelPTEReqHeight;
1777
1778 if (p->GPUVMMinPageSizeKBytes > 64) {
1779 *p->dpte_row_width_ub = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * *p->PixelPTEReqWidth);
1780 } else if (p->ViewportStationary && (p->NumberOfDPPs == 1)) {
1781 *p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->PixelPTEReqWidth - 1, *p->PixelPTEReqWidth) - math_floor2(p->ViewportXStart, *p->PixelPTEReqWidth));
1782 } else {
1783 *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqWidth, 1) + 1.0) * *p->PixelPTEReqWidth);
1784 }
1785 #ifdef __DML_VBA_DEBUG__
1786 DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *p->dpte_row_width_ub);
1787 #endif
1788
1789 *p->PixelPTEBytesPerRow = *p->dpte_row_width_ub / *p->PixelPTEReqWidth * *p->PTERequestSize;
1790 } else {
1791 *p->dpte_row_height = (unsigned int)(math_min2(*p->PixelPTEReqWidth, p->MacroTileWidth));
1792
1793 if (p->ViewportStationary && (p->NumberOfDPPs == 1)) {
1794 *p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->PixelPTEReqHeight - 1, *p->PixelPTEReqHeight) - math_floor2(p->ViewportYStart, *p->PixelPTEReqHeight));
1795 } else {
1796 *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqHeight, 1) + 1) * *p->PixelPTEReqHeight);
1797 }
1798
1799 *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqHeight * *p->PTERequestSize);
1800 #ifdef __DML_VBA_DEBUG__
1801 DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *p->dpte_row_width_ub);
1802 #endif
1803 }
1804
1805 if (p->GPUVMEnable != true) {
1806 *p->PixelPTEBytesPerRow = 0;
1807 *p->PixelPTEBytesPerRow_one_row_per_frame = 0;
1808 }
1809
1810 *p->PixelPTEBytesPerRowStorage = *p->PixelPTEBytesPerRow;
1811
1812 #ifdef __DML_VBA_DEBUG__
1813 DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes);
1814 DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable);
1815 DML_LOG_VERBOSE("DML::%s: meta_row_height = %u\n", __func__, *p->meta_row_height);
1816 DML_LOG_VERBOSE("DML::%s: dpte_row_height = %u\n", __func__, *p->dpte_row_height);
1817 DML_LOG_VERBOSE("DML::%s: dpte_row_height_linear = %u\n", __func__, *p->dpte_row_height_linear);
1818 DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u\n", __func__, *p->dpte_row_width_ub);
1819 DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *p->PixelPTEBytesPerRow);
1820 DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *p->PixelPTEBytesPerRowStorage);
1821 DML_LOG_VERBOSE("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, p->PTEBufferSizeInRequests);
1822 DML_LOG_VERBOSE("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *p->dpte_row_height_one_row_per_frame);
1823 DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *p->dpte_row_width_ub_one_row_per_frame);
1824 DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, *p->PixelPTEBytesPerRow_one_row_per_frame);
1825 #endif
1826
1827 return vm_bytes;
1828 } // CalculateVMAndRowBytes
1829
CalculatePrefetchSourceLines(double VRatio,unsigned int VTaps,bool Interlace,bool ProgressiveToInterlaceUnitInOPP,unsigned int SwathHeight,enum dml2_rotation_angle RotationAngle,bool mirrored,bool ViewportStationary,unsigned int SwathWidth,unsigned int ViewportHeight,unsigned int ViewportXStart,unsigned int ViewportYStart,unsigned int * VInitPreFill,unsigned int * MaxNumSwath)1830 static unsigned int CalculatePrefetchSourceLines(
1831 double VRatio,
1832 unsigned int VTaps,
1833 bool Interlace,
1834 bool ProgressiveToInterlaceUnitInOPP,
1835 unsigned int SwathHeight,
1836 enum dml2_rotation_angle RotationAngle,
1837 bool mirrored,
1838 bool ViewportStationary,
1839 unsigned int SwathWidth,
1840 unsigned int ViewportHeight,
1841 unsigned int ViewportXStart,
1842 unsigned int ViewportYStart,
1843
1844 // Output
1845 unsigned int *VInitPreFill,
1846 unsigned int *MaxNumSwath)
1847 {
1848
1849 unsigned int vp_start_rot = 0;
1850 unsigned int sw0_tmp = 0;
1851 unsigned int MaxPartialSwath = 0;
1852 double numLines = 0;
1853
1854 #ifdef __DML_VBA_DEBUG__
1855 DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio);
1856 DML_LOG_VERBOSE("DML::%s: VTaps = %u\n", __func__, VTaps);
1857 DML_LOG_VERBOSE("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart);
1858 DML_LOG_VERBOSE("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart);
1859 DML_LOG_VERBOSE("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary);
1860 DML_LOG_VERBOSE("DML::%s: SwathHeight = %u\n", __func__, SwathHeight);
1861 #endif
1862 if (ProgressiveToInterlaceUnitInOPP)
1863 *VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1) / 2.0, 1));
1864 else
1865 *VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1 + (Interlace ? 1 : 0) * 0.5 * VRatio) / 2.0, 1));
1866
1867 if (ViewportStationary) {
1868 if (RotationAngle == dml2_rotation_180) {
1869 vp_start_rot = SwathHeight - (((unsigned int)(ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
1870 } else if ((RotationAngle == dml2_rotation_270 && !mirrored) || (RotationAngle == dml2_rotation_90 && mirrored)) {
1871 vp_start_rot = ViewportXStart;
1872 } else if ((RotationAngle == dml2_rotation_90 && !mirrored) || (RotationAngle == dml2_rotation_270 && mirrored)) {
1873 vp_start_rot = SwathHeight - (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
1874 } else {
1875 vp_start_rot = ViewportYStart;
1876 }
1877 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
1878 if (sw0_tmp < *VInitPreFill) {
1879 *MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - sw0_tmp) / (double)SwathHeight, 1) + 1);
1880 } else {
1881 *MaxNumSwath = 1;
1882 }
1883 MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(vp_start_rot + *VInitPreFill - 1) % SwathHeight));
1884 } else {
1885 *MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - 1.0) / (double)SwathHeight, 1) + 1);
1886 if (*VInitPreFill > 1) {
1887 MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(*VInitPreFill - 2) % SwathHeight));
1888 } else {
1889 MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(*VInitPreFill + SwathHeight - 2) % SwathHeight));
1890 }
1891 }
1892 numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
1893
1894 #ifdef __DML_VBA_DEBUG__
1895 DML_LOG_VERBOSE("DML::%s: vp_start_rot = %u\n", __func__, vp_start_rot);
1896 DML_LOG_VERBOSE("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill);
1897 DML_LOG_VERBOSE("DML::%s: MaxPartialSwath = %u\n", __func__, MaxPartialSwath);
1898 DML_LOG_VERBOSE("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath);
1899 DML_LOG_VERBOSE("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
1900 #endif
1901 return (unsigned int)(numLines);
1902
1903 }
1904
CalculateRowBandwidth(bool GPUVMEnable,bool use_one_row_for_frame,enum dml2_source_format_class SourcePixelFormat,double VRatio,double VRatioChroma,bool DCCEnable,double LineTime,unsigned int PixelPTEBytesPerRowLuma,unsigned int PixelPTEBytesPerRowChroma,unsigned int dpte_row_height_luma,unsigned int dpte_row_height_chroma,bool mrq_present,unsigned int meta_row_bytes_per_row_ub_l,unsigned int meta_row_bytes_per_row_ub_c,unsigned int meta_row_height_luma,unsigned int meta_row_height_chroma,double * dpte_row_bw,double * meta_row_bw)1905 static void CalculateRowBandwidth(
1906 bool GPUVMEnable,
1907 bool use_one_row_for_frame,
1908 enum dml2_source_format_class SourcePixelFormat,
1909 double VRatio,
1910 double VRatioChroma,
1911 bool DCCEnable,
1912 double LineTime,
1913 unsigned int PixelPTEBytesPerRowLuma,
1914 unsigned int PixelPTEBytesPerRowChroma,
1915 unsigned int dpte_row_height_luma,
1916 unsigned int dpte_row_height_chroma,
1917
1918 bool mrq_present,
1919 unsigned int meta_row_bytes_per_row_ub_l,
1920 unsigned int meta_row_bytes_per_row_ub_c,
1921 unsigned int meta_row_height_luma,
1922 unsigned int meta_row_height_chroma,
1923
1924 // Output
1925 double *dpte_row_bw,
1926 double *meta_row_bw)
1927 {
1928 (void)use_one_row_for_frame;
1929 if (!DCCEnable || !mrq_present) {
1930 *meta_row_bw = 0;
1931 } else if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) {
1932 *meta_row_bw = VRatio * meta_row_bytes_per_row_ub_l / (meta_row_height_luma * LineTime)
1933 + VRatioChroma * meta_row_bytes_per_row_ub_c / (meta_row_height_chroma * LineTime);
1934 } else {
1935 *meta_row_bw = VRatio * meta_row_bytes_per_row_ub_l / (meta_row_height_luma * LineTime);
1936 }
1937
1938 if (GPUVMEnable != true) {
1939 *dpte_row_bw = 0;
1940 } else if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) {
1941 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
1942 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
1943 } else {
1944 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
1945 }
1946 }
1947
CalculateMALLUseForStaticScreen(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int MALLAllocatedForDCN,unsigned int SurfaceSizeInMALL[],bool one_row_per_frame_fits_in_buffer[],bool is_using_mall_for_ss[])1948 static void CalculateMALLUseForStaticScreen(
1949 const struct dml2_display_cfg *display_cfg,
1950 unsigned int NumberOfActiveSurfaces,
1951 unsigned int MALLAllocatedForDCN,
1952 unsigned int SurfaceSizeInMALL[],
1953 bool one_row_per_frame_fits_in_buffer[],
1954
1955 // Output
1956 bool is_using_mall_for_ss[])
1957 {
1958
1959 unsigned int SurfaceToAddToMALL;
1960 bool CanAddAnotherSurfaceToMALL;
1961 unsigned int TotalSurfaceSizeInMALL;
1962
1963 TotalSurfaceSizeInMALL = 0;
1964 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1965 is_using_mall_for_ss[k] = (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable);
1966 if (is_using_mall_for_ss[k])
1967 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
1968 #ifdef __DML_VBA_DEBUG__
1969 DML_LOG_VERBOSE("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, is_using_mall_for_ss[k]);
1970 DML_LOG_VERBOSE("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, TotalSurfaceSizeInMALL);
1971 #endif
1972 }
1973
1974 SurfaceToAddToMALL = 0;
1975 CanAddAnotherSurfaceToMALL = true;
1976 while (CanAddAnotherSurfaceToMALL) {
1977 CanAddAnotherSurfaceToMALL = false;
1978 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1979 if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCN * 1024 * 1024 &&
1980 !is_using_mall_for_ss[k] && display_cfg->plane_descriptors[k].overrides.refresh_from_mall != dml2_refresh_from_mall_mode_override_force_disable && one_row_per_frame_fits_in_buffer[k] &&
1981 (!CanAddAnotherSurfaceToMALL || SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
1982 CanAddAnotherSurfaceToMALL = true;
1983 SurfaceToAddToMALL = k;
1984 DML_LOG_VERBOSE("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, display_cfg->plane_descriptors[k].overrides.refresh_from_mall);
1985 }
1986 }
1987 if (CanAddAnotherSurfaceToMALL) {
1988 is_using_mall_for_ss[SurfaceToAddToMALL] = true;
1989 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
1990
1991 #ifdef __DML_VBA_DEBUG__
1992 DML_LOG_VERBOSE("DML::%s: SurfaceToAddToMALL = %u\n", __func__, SurfaceToAddToMALL);
1993 DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, TotalSurfaceSizeInMALL);
1994 #endif
1995 }
1996 }
1997 }
1998
CalculateDCCConfiguration(bool DCCEnabled,bool DCCProgrammingAssumesScanDirectionUnknown,enum dml2_source_format_class SourcePixelFormat,unsigned int SurfaceWidthLuma,unsigned int SurfaceWidthChroma,unsigned int SurfaceHeightLuma,unsigned int SurfaceHeightChroma,unsigned int nomDETInKByte,unsigned int RequestHeight256ByteLuma,unsigned int RequestHeight256ByteChroma,enum dml2_swizzle_mode TilingFormat,unsigned int BytePerPixelY,unsigned int BytePerPixelC,double BytePerPixelDETY,double BytePerPixelDETC,enum dml2_rotation_angle RotationAngle,enum dml2_core_internal_request_type * RequestLuma,enum dml2_core_internal_request_type * RequestChroma,unsigned int * MaxUncompressedBlockLuma,unsigned int * MaxUncompressedBlockChroma,unsigned int * MaxCompressedBlockLuma,unsigned int * MaxCompressedBlockChroma,unsigned int * IndependentBlockLuma,unsigned int * IndependentBlockChroma)1999 static void CalculateDCCConfiguration(
2000 bool DCCEnabled,
2001 bool DCCProgrammingAssumesScanDirectionUnknown,
2002 enum dml2_source_format_class SourcePixelFormat,
2003 unsigned int SurfaceWidthLuma,
2004 unsigned int SurfaceWidthChroma,
2005 unsigned int SurfaceHeightLuma,
2006 unsigned int SurfaceHeightChroma,
2007 unsigned int nomDETInKByte,
2008 unsigned int RequestHeight256ByteLuma,
2009 unsigned int RequestHeight256ByteChroma,
2010 enum dml2_swizzle_mode TilingFormat,
2011 unsigned int BytePerPixelY,
2012 unsigned int BytePerPixelC,
2013 double BytePerPixelDETY,
2014 double BytePerPixelDETC,
2015 enum dml2_rotation_angle RotationAngle,
2016
2017 // Output
2018 enum dml2_core_internal_request_type *RequestLuma,
2019 enum dml2_core_internal_request_type *RequestChroma,
2020 unsigned int *MaxUncompressedBlockLuma,
2021 unsigned int *MaxUncompressedBlockChroma,
2022 unsigned int *MaxCompressedBlockLuma,
2023 unsigned int *MaxCompressedBlockChroma,
2024 unsigned int *IndependentBlockLuma,
2025 unsigned int *IndependentBlockChroma)
2026 {
2027 (void)SurfaceWidthChroma;
2028 (void)SurfaceHeightChroma;
2029 (void)TilingFormat;
2030 (void)BytePerPixelDETY;
2031 (void)BytePerPixelDETC;
2032 unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
2033
2034 unsigned int segment_order_horz_contiguous_luma;
2035 unsigned int segment_order_horz_contiguous_chroma;
2036 unsigned int segment_order_vert_contiguous_luma;
2037 unsigned int segment_order_vert_contiguous_chroma;
2038
2039 unsigned int req128_horz_wc_l;
2040 unsigned int req128_horz_wc_c;
2041 unsigned int req128_vert_wc_l;
2042 unsigned int req128_vert_wc_c;
2043
2044 unsigned int yuv420;
2045 unsigned int horz_div_l;
2046 unsigned int horz_div_c;
2047 unsigned int vert_div_l;
2048 unsigned int vert_div_c;
2049
2050 unsigned int swath_buf_size;
2051 double detile_buf_vp_horz_limit;
2052 double detile_buf_vp_vert_limit;
2053
2054 unsigned int MAS_vp_horz_limit;
2055 unsigned int MAS_vp_vert_limit;
2056 unsigned int max_vp_horz_width;
2057 unsigned int max_vp_vert_height;
2058 unsigned int eff_surf_width_l;
2059 unsigned int eff_surf_width_c;
2060 unsigned int eff_surf_height_l;
2061 unsigned int eff_surf_height_c;
2062
2063 unsigned int full_swath_bytes_horz_wc_l;
2064 unsigned int full_swath_bytes_horz_wc_c;
2065 unsigned int full_swath_bytes_vert_wc_l;
2066 unsigned int full_swath_bytes_vert_wc_c;
2067
2068 if (dml_is_420(SourcePixelFormat))
2069 yuv420 = 1;
2070 else
2071 yuv420 = 0;
2072 horz_div_l = 1;
2073 horz_div_c = 1;
2074 vert_div_l = 1;
2075 vert_div_c = 1;
2076
2077 if (BytePerPixelY == 1)
2078 vert_div_l = 0;
2079 if (BytePerPixelC == 1)
2080 vert_div_c = 0;
2081
2082 if (BytePerPixelC == 0) {
2083 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
2084 detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
2085 detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
2086 } else {
2087 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
2088 detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) + (double)RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
2089 detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
2090 }
2091
2092 if (SourcePixelFormat == dml2_420_10) {
2093 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
2094 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
2095 }
2096
2097 detile_buf_vp_horz_limit = math_floor2(detile_buf_vp_horz_limit - 1, 16);
2098 detile_buf_vp_vert_limit = math_floor2(detile_buf_vp_vert_limit - 1, 16);
2099
2100 MAS_vp_horz_limit = SourcePixelFormat == dml2_rgbe_alpha ? 3840 : 6144;
2101 MAS_vp_vert_limit = SourcePixelFormat == dml2_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
2102 max_vp_horz_width = (unsigned int)(math_min2((double)MAS_vp_horz_limit, detile_buf_vp_horz_limit));
2103 max_vp_vert_height = (unsigned int)(math_min2((double)MAS_vp_vert_limit, detile_buf_vp_vert_limit));
2104 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
2105 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
2106 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
2107 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
2108
2109 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
2110 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
2111 if (BytePerPixelC > 0) {
2112 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
2113 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
2114 } else {
2115 full_swath_bytes_horz_wc_c = 0;
2116 full_swath_bytes_vert_wc_c = 0;
2117 }
2118
2119 if (SourcePixelFormat == dml2_420_10) {
2120 full_swath_bytes_horz_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0));
2121 full_swath_bytes_horz_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0));
2122 full_swath_bytes_vert_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0));
2123 full_swath_bytes_vert_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0));
2124 }
2125
2126 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
2127 req128_horz_wc_l = 0;
2128 req128_horz_wc_c = 0;
2129 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
2130 req128_horz_wc_l = 0;
2131 req128_horz_wc_c = 1;
2132 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
2133 req128_horz_wc_l = 1;
2134 req128_horz_wc_c = 0;
2135 } else {
2136 req128_horz_wc_l = 1;
2137 req128_horz_wc_c = 1;
2138 }
2139
2140 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
2141 req128_vert_wc_l = 0;
2142 req128_vert_wc_c = 0;
2143 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
2144 req128_vert_wc_l = 0;
2145 req128_vert_wc_c = 1;
2146 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
2147 req128_vert_wc_l = 1;
2148 req128_vert_wc_c = 0;
2149 } else {
2150 req128_vert_wc_l = 1;
2151 req128_vert_wc_c = 1;
2152 }
2153
2154 if (BytePerPixelY == 2) {
2155 segment_order_horz_contiguous_luma = 0;
2156 segment_order_vert_contiguous_luma = 1;
2157 } else {
2158 segment_order_horz_contiguous_luma = 1;
2159 segment_order_vert_contiguous_luma = 0;
2160 }
2161
2162 if (BytePerPixelC == 2) {
2163 segment_order_horz_contiguous_chroma = 0;
2164 segment_order_vert_contiguous_chroma = 1;
2165 } else {
2166 segment_order_horz_contiguous_chroma = 1;
2167 segment_order_vert_contiguous_chroma = 0;
2168 }
2169 #ifdef __DML_VBA_DEBUG__
2170 DML_LOG_VERBOSE("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled);
2171 DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
2172 DML_LOG_VERBOSE("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC);
2173 DML_LOG_VERBOSE("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l);
2174 DML_LOG_VERBOSE("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c);
2175 DML_LOG_VERBOSE("DML::%s: full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l);
2176 DML_LOG_VERBOSE("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c);
2177 DML_LOG_VERBOSE("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma);
2178 DML_LOG_VERBOSE("DML::%s: segment_order_horz_contiguous_chroma = %u\n", __func__, segment_order_horz_contiguous_chroma);
2179 #endif
2180 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
2181 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
2182 *RequestLuma = dml2_core_internal_request_type_256_bytes;
2183 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
2184 *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2185 } else {
2186 *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous;
2187 }
2188 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
2189 *RequestChroma = dml2_core_internal_request_type_256_bytes;
2190 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
2191 *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2192 } else {
2193 *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous;
2194 }
2195 } else if (!dml_is_vertical_rotation(RotationAngle)) {
2196 if (req128_horz_wc_l == 0) {
2197 *RequestLuma = dml2_core_internal_request_type_256_bytes;
2198 } else if (segment_order_horz_contiguous_luma == 0) {
2199 *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2200 } else {
2201 *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous;
2202 }
2203 if (req128_horz_wc_c == 0) {
2204 *RequestChroma = dml2_core_internal_request_type_256_bytes;
2205 } else if (segment_order_horz_contiguous_chroma == 0) {
2206 *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2207 } else {
2208 *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous;
2209 }
2210 } else {
2211 if (req128_vert_wc_l == 0) {
2212 *RequestLuma = dml2_core_internal_request_type_256_bytes;
2213 } else if (segment_order_vert_contiguous_luma == 0) {
2214 *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2215 } else {
2216 *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous;
2217 }
2218 if (req128_vert_wc_c == 0) {
2219 *RequestChroma = dml2_core_internal_request_type_256_bytes;
2220 } else if (segment_order_vert_contiguous_chroma == 0) {
2221 *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2222 } else {
2223 *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous;
2224 }
2225 }
2226
2227 if (*RequestLuma == dml2_core_internal_request_type_256_bytes) {
2228 *MaxUncompressedBlockLuma = 256;
2229 *MaxCompressedBlockLuma = 256;
2230 *IndependentBlockLuma = 0;
2231 } else if (*RequestLuma == dml2_core_internal_request_type_128_bytes_contiguous) {
2232 *MaxUncompressedBlockLuma = 256;
2233 *MaxCompressedBlockLuma = 128;
2234 *IndependentBlockLuma = 128;
2235 } else {
2236 *MaxUncompressedBlockLuma = 256;
2237 *MaxCompressedBlockLuma = 64;
2238 *IndependentBlockLuma = 64;
2239 }
2240
2241 if (*RequestChroma == dml2_core_internal_request_type_256_bytes) {
2242 *MaxUncompressedBlockChroma = 256;
2243 *MaxCompressedBlockChroma = 256;
2244 *IndependentBlockChroma = 0;
2245 } else if (*RequestChroma == dml2_core_internal_request_type_128_bytes_contiguous) {
2246 *MaxUncompressedBlockChroma = 256;
2247 *MaxCompressedBlockChroma = 128;
2248 *IndependentBlockChroma = 128;
2249 } else {
2250 *MaxUncompressedBlockChroma = 256;
2251 *MaxCompressedBlockChroma = 64;
2252 *IndependentBlockChroma = 64;
2253 }
2254
2255 if (DCCEnabled != true || BytePerPixelC == 0) {
2256 *MaxUncompressedBlockChroma = 0;
2257 *MaxCompressedBlockChroma = 0;
2258 *IndependentBlockChroma = 0;
2259 }
2260
2261 if (DCCEnabled != true) {
2262 *MaxUncompressedBlockLuma = 0;
2263 *MaxCompressedBlockLuma = 0;
2264 *IndependentBlockLuma = 0;
2265 }
2266
2267 #ifdef __DML_VBA_DEBUG__
2268 DML_LOG_VERBOSE("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma);
2269 DML_LOG_VERBOSE("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma);
2270 DML_LOG_VERBOSE("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma);
2271 DML_LOG_VERBOSE("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma);
2272 DML_LOG_VERBOSE("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma);
2273 DML_LOG_VERBOSE("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma);
2274 #endif
2275
2276 }
2277
calculate_mcache_row_bytes(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_calculate_mcache_row_bytes_params * p)2278 static void calculate_mcache_row_bytes(
2279 struct dml2_core_internal_scratch *scratch,
2280 struct dml2_core_calcs_calculate_mcache_row_bytes_params *p)
2281 {
2282 (void)scratch;
2283 unsigned int vmpg_bytes = 0;
2284 unsigned int blk_bytes = 0;
2285 float meta_per_mvmpg_per_channel = 0;
2286 unsigned int est_blk_per_vmpg = 2;
2287 unsigned int mvmpg_per_row_ub = 0;
2288 unsigned int full_vp_width_mvmpg_aligned = 0;
2289 unsigned int full_vp_height_mvmpg_aligned = 0;
2290 unsigned int meta_per_mvmpg_per_channel_ub = 0;
2291 unsigned int mvmpg_per_mcache;
2292
2293 #ifdef __DML_VBA_DEBUG__
2294 DML_LOG_VERBOSE("DML::%s: num_chans = %u\n", __func__, p->num_chans);
2295 DML_LOG_VERBOSE("DML::%s: mem_word_bytes = %u\n", __func__, p->mem_word_bytes);
2296 DML_LOG_VERBOSE("DML::%s: mcache_line_size_bytes = %u\n", __func__, p->mcache_line_size_bytes);
2297 DML_LOG_VERBOSE("DML::%s: mcache_size_bytes = %u\n", __func__, p->mcache_size_bytes);
2298 DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable);
2299 DML_LOG_VERBOSE("DML::%s: gpuvm_page_size_kbytes = %u\n", __func__, p->gpuvm_page_size_kbytes);
2300 DML_LOG_VERBOSE("DML::%s: vp_stationary = %u\n", __func__, p->vp_stationary);
2301 DML_LOG_VERBOSE("DML::%s: tiling_mode = %u\n", __func__, p->tiling_mode);
2302 DML_LOG_VERBOSE("DML::%s: vp_start_x = %u\n", __func__, p->vp_start_x);
2303 DML_LOG_VERBOSE("DML::%s: vp_start_y = %u\n", __func__, p->vp_start_y);
2304 DML_LOG_VERBOSE("DML::%s: full_vp_width = %u\n", __func__, p->full_vp_width);
2305 DML_LOG_VERBOSE("DML::%s: full_vp_height = %u\n", __func__, p->full_vp_height);
2306 DML_LOG_VERBOSE("DML::%s: blk_width = %u\n", __func__, p->blk_width);
2307 DML_LOG_VERBOSE("DML::%s: blk_height = %u\n", __func__, p->blk_height);
2308 DML_LOG_VERBOSE("DML::%s: vmpg_width = %u\n", __func__, p->vmpg_width);
2309 DML_LOG_VERBOSE("DML::%s: vmpg_height = %u\n", __func__, p->vmpg_height);
2310 DML_LOG_VERBOSE("DML::%s: full_swath_bytes = %u\n", __func__, p->full_swath_bytes);
2311 #endif
2312 DML_ASSERT(p->mcache_line_size_bytes != 0);
2313 DML_ASSERT(p->mcache_size_bytes != 0);
2314
2315 *p->mvmpg_width = 0;
2316 *p->mvmpg_height = 0;
2317
2318 if (p->full_vp_height == 0 && p->full_vp_width == 0) {
2319 *p->num_mcaches = 0;
2320 *p->mcache_row_bytes = 0;
2321 *p->mcache_row_bytes_per_channel = 0;
2322 } else {
2323 blk_bytes = dml_get_tile_block_size_bytes(p->tiling_mode);
2324
2325 // if gpuvm is not enable, the alignment boundary should be in terms of tiling block size
2326 vmpg_bytes = p->gpuvm_page_size_kbytes * 1024;
2327
2328 //With vmpg_bytes >= tile blk_bytes, the meta_row_width alignment equations are relative to the vmpg_width/height.
2329 // But for 4KB page with 64KB tile block, we need the meta for all pages in the tile block.
2330 // Therefore, the alignment is relative to the blk_width/height. The factor of 16 vmpg per 64KB tile block is applied at the end.
2331 *p->mvmpg_width = p->blk_width;
2332 *p->mvmpg_height = p->blk_height;
2333 if (p->gpuvm_enable) {
2334 if (vmpg_bytes >= blk_bytes) {
2335 *p->mvmpg_width = p->vmpg_width;
2336 *p->mvmpg_height = p->vmpg_height;
2337 } else if (!((blk_bytes == 65536) && (vmpg_bytes == 4096))) {
2338 DML_LOG_VERBOSE("ERROR: DML::%s: Tiling size and vm page size combination not supported\n", __func__);
2339 DML_ASSERT(0);
2340 }
2341 }
2342
2343 //For plane0 & 1, first calculate full_vp_width/height_l/c aligned to vmpg_width/height_l/c
2344 full_vp_width_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_x + p->full_vp_width) + *p->mvmpg_width - 1, *p->mvmpg_width) - math_floor2(p->vp_start_x, *p->mvmpg_width));
2345 full_vp_height_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_y + p->full_vp_height) + *p->mvmpg_height - 1, *p->mvmpg_height) - math_floor2(p->vp_start_y, *p->mvmpg_height));
2346
2347 *p->full_vp_access_width_mvmpg_aligned = p->surf_vert ? full_vp_height_mvmpg_aligned : full_vp_width_mvmpg_aligned;
2348
2349 //Use the equation for the exact alignment when possible. Note that the exact alignment cannot be used for horizontal access if vmpg_bytes > blk_bytes.
2350 if (!p->surf_vert) { //horizontal access
2351 if (p->vp_stationary == 1 && vmpg_bytes <= blk_bytes)
2352 *p->meta_row_width_ub = full_vp_width_mvmpg_aligned;
2353 else
2354 *p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_width - 1, *p->mvmpg_width) + *p->mvmpg_width;
2355 mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_width;
2356 } else { //vertical access
2357 if (p->vp_stationary == 1)
2358 *p->meta_row_width_ub = full_vp_height_mvmpg_aligned;
2359 else
2360 *p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_height - 1, *p->mvmpg_height) + *p->mvmpg_height;
2361 mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_height;
2362 }
2363
2364 if (p->gpuvm_enable) {
2365 meta_per_mvmpg_per_channel = (float)vmpg_bytes / (float)256 / p->num_chans;
2366
2367 //but using the est_blk_per_vmpg between 2 and 4, to be not as pessimestic
2368 if (p->surf_vert && vmpg_bytes > blk_bytes) {
2369 meta_per_mvmpg_per_channel = (float)est_blk_per_vmpg * blk_bytes / (float)256 / p->num_chans;
2370 }
2371
2372 *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel)); // dcc_dr_oh_nom
2373 } else {
2374 meta_per_mvmpg_per_channel = (float) blk_bytes / (float)256 / p->num_chans;
2375
2376 if (!p->surf_vert)
2377 *p->dcc_dram_bw_nom_overhead_factor = 1 + 1.0 / 256.0;
2378 else
2379 *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel));
2380 }
2381
2382 meta_per_mvmpg_per_channel_ub = (unsigned int)math_ceil2((double)meta_per_mvmpg_per_channel, p->mcache_line_size_bytes);
2383
2384 //but for 4KB vmpg with 64KB tile blk
2385 if (p->gpuvm_enable && (blk_bytes == 65536) && (vmpg_bytes == 4096))
2386 meta_per_mvmpg_per_channel_ub = 16 * meta_per_mvmpg_per_channel_ub;
2387
2388 // If this mcache_row_bytes for the full viewport of the surface is less than or equal to mcache_bytes,
2389 // then one mcache can be used for this request stream. If not, it is useful to know the width of the viewport that can be supported in the mcache_bytes.
2390 if (p->gpuvm_enable || p->surf_vert) {
2391 *p->mcache_row_bytes_per_channel = mvmpg_per_row_ub * meta_per_mvmpg_per_channel_ub;
2392 *p->mcache_row_bytes = *p->mcache_row_bytes_per_channel * p->num_chans;
2393 } else { // horizontal and gpuvm disable
2394 *p->mcache_row_bytes = *p->meta_row_width_ub * p->blk_height * p->bytes_per_pixel / 256;
2395 if (p->mcache_line_size_bytes != 0)
2396 *p->mcache_row_bytes_per_channel = (unsigned int)math_ceil2((double)*p->mcache_row_bytes / p->num_chans, p->mcache_line_size_bytes);
2397 }
2398
2399 *p->dcc_dram_bw_pref_overhead_factor = 1 + math_max2(1.0 / 256.0, *p->mcache_row_bytes / p->full_swath_bytes); // dcc_dr_oh_pref
2400 if (p->mcache_size_bytes != 0)
2401 *p->num_mcaches = (unsigned int)math_ceil2((double)*p->mcache_row_bytes_per_channel / p->mcache_size_bytes, 1);
2402
2403 mvmpg_per_mcache = p->mcache_size_bytes / meta_per_mvmpg_per_channel_ub;
2404 *p->mvmpg_per_mcache_lb = (unsigned int)math_floor2(mvmpg_per_mcache, 1);
2405
2406 #ifdef __DML_VBA_DEBUG__
2407 DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable);
2408 DML_LOG_VERBOSE("DML::%s: vmpg_bytes = %u\n", __func__, vmpg_bytes);
2409 DML_LOG_VERBOSE("DML::%s: blk_bytes = %u\n", __func__, blk_bytes);
2410 DML_LOG_VERBOSE("DML::%s: meta_per_mvmpg_per_channel = %f\n", __func__, meta_per_mvmpg_per_channel);
2411 DML_LOG_VERBOSE("DML::%s: mvmpg_per_row_ub = %u\n", __func__, mvmpg_per_row_ub);
2412 DML_LOG_VERBOSE("DML::%s: meta_row_width_ub = %u\n", __func__, *p->meta_row_width_ub);
2413 DML_LOG_VERBOSE("DML::%s: mvmpg_width = %u\n", __func__, *p->mvmpg_width);
2414 DML_LOG_VERBOSE("DML::%s: mvmpg_height = %u\n", __func__, *p->mvmpg_height);
2415 DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_nom_overhead_factor);
2416 DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_pref_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_pref_overhead_factor);
2417 #endif
2418 }
2419
2420 #ifdef __DML_VBA_DEBUG__
2421 DML_LOG_VERBOSE("DML::%s: mcache_row_bytes = %u\n", __func__, *p->mcache_row_bytes);
2422 DML_LOG_VERBOSE("DML::%s: mcache_row_bytes_per_channel = %u\n", __func__, *p->mcache_row_bytes_per_channel);
2423 DML_LOG_VERBOSE("DML::%s: num_mcaches = %u\n", __func__, *p->num_mcaches);
2424 #endif
2425 DML_ASSERT(*p->num_mcaches > 0);
2426 }
2427
calculate_mcache_setting(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_calculate_mcache_setting_params * p)2428 static void calculate_mcache_setting(
2429 struct dml2_core_internal_scratch *scratch,
2430 struct dml2_core_calcs_calculate_mcache_setting_params *p)
2431 {
2432 unsigned int n;
2433
2434 struct dml2_core_shared_calculate_mcache_setting_locals *l = &scratch->calculate_mcache_setting_locals;
2435 memset(l, 0, sizeof(struct dml2_core_shared_calculate_mcache_setting_locals));
2436
2437 *p->num_mcaches_l = 0;
2438 *p->mcache_row_bytes_l = 0;
2439 *p->mcache_row_bytes_per_channel_l = 0;
2440 *p->dcc_dram_bw_nom_overhead_factor_l = 1.0;
2441 *p->dcc_dram_bw_pref_overhead_factor_l = 1.0;
2442
2443 *p->num_mcaches_c = 0;
2444 *p->mcache_row_bytes_c = 0;
2445 *p->mcache_row_bytes_per_channel_c = 0;
2446 *p->dcc_dram_bw_nom_overhead_factor_c = 1.0;
2447 *p->dcc_dram_bw_pref_overhead_factor_c = 1.0;
2448
2449 *p->mall_comb_mcache_l = 0;
2450 *p->mall_comb_mcache_c = 0;
2451 *p->lc_comb_mcache = 0;
2452
2453 if (!p->dcc_enable)
2454 return;
2455
2456 l->is_dual_plane = dml_is_420(p->source_format) || p->source_format == dml2_rgbe_alpha;
2457
2458 l->l_p.num_chans = p->num_chans;
2459 l->l_p.mem_word_bytes = p->mem_word_bytes;
2460 l->l_p.mcache_size_bytes = p->mcache_size_bytes;
2461 l->l_p.mcache_line_size_bytes = p->mcache_line_size_bytes;
2462 l->l_p.gpuvm_enable = p->gpuvm_enable;
2463 l->l_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes;
2464 l->l_p.surf_vert = p->surf_vert;
2465 l->l_p.vp_stationary = p->vp_stationary;
2466 l->l_p.tiling_mode = p->tiling_mode;
2467 l->l_p.vp_start_x = p->vp_start_x_l;
2468 l->l_p.vp_start_y = p->vp_start_y_l;
2469 l->l_p.full_vp_width = p->full_vp_width_l;
2470 l->l_p.full_vp_height = p->full_vp_height_l;
2471 l->l_p.blk_width = p->blk_width_l;
2472 l->l_p.blk_height = p->blk_height_l;
2473 l->l_p.vmpg_width = p->vmpg_width_l;
2474 l->l_p.vmpg_height = p->vmpg_height_l;
2475 l->l_p.full_swath_bytes = p->full_swath_bytes_l;
2476 l->l_p.bytes_per_pixel = p->bytes_per_pixel_l;
2477
2478 // output
2479 l->l_p.num_mcaches = p->num_mcaches_l;
2480 l->l_p.mcache_row_bytes = p->mcache_row_bytes_l;
2481 l->l_p.mcache_row_bytes_per_channel = p->mcache_row_bytes_per_channel_l;
2482 l->l_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_l;
2483 l->l_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_l;
2484 l->l_p.mvmpg_width = &l->mvmpg_width_l;
2485 l->l_p.mvmpg_height = &l->mvmpg_height_l;
2486 l->l_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_l;
2487 l->l_p.meta_row_width_ub = &l->meta_row_width_l;
2488 l->l_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_l;
2489
2490 calculate_mcache_row_bytes(scratch, &l->l_p);
2491 DML_ASSERT(*p->num_mcaches_l > 0);
2492
2493 if (l->is_dual_plane) {
2494 l->c_p.num_chans = p->num_chans;
2495 l->c_p.mem_word_bytes = p->mem_word_bytes;
2496 l->c_p.mcache_size_bytes = p->mcache_size_bytes;
2497 l->c_p.mcache_line_size_bytes = p->mcache_line_size_bytes;
2498 l->c_p.gpuvm_enable = p->gpuvm_enable;
2499 l->c_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes;
2500 l->c_p.surf_vert = p->surf_vert;
2501 l->c_p.vp_stationary = p->vp_stationary;
2502 l->c_p.tiling_mode = p->tiling_mode;
2503 l->c_p.vp_start_x = p->vp_start_x_c;
2504 l->c_p.vp_start_y = p->vp_start_y_c;
2505 l->c_p.full_vp_width = p->full_vp_width_c;
2506 l->c_p.full_vp_height = p->full_vp_height_c;
2507 l->c_p.blk_width = p->blk_width_c;
2508 l->c_p.blk_height = p->blk_height_c;
2509 l->c_p.vmpg_width = p->vmpg_width_c;
2510 l->c_p.vmpg_height = p->vmpg_height_c;
2511 l->c_p.full_swath_bytes = p->full_swath_bytes_c;
2512 l->c_p.bytes_per_pixel = p->bytes_per_pixel_c;
2513
2514 // output
2515 l->c_p.num_mcaches = p->num_mcaches_c;
2516 l->c_p.mcache_row_bytes = p->mcache_row_bytes_c;
2517 l->c_p.mcache_row_bytes_per_channel = p->mcache_row_bytes_per_channel_c;
2518 l->c_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_c;
2519 l->c_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_c;
2520 l->c_p.mvmpg_width = &l->mvmpg_width_c;
2521 l->c_p.mvmpg_height = &l->mvmpg_height_c;
2522 l->c_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_c;
2523 l->c_p.meta_row_width_ub = &l->meta_row_width_c;
2524 l->c_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_c;
2525
2526 calculate_mcache_row_bytes(scratch, &l->c_p);
2527 DML_ASSERT(*p->num_mcaches_c > 0);
2528 }
2529
2530 // Sharing for iMALL access
2531 l->mcache_remainder_l = *p->mcache_row_bytes_per_channel_l % p->mcache_size_bytes;
2532 l->mcache_remainder_c = *p->mcache_row_bytes_per_channel_c % p->mcache_size_bytes;
2533 l->mvmpg_access_width_l = p->surf_vert ? l->mvmpg_height_l : l->mvmpg_width_l;
2534 l->mvmpg_access_width_c = p->surf_vert ? l->mvmpg_height_c : l->mvmpg_width_c;
2535
2536 if (p->imall_enable) {
2537 *p->mall_comb_mcache_l = (2 * l->mcache_remainder_l <= p->mcache_size_bytes);
2538
2539 if (l->is_dual_plane)
2540 *p->mall_comb_mcache_c = (2 * l->mcache_remainder_c <= p->mcache_size_bytes);
2541 }
2542
2543 if (!p->surf_vert) // horizonatal access
2544 l->luma_time_factor = (double)l->mvmpg_height_c / l->mvmpg_height_l * 2;
2545 else // vertical access
2546 l->luma_time_factor = (double)l->mvmpg_width_c / l->mvmpg_width_l * 2;
2547
2548 // The algorithm starts with computing a non-integer, avg_mcache_element_size_l/c:
2549 if (*p->num_mcaches_l) {
2550 l->avg_mcache_element_size_l = l->meta_row_width_l / *p->num_mcaches_l;
2551 }
2552 if (l->is_dual_plane) {
2553 l->avg_mcache_element_size_c = l->meta_row_width_c / *p->num_mcaches_c;
2554
2555 /* if either remainder is 0, then mcache sharing is not needed or not possible due to full utilization */
2556 if (l->mcache_remainder_l && l->mcache_remainder_c) {
2557 if (!p->imall_enable || (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c)) {
2558 l->lc_comb_last_mcache_size = (unsigned int)((l->mcache_remainder_l * (*p->mall_comb_mcache_l ? 2 : 1) * l->luma_time_factor) +
2559 (l->mcache_remainder_c * (*p->mall_comb_mcache_c ? 2 : 1)));
2560 }
2561 *p->lc_comb_mcache = (l->lc_comb_last_mcache_size <= p->mcache_size_bytes) && (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c);
2562 }
2563 }
2564
2565 #ifdef __DML_VBA_DEBUG__
2566 DML_LOG_VERBOSE("DML::%s: imall_enable = %u\n", __func__, p->imall_enable);
2567 DML_LOG_VERBOSE("DML::%s: is_dual_plane = %u\n", __func__, l->is_dual_plane);
2568 DML_LOG_VERBOSE("DML::%s: surf_vert = %u\n", __func__, p->surf_vert);
2569 DML_LOG_VERBOSE("DML::%s: mvmpg_width_l = %u\n", __func__, l->mvmpg_width_l);
2570 DML_LOG_VERBOSE("DML::%s: mvmpg_height_l = %u\n", __func__, l->mvmpg_height_l);
2571 DML_LOG_VERBOSE("DML::%s: mcache_remainder_l = %f\n", __func__, l->mcache_remainder_l);
2572 DML_LOG_VERBOSE("DML::%s: num_mcaches_l = %u\n", __func__, *p->num_mcaches_l);
2573 DML_LOG_VERBOSE("DML::%s: avg_mcache_element_size_l = %u\n", __func__, l->avg_mcache_element_size_l);
2574 DML_LOG_VERBOSE("DML::%s: mvmpg_access_width_l = %u\n", __func__, l->mvmpg_access_width_l);
2575 DML_LOG_VERBOSE("DML::%s: mall_comb_mcache_l = %u\n", __func__, *p->mall_comb_mcache_l);
2576
2577 if (l->is_dual_plane) {
2578 DML_LOG_VERBOSE("DML::%s: mvmpg_width_c = %u\n", __func__, l->mvmpg_width_c);
2579 DML_LOG_VERBOSE("DML::%s: mvmpg_height_c = %u\n", __func__, l->mvmpg_height_c);
2580 DML_LOG_VERBOSE("DML::%s: mcache_remainder_c = %f\n", __func__, l->mcache_remainder_c);
2581 DML_LOG_VERBOSE("DML::%s: luma_time_factor = %f\n", __func__, l->luma_time_factor);
2582 DML_LOG_VERBOSE("DML::%s: num_mcaches_c = %u\n", __func__, *p->num_mcaches_c);
2583 DML_LOG_VERBOSE("DML::%s: avg_mcache_element_size_c = %u\n", __func__, l->avg_mcache_element_size_c);
2584 DML_LOG_VERBOSE("DML::%s: mvmpg_access_width_c = %u\n", __func__, l->mvmpg_access_width_c);
2585 DML_LOG_VERBOSE("DML::%s: mall_comb_mcache_c = %u\n", __func__, *p->mall_comb_mcache_c);
2586 DML_LOG_VERBOSE("DML::%s: lc_comb_last_mcache_size = %u\n", __func__, l->lc_comb_last_mcache_size);
2587 DML_LOG_VERBOSE("DML::%s: lc_comb_mcache = %u\n", __func__, *p->lc_comb_mcache);
2588 }
2589 #endif
2590 // calculate split_coordinate
2591 l->full_vp_access_width_l = p->surf_vert ? p->full_vp_height_l : p->full_vp_width_l;
2592 l->full_vp_access_width_c = p->surf_vert ? p->full_vp_height_c : p->full_vp_width_c;
2593
2594 for (n = 0; n < *p->num_mcaches_l - 1; n++) {
2595 p->mcache_offsets_l[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_l / l->mvmpg_access_width_l, 1)) * l->mvmpg_access_width_l;
2596 }
2597 p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l;
2598
2599 if (l->is_dual_plane) {
2600 for (n = 0; n < *p->num_mcaches_c - 1; n++) {
2601 p->mcache_offsets_c[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_c / l->mvmpg_access_width_c, 1)) * l->mvmpg_access_width_c;
2602 }
2603 p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c;
2604 }
2605 #ifdef __DML_VBA_DEBUG__
2606 for (n = 0; n < *p->num_mcaches_l; n++)
2607 DML_LOG_VERBOSE("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]);
2608
2609 if (l->is_dual_plane) {
2610 for (n = 0; n < *p->num_mcaches_c; n++)
2611 DML_LOG_VERBOSE("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]);
2612 }
2613 #endif
2614
2615 // Luma/Chroma combine in the last mcache
2616 // In the case of Luma/Chroma combine-mCache (with lc_comb_mcache==1), all mCaches except the last segment are filled as much as possible, when stay aligned to mvmpg boundary
2617 if (*p->lc_comb_mcache && l->is_dual_plane) {
2618 for (n = 0; n < *p->num_mcaches_l - 1; n++)
2619 p->mcache_offsets_l[n] = (n + 1) * l->mvmpg_per_mcache_lb_l * l->mvmpg_access_width_l;
2620 p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l;
2621
2622 for (n = 0; n < *p->num_mcaches_c - 1; n++)
2623 p->mcache_offsets_c[n] = (n + 1) * l->mvmpg_per_mcache_lb_c * l->mvmpg_access_width_c;
2624 p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c;
2625
2626 #ifdef __DML_VBA_DEBUG__
2627 for (n = 0; n < *p->num_mcaches_l; n++)
2628 DML_LOG_VERBOSE("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]);
2629
2630 for (n = 0; n < *p->num_mcaches_c; n++)
2631 DML_LOG_VERBOSE("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]);
2632 #endif
2633 }
2634
2635 *p->mcache_shift_granularity_l = l->mvmpg_access_width_l;
2636 *p->mcache_shift_granularity_c = l->mvmpg_access_width_c;
2637 }
2638
calculate_mall_bw_overhead_factor(double mall_prefetch_sdp_overhead_factor[],double mall_prefetch_dram_overhead_factor[],const struct dml2_display_cfg * display_cfg,unsigned int num_active_planes)2639 static void calculate_mall_bw_overhead_factor(
2640 double mall_prefetch_sdp_overhead_factor[], //mall_sdp_oh_nom/pref
2641 double mall_prefetch_dram_overhead_factor[], //mall_dram_oh_nom/pref
2642
2643 // input
2644 const struct dml2_display_cfg *display_cfg,
2645 unsigned int num_active_planes)
2646 {
2647 for (unsigned int k = 0; k < num_active_planes; ++k) {
2648 mall_prefetch_sdp_overhead_factor[k] = 1.0;
2649 mall_prefetch_dram_overhead_factor[k] = 1.0;
2650
2651 // SDP - on the return side
2652 if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall) // always no data return
2653 mall_prefetch_sdp_overhead_factor[k] = 1.25;
2654 else if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return)
2655 mall_prefetch_sdp_overhead_factor[k] = 0.25;
2656
2657 // DRAM
2658 if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall)
2659 mall_prefetch_dram_overhead_factor[k] = 2.0;
2660
2661 #ifdef __DML_VBA_DEBUG__
2662 DML_LOG_VERBOSE("DML::%s: k=%u, mall_prefetch_sdp_overhead_factor = %f\n", __func__, k, mall_prefetch_sdp_overhead_factor[k]);
2663 DML_LOG_VERBOSE("DML::%s: k=%u, mall_prefetch_dram_overhead_factor = %f\n", __func__, k, mall_prefetch_dram_overhead_factor[k]);
2664 #endif
2665 }
2666 }
2667
dml_get_return_bandwidth_available(const struct dml2_soc_bb * soc,enum dml2_core_internal_soc_state_type state_type,enum dml2_core_internal_bw_type bw_type,bool is_avg_bw,bool is_hvm_en,bool is_hvm_only,double dcfclk_mhz,double fclk_mhz,double dram_bw_mbps)2668 static double dml_get_return_bandwidth_available(
2669 const struct dml2_soc_bb *soc,
2670 enum dml2_core_internal_soc_state_type state_type,
2671 enum dml2_core_internal_bw_type bw_type,
2672 bool is_avg_bw,
2673 bool is_hvm_en,
2674 bool is_hvm_only,
2675 double dcfclk_mhz,
2676 double fclk_mhz,
2677 double dram_bw_mbps)
2678 {
2679 double return_bw_mbps = 0.;
2680 double ideal_sdp_bandwidth = (double)soc->return_bus_width_bytes * dcfclk_mhz;
2681 double ideal_fabric_bandwidth = fclk_mhz * (double)soc->fabric_datapath_to_dcn_data_return_bytes;
2682 double ideal_dram_bandwidth = dram_bw_mbps; //dram_speed_mts * soc->clk_table.dram_config.channel_count * soc->clk_table.dram_config.channel_width_bytes;
2683
2684 double derate_sdp_factor;
2685 double derate_fabric_factor;
2686 double derate_dram_factor;
2687
2688 double derate_sdp_bandwidth;
2689 double derate_fabric_bandwidth;
2690 double derate_dram_bandwidth;
2691
2692 if (is_avg_bw) {
2693 if (state_type == dml2_core_internal_soc_state_svp_prefetch) {
2694 derate_sdp_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dcfclk_derate_percent / 100.0;
2695 derate_fabric_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.fclk_derate_percent / 100.0;
2696 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dram_derate_percent_pixel / 100.0;
2697 } else { // just assume sys_active
2698 derate_sdp_factor = soc->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100.0;
2699 derate_fabric_factor = soc->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100.0;
2700 derate_dram_factor = soc->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100.0;
2701 }
2702 } else { // urgent bw
2703 if (state_type == dml2_core_internal_soc_state_svp_prefetch) {
2704 derate_sdp_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dcfclk_derate_percent / 100.0;
2705 derate_fabric_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.fclk_derate_percent / 100.0;
2706 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100.0;
2707
2708 if (is_hvm_en) {
2709 if (is_hvm_only)
2710 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_vm / 100.0;
2711 else
2712 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel_and_vm / 100.0;
2713 } else {
2714 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100.0;
2715 }
2716 } else { // just assume sys_active
2717 derate_sdp_factor = soc->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0;
2718 derate_fabric_factor = soc->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100.0;
2719
2720 if (is_hvm_en) {
2721 if (is_hvm_only)
2722 derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_vm / 100.0;
2723 else
2724 derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel_and_vm / 100.0;
2725 } else {
2726 derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100.0;
2727 }
2728 }
2729 }
2730
2731 derate_sdp_bandwidth = ideal_sdp_bandwidth * derate_sdp_factor;
2732 derate_fabric_bandwidth = ideal_fabric_bandwidth * derate_fabric_factor;
2733 derate_dram_bandwidth = ideal_dram_bandwidth * derate_dram_factor;
2734
2735 if (bw_type == dml2_core_internal_bw_sdp)
2736 return_bw_mbps = math_min2(derate_sdp_bandwidth, derate_fabric_bandwidth);
2737 else // dml2_core_internal_bw_dram
2738 return_bw_mbps = derate_dram_bandwidth;
2739
2740 DML_LOG_VERBOSE("DML::%s: is_avg_bw = %u\n", __func__, is_avg_bw);
2741 DML_LOG_VERBOSE("DML::%s: is_hvm_en = %u\n", __func__, is_hvm_en);
2742 DML_LOG_VERBOSE("DML::%s: is_hvm_only = %u\n", __func__, is_hvm_only);
2743 DML_LOG_VERBOSE("DML::%s: state_type = %s\n", __func__, dml2_core_internal_soc_state_type_str(state_type));
2744 DML_LOG_VERBOSE("DML::%s: bw_type = %s\n", __func__, dml2_core_internal_bw_type_str(bw_type));
2745 DML_LOG_VERBOSE("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz);
2746 DML_LOG_VERBOSE("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz);
2747 DML_LOG_VERBOSE("DML::%s: ideal_sdp_bandwidth = %f\n", __func__, ideal_sdp_bandwidth);
2748 DML_LOG_VERBOSE("DML::%s: ideal_fabric_bandwidth = %f\n", __func__, ideal_fabric_bandwidth);
2749 DML_LOG_VERBOSE("DML::%s: ideal_dram_bandwidth = %f\n", __func__, ideal_dram_bandwidth);
2750 DML_LOG_VERBOSE("DML::%s: derate_sdp_bandwidth = %f (derate %f)\n", __func__, derate_sdp_bandwidth, derate_sdp_factor);
2751 DML_LOG_VERBOSE("DML::%s: derate_fabric_bandwidth = %f (derate %f)\n", __func__, derate_fabric_bandwidth, derate_fabric_factor);
2752 DML_LOG_VERBOSE("DML::%s: derate_dram_bandwidth = %f (derate %f)\n", __func__, derate_dram_bandwidth, derate_dram_factor);
2753 DML_LOG_VERBOSE("DML::%s: return_bw_mbps = %f\n", __func__, return_bw_mbps);
2754 return return_bw_mbps;
2755 }
2756
calculate_bandwidth_available(double avg_bandwidth_available_min[dml2_core_internal_soc_state_max],double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available_min[dml2_core_internal_soc_state_max],double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max],double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max],const struct dml2_soc_bb * soc,bool HostVMEnable,double dcfclk_mhz,double fclk_mhz,double dram_bw_mbps)2757 static noinline_for_stack void calculate_bandwidth_available(
2758 double avg_bandwidth_available_min[dml2_core_internal_soc_state_max],
2759 double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
2760 double urg_bandwidth_available_min[dml2_core_internal_soc_state_max], // min between SDP and DRAM
2761 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
2762 double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max],
2763 double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max],
2764
2765 const struct dml2_soc_bb *soc,
2766 bool HostVMEnable,
2767 double dcfclk_mhz,
2768 double fclk_mhz,
2769 double dram_bw_mbps)
2770 {
2771 unsigned int n, m;
2772
2773 DML_LOG_VERBOSE("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz);
2774 DML_LOG_VERBOSE("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz);
2775 DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, dram_bw_mbps);
2776
2777 // Calculate all the bandwidth availabe
2778 for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
2779 for (n = 0; n < dml2_core_internal_bw_max; n++) {
2780 avg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc,
2781 m, // soc_state
2782 n, // bw_type
2783 1, // avg_bw
2784 HostVMEnable,
2785 0, // hvm_only
2786 dcfclk_mhz,
2787 fclk_mhz,
2788 dram_bw_mbps);
2789
2790 urg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
2791
2792
2793 #ifdef __DML_VBA_DEBUG__
2794 DML_LOG_VERBOSE("DML::%s: avg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), avg_bandwidth_available[m][n]);
2795 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_available[m][n]);
2796 #endif
2797
2798 // urg_bandwidth_available_vm_only is indexed by soc_state
2799 if (n == dml2_core_internal_bw_dram) {
2800 urg_bandwidth_available_vm_only[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 1, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
2801 urg_bandwidth_available_pixel_and_vm[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
2802 }
2803 }
2804
2805 avg_bandwidth_available_min[m] = math_min2(avg_bandwidth_available[m][dml2_core_internal_bw_dram], avg_bandwidth_available[m][dml2_core_internal_bw_sdp]);
2806 urg_bandwidth_available_min[m] = math_min2(urg_bandwidth_available[m][dml2_core_internal_bw_dram], urg_bandwidth_available[m][dml2_core_internal_bw_sdp]);
2807
2808 #ifdef __DML_VBA_DEBUG__
2809 DML_LOG_VERBOSE("DML::%s: avg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), avg_bandwidth_available_min[m]);
2810 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_min[m]);
2811 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_vm_only[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_vm_only[n]);
2812 #endif
2813 }
2814 }
2815
calculate_avg_bandwidth_required(double avg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],const struct dml2_display_cfg * display_cfg,unsigned int num_active_planes,double ReadBandwidthLuma[],double ReadBandwidthChroma[],double cursor_bw[],double dcc_dram_bw_nom_overhead_factor_p0[],double dcc_dram_bw_nom_overhead_factor_p1[],double mall_prefetch_dram_overhead_factor[],double mall_prefetch_sdp_overhead_factor[])2816 static void calculate_avg_bandwidth_required(
2817 double avg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
2818
2819 // input
2820 const struct dml2_display_cfg *display_cfg,
2821 unsigned int num_active_planes,
2822 double ReadBandwidthLuma[],
2823 double ReadBandwidthChroma[],
2824 double cursor_bw[],
2825 double dcc_dram_bw_nom_overhead_factor_p0[],
2826 double dcc_dram_bw_nom_overhead_factor_p1[],
2827 double mall_prefetch_dram_overhead_factor[],
2828 double mall_prefetch_sdp_overhead_factor[])
2829 {
2830 unsigned int n, m, k;
2831 double sdp_overhead_factor;
2832 double dram_overhead_factor_p0;
2833 double dram_overhead_factor_p1;
2834
2835 // Average BW support check
2836 for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
2837 for (n = 0; n < dml2_core_internal_bw_max; n++) { // sdp, dram
2838 avg_bandwidth_required[m][n] = 0;
2839 }
2840 }
2841
2842 // SysActive and SVP Prefetch AVG bandwidth Check
2843 for (k = 0; k < num_active_planes; ++k) {
2844 #ifdef __DML_VBA_DEBUG__
2845 DML_LOG_VERBOSE("DML::%s: plane %0d\n", __func__, k);
2846 DML_LOG_VERBOSE("DML::%s: ReadBandwidthLuma=%f\n", __func__, ReadBandwidthLuma[k]);
2847 DML_LOG_VERBOSE("DML::%s: ReadBandwidthChroma=%f\n", __func__, ReadBandwidthChroma[k]);
2848 DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor_p0=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p0[k]);
2849 DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor_p1=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p1[k]);
2850 DML_LOG_VERBOSE("DML::%s: mall_prefetch_dram_overhead_factor=%f\n", __func__, mall_prefetch_dram_overhead_factor[k]);
2851 DML_LOG_VERBOSE("DML::%s: mall_prefetch_sdp_overhead_factor=%f\n", __func__, mall_prefetch_sdp_overhead_factor[k]);
2852 #endif
2853
2854 sdp_overhead_factor = mall_prefetch_sdp_overhead_factor[k];
2855 dram_overhead_factor_p0 = dcc_dram_bw_nom_overhead_factor_p0[k] * mall_prefetch_dram_overhead_factor[k];
2856 dram_overhead_factor_p1 = dcc_dram_bw_nom_overhead_factor_p1[k] * mall_prefetch_dram_overhead_factor[k];
2857
2858 // FIXME_DCN4, was missing cursor_bw in here, but do I actually need that and tdlut bw for average bandwidth calculation?
2859 // active avg bw not include phantom, but svp_prefetch avg bw should include phantom pipes
2860 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
2861 avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] += sdp_overhead_factor * (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) + cursor_bw[k];
2862 avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] += dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k];
2863 }
2864 avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] += sdp_overhead_factor * (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) + cursor_bw[k];
2865 avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] += dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k];
2866
2867 #ifdef __DML_VBA_DEBUG__
2868 DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
2869 DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
2870 DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
2871 DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
2872 #endif
2873 }
2874 }
2875
CalculateVMRowAndSwath(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_CalculateVMRowAndSwath_params * p)2876 static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch,
2877 struct dml2_core_calcs_CalculateVMRowAndSwath_params *p)
2878 {
2879 struct dml2_core_calcs_CalculateVMRowAndSwath_locals *s = &scratch->CalculateVMRowAndSwath_locals;
2880
2881 s->HostVMDynamicLevels = CalculateHostVMDynamicLevels(p->display_cfg->gpuvm_enable, p->display_cfg->hostvm_enable, p->HostVMMinPageSize, p->display_cfg->hostvm_max_non_cached_page_table_levels);
2882
2883 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
2884 if (p->display_cfg->gpuvm_enable == true) {
2885 p->vm_group_bytes[k] = 512;
2886 p->dpte_group_bytes[k] = 512;
2887 } else {
2888 p->vm_group_bytes[k] = 0;
2889 p->dpte_group_bytes[k] = 0;
2890 }
2891
2892 if (dml_is_420(p->myPipe[k].SourcePixelFormat) || p->myPipe[k].SourcePixelFormat == dml2_rgbe_alpha) {
2893 if ((p->myPipe[k].SourcePixelFormat == dml2_420_10 || p->myPipe[k].SourcePixelFormat == dml2_420_12) && !dml_is_vertical_rotation(p->myPipe[k].RotationAngle)) {
2894 s->PTEBufferSizeInRequestsForLuma[k] = (p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma) / 2;
2895 s->PTEBufferSizeInRequestsForChroma[k] = s->PTEBufferSizeInRequestsForLuma[k];
2896 } else {
2897 s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma;
2898 s->PTEBufferSizeInRequestsForChroma[k] = p->PTEBufferSizeInRequestsChroma;
2899 }
2900
2901 scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary;
2902 scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable;
2903 scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface;
2904 scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesC;
2905 scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesC;
2906 scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat;
2907 scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling;
2908 scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelC;
2909 scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle;
2910 scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthC[k];
2911 scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeightC;
2912 scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStartC;
2913 scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStartC;
2914 scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable;
2915 scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels;
2916 scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
2917 scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = s->PTEBufferSizeInRequestsForChroma[k];
2918 scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchC;
2919 scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = p->myPipe[k].BlockWidthC;
2920 scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightC;
2921 scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]);
2922 scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchC;
2923 scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present;
2924
2925 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowC[k];
2926 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageC[k];
2927 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_chroma_ub[k];
2928 scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_chroma[k];
2929 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_chroma[k];
2930 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowC_one_row_per_frame[k];
2931 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_chroma_ub_one_row_per_frame[k];
2932 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_chroma_one_row_per_frame[k];
2933 scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_c[k];
2934 scratch->calculate_vm_and_row_bytes_params.vmpg_height = &p->vmpg_height_c[k];
2935 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthC[k];
2936 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightC[k];
2937 scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeC[k];
2938 scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_c[k];
2939
2940 scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_c[k];
2941 scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_chroma[k];
2942 scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_chroma[k];
2943 scratch->calculate_vm_and_row_bytes_params.meta_row_width = &p->meta_row_width_chroma[k];
2944 scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_chroma[k];
2945 scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_c[k];
2946
2947 s->vm_bytes_c = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params);
2948
2949 p->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2950 p->myPipe[k].VRatioChroma,
2951 p->myPipe[k].VTapsChroma,
2952 p->myPipe[k].InterlaceEnable,
2953 p->myPipe[k].ProgressiveToInterlaceUnitInOPP,
2954 p->myPipe[k].SwathHeightC,
2955 p->myPipe[k].RotationAngle,
2956 p->myPipe[k].mirrored,
2957 p->myPipe[k].ViewportStationary,
2958 p->SwathWidthC[k],
2959 p->myPipe[k].ViewportHeightC,
2960 p->myPipe[k].ViewportXStartC,
2961 p->myPipe[k].ViewportYStartC,
2962
2963 // Output
2964 &p->VInitPreFillC[k],
2965 &p->MaxNumSwathC[k]);
2966 } else {
2967 s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma;
2968 s->PTEBufferSizeInRequestsForChroma[k] = 0;
2969 s->PixelPTEBytesPerRowC[k] = 0;
2970 s->PixelPTEBytesPerRowStorageC[k] = 0;
2971 s->vm_bytes_c = 0;
2972 p->MaxNumSwathC[k] = 0;
2973 p->PrefetchSourceLinesC[k] = 0;
2974 s->dpte_row_height_chroma_one_row_per_frame[k] = 0;
2975 s->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
2976 s->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
2977 }
2978
2979 scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary;
2980 scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable;
2981 scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface;
2982 scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesY;
2983 scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesY;
2984 scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat;
2985 scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling;
2986 scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelY;
2987 scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle;
2988 scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthY[k];
2989 scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeight;
2990 scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStart;
2991 scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStart;
2992 scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable;
2993 scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels;
2994 scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
2995 scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = s->PTEBufferSizeInRequestsForLuma[k];
2996 scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchY;
2997 scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = p->myPipe[k].BlockWidthY;
2998 scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightY;
2999 scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]);
3000 scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchY;
3001 scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present;
3002
3003 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowY[k];
3004 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageY[k];
3005 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_luma_ub[k];
3006 scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_luma[k];
3007 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_luma[k];
3008 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowY_one_row_per_frame[k];
3009 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_luma_ub_one_row_per_frame[k];
3010 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_luma_one_row_per_frame[k];
3011 scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_y[k];
3012 scratch->calculate_vm_and_row_bytes_params.vmpg_height = &p->vmpg_height_y[k];
3013 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthY[k];
3014 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightY[k];
3015 scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeY[k];
3016 scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_l[k];
3017
3018 scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_l[k];
3019 scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_luma[k];
3020 scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_luma[k];
3021 scratch->calculate_vm_and_row_bytes_params.meta_row_width = &p->meta_row_width_luma[k];
3022 scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_luma[k];
3023 scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_l[k];
3024
3025 s->vm_bytes_l = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params);
3026
3027 p->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
3028 p->myPipe[k].VRatio,
3029 p->myPipe[k].VTaps,
3030 p->myPipe[k].InterlaceEnable,
3031 p->myPipe[k].ProgressiveToInterlaceUnitInOPP,
3032 p->myPipe[k].SwathHeightY,
3033 p->myPipe[k].RotationAngle,
3034 p->myPipe[k].mirrored,
3035 p->myPipe[k].ViewportStationary,
3036 p->SwathWidthY[k],
3037 p->myPipe[k].ViewportHeight,
3038 p->myPipe[k].ViewportXStart,
3039 p->myPipe[k].ViewportYStart,
3040
3041 // Output
3042 &p->VInitPreFillY[k],
3043 &p->MaxNumSwathY[k]);
3044
3045 #ifdef __DML_VBA_DEBUG__
3046 DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes_l = %u (before hvm level)\n", __func__, k, s->vm_bytes_l);
3047 DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes_c = %u (before hvm level)\n", __func__, k, s->vm_bytes_c);
3048 DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes_per_row_ub_l = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_l[k]);
3049 DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes_per_row_ub_c = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_c[k]);
3050 #endif
3051 p->vm_bytes[k] = (s->vm_bytes_l + s->vm_bytes_c) * (1 + 8 * s->HostVMDynamicLevels);
3052 p->meta_row_bytes[k] = s->meta_row_bytes_per_row_ub_l[k] + s->meta_row_bytes_per_row_ub_c[k];
3053 p->meta_row_bytes_per_row_ub_l[k] = s->meta_row_bytes_per_row_ub_l[k];
3054 p->meta_row_bytes_per_row_ub_c[k] = s->meta_row_bytes_per_row_ub_c[k];
3055
3056 #ifdef __DML_VBA_DEBUG__
3057 DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes = %u\n", __func__, k, p->meta_row_bytes[k]);
3058 DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes = %u (after hvm level)\n", __func__, k, p->vm_bytes[k]);
3059 #endif
3060 if (s->PixelPTEBytesPerRowStorageY[k] <= 64 * s->PTEBufferSizeInRequestsForLuma[k] && s->PixelPTEBytesPerRowStorageC[k] <= 64 * s->PTEBufferSizeInRequestsForChroma[k]) {
3061 p->PTEBufferSizeNotExceeded[k] = true;
3062 } else {
3063 p->PTEBufferSizeNotExceeded[k] = false;
3064 }
3065
3066 s->one_row_per_frame_fits_in_buffer[k] = (s->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForLuma[k] &&
3067 s->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForChroma[k]);
3068 #ifdef __DML_VBA_DEBUG__
3069 if (p->PTEBufferSizeNotExceeded[k] == 0 || s->one_row_per_frame_fits_in_buffer[k] == 0) {
3070 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]);
3071 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]);
3072 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]);
3073 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]);
3074 DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]);
3075 DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]);
3076 DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded (not one_row_per_frame) = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]);
3077
3078 DML_LOG_VERBOSE("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels);
3079 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]);
3080 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]);
3081 DML_LOG_VERBOSE("DML::%s: k=%u, one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]);
3082 }
3083 #endif
3084 }
3085
3086 CalculateMALLUseForStaticScreen(
3087 p->display_cfg,
3088 p->NumberOfActiveSurfaces,
3089 p->MALLAllocatedForDCN,
3090 p->SurfaceSizeInMALL,
3091 s->one_row_per_frame_fits_in_buffer,
3092 // Output
3093 p->is_using_mall_for_ss);
3094
3095 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3096 if (p->display_cfg->gpuvm_enable) {
3097 if (p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.enable == 1) {
3098 p->PTE_BUFFER_MODE[k] = p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.value;
3099 }
3100 p->PTE_BUFFER_MODE[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) ||
3101 dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64);
3102 p->BIGK_FRAGMENT_SIZE[k] = (unsigned int)(math_log((float)p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes * 1024, 2) - 12);
3103 } else {
3104 p->PTE_BUFFER_MODE[k] = 0;
3105 p->BIGK_FRAGMENT_SIZE[k] = 0;
3106 }
3107 }
3108
3109 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3110 p->DCCMetaBufferSizeNotExceeded[k] = true;
3111 #ifdef __DML_VBA_DEBUG__
3112 DML_LOG_VERBOSE("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]);
3113 DML_LOG_VERBOSE("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, p->is_using_mall_for_ss[k]);
3114 #endif
3115 p->use_one_row_for_frame[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) ||
3116 (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64 && dml_is_vertical_rotation(p->myPipe[k].RotationAngle));
3117
3118 p->use_one_row_for_frame_flip[k] = p->use_one_row_for_frame[k] && !(p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame);
3119
3120 if (p->use_one_row_for_frame[k]) {
3121 p->dpte_row_height_luma[k] = s->dpte_row_height_luma_one_row_per_frame[k];
3122 p->dpte_row_width_luma_ub[k] = s->dpte_row_width_luma_ub_one_row_per_frame[k];
3123 s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY_one_row_per_frame[k];
3124 p->dpte_row_height_chroma[k] = s->dpte_row_height_chroma_one_row_per_frame[k];
3125 p->dpte_row_width_chroma_ub[k] = s->dpte_row_width_chroma_ub_one_row_per_frame[k];
3126 s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC_one_row_per_frame[k];
3127 p->PTEBufferSizeNotExceeded[k] = s->one_row_per_frame_fits_in_buffer[k];
3128 }
3129
3130 if (p->meta_row_bytes[k] <= p->DCCMetaBufferSizeBytes) {
3131 p->DCCMetaBufferSizeNotExceeded[k] = true;
3132 } else {
3133 p->DCCMetaBufferSizeNotExceeded[k] = false;
3134
3135 #ifdef __DML_VBA_DEBUG__
3136 DML_LOG_VERBOSE("DML::%s: k=%d, meta_row_bytes = %d\n", __func__, k, p->meta_row_bytes[k]);
3137 DML_LOG_VERBOSE("DML::%s: k=%d, DCCMetaBufferSizeBytes = %d\n", __func__, k, p->DCCMetaBufferSizeBytes);
3138 DML_LOG_VERBOSE("DML::%s: k=%d, DCCMetaBufferSizeNotExceeded = %d\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]);
3139 #endif
3140 }
3141
3142 s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY[k] * (1 + 8 * s->HostVMDynamicLevels);
3143 s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC[k] * (1 + 8 * s->HostVMDynamicLevels);
3144 p->PixelPTEBytesPerRow[k] = s->PixelPTEBytesPerRowY[k] + s->PixelPTEBytesPerRowC[k];
3145 p->dpte_row_bytes_per_row_l[k] = s->PixelPTEBytesPerRowY[k];
3146 p->dpte_row_bytes_per_row_c[k] = s->PixelPTEBytesPerRowC[k];
3147
3148 // if one row of dPTEs is meant to span the entire frame, then for these calculations, we will pretend like that one big row is fetched in two halfs
3149 if (p->use_one_row_for_frame[k])
3150 p->PixelPTEBytesPerRow[k] = p->PixelPTEBytesPerRow[k] / 2;
3151
3152 CalculateRowBandwidth(
3153 p->display_cfg->gpuvm_enable,
3154 p->use_one_row_for_frame[k],
3155 p->myPipe[k].SourcePixelFormat,
3156 p->myPipe[k].VRatio,
3157 p->myPipe[k].VRatioChroma,
3158 p->myPipe[k].DCCEnable,
3159 p->myPipe[k].HTotal / p->myPipe[k].PixelClock,
3160 s->PixelPTEBytesPerRowY[k],
3161 s->PixelPTEBytesPerRowC[k],
3162 p->dpte_row_height_luma[k],
3163 p->dpte_row_height_chroma[k],
3164
3165 p->mrq_present,
3166 p->meta_row_bytes_per_row_ub_l[k],
3167 p->meta_row_bytes_per_row_ub_c[k],
3168 p->meta_row_height_luma[k],
3169 p->meta_row_height_chroma[k],
3170
3171 // Output
3172 &p->dpte_row_bw[k],
3173 &p->meta_row_bw[k]);
3174 #ifdef __DML_VBA_DEBUG__
3175 DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]);
3176 DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]);
3177 DML_LOG_VERBOSE("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config);
3178 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]);
3179 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]);
3180 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]);
3181 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]);
3182 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]);
3183 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]);
3184 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]);
3185 DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]);
3186 DML_LOG_VERBOSE("DML::%s: k=%u, gpuvm_enable = %u\n", __func__, k, p->display_cfg->gpuvm_enable);
3187 DML_LOG_VERBOSE("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]);
3188 DML_LOG_VERBOSE("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]);
3189 #endif
3190 }
3191 }
3192
CalculateUrgentLatency(double UrgentLatencyPixelDataOnly,double UrgentLatencyPixelMixedWithVMData,double UrgentLatencyVMDataOnly,bool DoUrgentLatencyAdjustment,double UrgentLatencyAdjustmentFabricClockComponent,double UrgentLatencyAdjustmentFabricClockReference,double FabricClock,double uclk_freq_mhz,enum dml2_qos_param_type qos_type,unsigned int urgent_ramp_uclk_cycles,unsigned int df_qos_response_time_fclk_cycles,unsigned int max_round_trip_to_furthest_cs_fclk_cycles,unsigned int mall_overhead_fclk_cycles,double umc_urgent_ramp_latency_margin,double fabric_max_transport_latency_margin)3193 static double CalculateUrgentLatency(
3194 double UrgentLatencyPixelDataOnly,
3195 double UrgentLatencyPixelMixedWithVMData,
3196 double UrgentLatencyVMDataOnly,
3197 bool DoUrgentLatencyAdjustment,
3198 double UrgentLatencyAdjustmentFabricClockComponent,
3199 double UrgentLatencyAdjustmentFabricClockReference,
3200 double FabricClock,
3201 double uclk_freq_mhz,
3202 enum dml2_qos_param_type qos_type,
3203 unsigned int urgent_ramp_uclk_cycles,
3204 unsigned int df_qos_response_time_fclk_cycles,
3205 unsigned int max_round_trip_to_furthest_cs_fclk_cycles,
3206 unsigned int mall_overhead_fclk_cycles,
3207 double umc_urgent_ramp_latency_margin,
3208 double fabric_max_transport_latency_margin)
3209 {
3210 double urgent_latency = 0;
3211 if (qos_type == dml2_qos_param_type_dcn4x) {
3212 urgent_latency = (df_qos_response_time_fclk_cycles + mall_overhead_fclk_cycles) / FabricClock
3213 + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1 + fabric_max_transport_latency_margin / 100.0)
3214 + urgent_ramp_uclk_cycles / uclk_freq_mhz * (1 + umc_urgent_ramp_latency_margin / 100.0);
3215 } else {
3216 urgent_latency = math_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
3217 if (DoUrgentLatencyAdjustment == true) {
3218 urgent_latency = urgent_latency + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
3219 }
3220 }
3221 #ifdef __DML_VBA_DEBUG__
3222 if (qos_type == dml2_qos_param_type_dcn4x) {
3223 DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type);
3224 DML_LOG_VERBOSE("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles);
3225 DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz);
3226 DML_LOG_VERBOSE("DML::%s: umc_urgent_ramp_latency_margin = %f\n", __func__, umc_urgent_ramp_latency_margin);
3227 } else {
3228 DML_LOG_VERBOSE("DML::%s: UrgentLatencyPixelDataOnly = %f\n", __func__, UrgentLatencyPixelDataOnly);
3229 DML_LOG_VERBOSE("DML::%s: UrgentLatencyPixelMixedWithVMData = %f\n", __func__, UrgentLatencyPixelMixedWithVMData);
3230 DML_LOG_VERBOSE("DML::%s: UrgentLatencyVMDataOnly = %f\n", __func__, UrgentLatencyVMDataOnly);
3231 DML_LOG_VERBOSE("DML::%s: UrgentLatencyAdjustmentFabricClockComponent = %f\n", __func__, UrgentLatencyAdjustmentFabricClockComponent);
3232 DML_LOG_VERBOSE("DML::%s: UrgentLatencyAdjustmentFabricClockReference = %f\n", __func__, UrgentLatencyAdjustmentFabricClockReference);
3233 }
3234 DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3235 DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, urgent_latency);
3236 #endif
3237 return urgent_latency;
3238 }
3239
CalculateTripToMemory(double UrgLatency,double FabricClock,double uclk_freq_mhz,enum dml2_qos_param_type qos_type,unsigned int trip_to_memory_uclk_cycles,unsigned int max_round_trip_to_furthest_cs_fclk_cycles,unsigned int mall_overhead_fclk_cycles,double umc_max_latency_margin,double fabric_max_transport_latency_margin)3240 static double CalculateTripToMemory(
3241 double UrgLatency,
3242 double FabricClock,
3243 double uclk_freq_mhz,
3244 enum dml2_qos_param_type qos_type,
3245 unsigned int trip_to_memory_uclk_cycles,
3246 unsigned int max_round_trip_to_furthest_cs_fclk_cycles,
3247 unsigned int mall_overhead_fclk_cycles,
3248 double umc_max_latency_margin,
3249 double fabric_max_transport_latency_margin)
3250 {
3251 double trip_to_memory_us;
3252 if (qos_type == dml2_qos_param_type_dcn4x) {
3253 trip_to_memory_us = mall_overhead_fclk_cycles / FabricClock
3254 + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0)
3255 + trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0);
3256 } else {
3257 trip_to_memory_us = UrgLatency;
3258 }
3259
3260 #ifdef __DML_VBA_DEBUG__
3261 if (qos_type == dml2_qos_param_type_dcn4x) {
3262 DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type);
3263 DML_LOG_VERBOSE("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles);
3264 DML_LOG_VERBOSE("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles);
3265 DML_LOG_VERBOSE("DML::%s: trip_to_memory_uclk_cycles = %d\n", __func__, trip_to_memory_uclk_cycles);
3266 DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz);
3267 DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3268 DML_LOG_VERBOSE("DML::%s: fabric_max_transport_latency_margin = %f\n", __func__, fabric_max_transport_latency_margin);
3269 DML_LOG_VERBOSE("DML::%s: umc_max_latency_margin = %f\n", __func__, umc_max_latency_margin);
3270 } else {
3271 DML_LOG_VERBOSE("DML::%s: UrgLatency = %f\n", __func__, UrgLatency);
3272 }
3273 DML_LOG_VERBOSE("DML::%s: trip_to_memory_us = %f\n", __func__, trip_to_memory_us);
3274 #endif
3275
3276
3277 return trip_to_memory_us;
3278 }
3279
CalculateMetaTripToMemory(double UrgLatency,double FabricClock,double uclk_freq_mhz,enum dml2_qos_param_type qos_type,unsigned int meta_trip_to_memory_uclk_cycles,unsigned int meta_trip_to_memory_fclk_cycles,double umc_max_latency_margin,double fabric_max_transport_latency_margin)3280 static double CalculateMetaTripToMemory(
3281 double UrgLatency,
3282 double FabricClock,
3283 double uclk_freq_mhz,
3284 enum dml2_qos_param_type qos_type,
3285 unsigned int meta_trip_to_memory_uclk_cycles,
3286 unsigned int meta_trip_to_memory_fclk_cycles,
3287 double umc_max_latency_margin,
3288 double fabric_max_transport_latency_margin)
3289 {
3290 double meta_trip_to_memory_us;
3291 if (qos_type == dml2_qos_param_type_dcn4x) {
3292 meta_trip_to_memory_us = meta_trip_to_memory_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0)
3293 + meta_trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0);
3294 } else {
3295 meta_trip_to_memory_us = UrgLatency;
3296 }
3297
3298 #ifdef __DML_VBA_DEBUG__
3299 if (qos_type == dml2_qos_param_type_dcn4x) {
3300 DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type);
3301 DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles);
3302 DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles);
3303 DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz);
3304 } else {
3305 DML_LOG_VERBOSE("DML::%s: UrgLatency = %f\n", __func__, UrgLatency);
3306 }
3307 DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_us = %f\n", __func__, meta_trip_to_memory_us);
3308 #endif
3309
3310
3311 return meta_trip_to_memory_us;
3312 }
3313
// Derives cursor fetch attributes from the cursor width (in pixels) and its
// bits per pixel.
//
// Outputs:
//   *cursor_bytes_per_line  - line width in bytes rounded up to the request size
//   *cursor_lines_per_chunk - lines per chunk for bpp 2, 32 and 64
//   *cursor_bytes_per_chunk - bytes_per_line * lines_per_chunk
//   *cursor_bytes           - bytes_per_line * cursor_width (square cursor assumed)
//
// NOTE(review): for any other cursor_bpp, *cursor_lines_per_chunk is not
// written here, yet it is still read to form *cursor_bytes_per_chunk, so the
// result depends on what the caller left in that location.
static void calculate_cursor_req_attributes(
	unsigned int cursor_width,
	unsigned int cursor_bpp,

	// output
	unsigned int *cursor_lines_per_chunk,
	unsigned int *cursor_bytes_per_line,
	unsigned int *cursor_bytes_per_chunk,
	unsigned int *cursor_bytes)
{
	unsigned int req_bytes = 0;
	unsigned int line_bytes = 0;
	unsigned int assumed_height = 0;

	// SW picks the cursor pitch to cover the largest cursor_width in use, with these restrictions:
	// - 2bpp: cursor_pitch = 256 pixels, because the minimum cursor request is 64B
	// - 32 or 64 bpp: cursor_pitch = 64, 128 or 256 pixels depending on the cursor width

	// The request size is 64B, 128B or 256B, chosen from the line width in bytes.
	line_bytes = (unsigned int)math_ceil2((double)cursor_width * cursor_bpp / 8, 1);
	req_bytes = (line_bytes <= 64) ? 64 : ((line_bytes <= 128) ? 128 : 256);

	// Lines wider than 256B are fetched with several 256B requests.
	*cursor_bytes_per_line = (unsigned int)math_ceil2((double)line_bytes, req_bytes);

	// A chunk is nominally 1KB or 2KB, restricted to a power-of-two line count of at most 16.
	switch (cursor_bpp) {
	case 2:
		*cursor_lines_per_chunk = 16;
		break;
	case 32:
		if (cursor_width <= 32)
			*cursor_lines_per_chunk = 16;
		else if (cursor_width <= 64)
			*cursor_lines_per_chunk = 8;
		else if (cursor_width <= 128)
			*cursor_lines_per_chunk = 4;
		else
			*cursor_lines_per_chunk = 2;
		break;
	case 64:
		if (cursor_width <= 16)
			*cursor_lines_per_chunk = 16;
		else if (cursor_width <= 32)
			*cursor_lines_per_chunk = 8;
		else if (cursor_width <= 64)
			*cursor_lines_per_chunk = 4;
		else if (cursor_width <= 128)
			*cursor_lines_per_chunk = 2;
		else
			*cursor_lines_per_chunk = 1;
		break;
	default:
		// Only complain when a cursor is actually present.
		if (cursor_width > 0) {
			DML_LOG_VERBOSE("DML::%s: Invalid cursor_bpp = %d\n", __func__, cursor_bpp);
			DML_ASSERT(0);
		}
		break;
	}

	*cursor_bytes_per_chunk = *cursor_bytes_per_line * *cursor_lines_per_chunk;

	// All requested cursor data is stored in the return buffer, so cursor_bytes can be compared directly with the CursorBufferSize.
	// Only cursor_width is provided for worst-case sizing, so the cursor is assumed to be square.
	assumed_height = cursor_width;
	*cursor_bytes = *cursor_bytes_per_line * assumed_height;
#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: cursor_bpp = %d\n", __func__, cursor_bpp);
	DML_LOG_VERBOSE("DML::%s: cursor_width = %d\n", __func__, cursor_width);
	DML_LOG_VERBOSE("DML::%s: cursor_width_bytes = %d\n", __func__, line_bytes);
	DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_req = %d\n", __func__, req_bytes);
	DML_LOG_VERBOSE("DML::%s: cursor_lines_per_chunk = %d\n", __func__, *cursor_lines_per_chunk);
	DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_line = %d\n", __func__, *cursor_bytes_per_line);
	DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, *cursor_bytes_per_chunk);
	DML_LOG_VERBOSE("DML::%s: cursor_bytes = %d\n", __func__, *cursor_bytes);
	DML_LOG_VERBOSE("DML::%s: cursor_pitch = %d\n", __func__, cursor_bpp == 2 ? 256 : 1U << (unsigned int)math_ceil2(math_log((float)cursor_width, 2), 1));
#endif
}
3393
/*
 * Work out the urgent burst factor for the cursor of one plane.
 *
 * The number of whole cursor chunks that fit in the cursor buffer
 * (CursorBufferSize is multiplied by 1024 before dividing by the chunk size)
 * is converted to lines and then to time via LineTime. If that time does not
 * exceed UrgentLatency the plane is flagged through
 * NotEnoughUrgentLatencyHiding and the factor is forced to 1; otherwise the
 * factor is time / (time - UrgentLatency).
 *
 * When CursorWidth is 0 the function returns without writing either output.
 */
static void calculate_cursor_urgent_burst_factor(
	unsigned int CursorBufferSize,
	unsigned int CursorWidth,
	unsigned int cursor_bytes_per_chunk,
	unsigned int cursor_lines_per_chunk,
	double LineTime,
	double UrgentLatency,

	double *UrgentBurstFactorCursor,
	bool *NotEnoughUrgentLatencyHiding)
{
	unsigned int chunks_in_buffer;
	unsigned int lines_in_buffer;
	double buffer_time;
	double latency_slack;
	bool latency_not_hidden;

	// No cursor on this plane: leave both outputs as the caller set them.
	if (CursorWidth == 0)
		return;

	chunks_in_buffer = (unsigned int)math_floor2(CursorBufferSize * 1024.0 / (double)cursor_bytes_per_chunk, 1);
	lines_in_buffer = chunks_in_buffer * cursor_lines_per_chunk;

	buffer_time = lines_in_buffer * LineTime;
	latency_slack = buffer_time - UrgentLatency;
	latency_not_hidden = (latency_slack <= 0);

	*NotEnoughUrgentLatencyHiding = latency_not_hidden;
	*UrgentBurstFactorCursor = latency_not_hidden ? 1 : buffer_time / latency_slack;

#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: LinesInCursorBuffer = %u\n", __func__, lines_in_buffer);
	DML_LOG_VERBOSE("DML::%s: CursorBufferSizeInTime = %f\n", __func__, buffer_time);
	DML_LOG_VERBOSE("DML::%s: CursorBufferSize = %u (kbytes)\n", __func__, CursorBufferSize);
	DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %u\n", __func__, cursor_bytes_per_chunk);
	DML_LOG_VERBOSE("DML::%s: cursor_lines_per_chunk = %u\n", __func__, cursor_lines_per_chunk);
	DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, *UrgentBurstFactorCursor);
	DML_LOG_VERBOSE("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding);
#endif
}
3432
/*
 * Compute the luma and chroma urgent burst factors for one plane.
 *
 * For each of luma and chroma, the DET size is converted to a number of
 * lines, rounded down to a multiple of the swath height, and turned into
 * time using LineTime and the vertical ratio. When that time does not exceed
 * UrgentLatency the factor is set to 1 and NotEnoughUrgentLatencyHiding is
 * raised; otherwise the factor is time / (time - UrgentLatency).
 *
 * Phantom pipes (per dml_is_phantom_pipe()) use 1024 * 1024 as the DET size
 * instead of DETBufferSizeY / DETBufferSizeC.
 * Chroma is only evaluated when BytePerPixelInDETC > 0; otherwise
 * UrgentBurstFactorChroma stays 0.
 */
static void CalculateUrgentBurstFactor(
	const struct dml2_plane_parameters *plane_cfg,
	unsigned int swath_width_luma_ub,
	unsigned int swath_width_chroma_ub,
	unsigned int SwathHeightY,
	unsigned int SwathHeightC,
	double LineTime,
	double UrgentLatency,
	double VRatio,
	double VRatioC,
	double BytePerPixelInDETY,
	double BytePerPixelInDETC,
	unsigned int DETBufferSizeY,
	unsigned int DETBufferSizeC,
	// Output
	double *UrgentBurstFactorLuma,
	double *UrgentBurstFactorChroma,
	bool *NotEnoughUrgentLatencyHiding)
{
	unsigned int luma_det_bytes;
	double luma_lines;
	double luma_time;

	// Defaults; the checks below may override them.
	*NotEnoughUrgentLatencyHiding = 0;
	*UrgentBurstFactorLuma = 0;
	*UrgentBurstFactorChroma = 0;

#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio);
	DML_LOG_VERBOSE("DML::%s: VRatioC = %f\n", __func__, VRatioC);
	DML_LOG_VERBOSE("DML::%s: DETBufferSizeY = %d\n", __func__, DETBufferSizeY);
	DML_LOG_VERBOSE("DML::%s: DETBufferSizeC = %d\n", __func__, DETBufferSizeC);
	DML_LOG_VERBOSE("DML::%s: BytePerPixelInDETY = %f\n", __func__, BytePerPixelInDETY);
	DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
	DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, LineTime);
#endif
	DML_ASSERT(VRatio > 0);

	// Luma
	luma_det_bytes = dml_is_phantom_pipe(plane_cfg) ? 1024 * 1024 : DETBufferSizeY;
	luma_lines = luma_det_bytes / BytePerPixelInDETY / swath_width_luma_ub;
	luma_time = math_floor2(luma_lines, SwathHeightY) * LineTime / VRatio;

	if (luma_time - UrgentLatency <= 0) {
		*NotEnoughUrgentLatencyHiding = 1;
		*UrgentBurstFactorLuma = 1;
	} else {
		*UrgentBurstFactorLuma = luma_time / (luma_time - UrgentLatency);
	}

	// Chroma, only for formats that carry a second plane
	if (BytePerPixelInDETC > 0) {
		unsigned int chroma_det_bytes = dml_is_phantom_pipe(plane_cfg) ? 1024 * 1024 : DETBufferSizeC;
		double chroma_lines = chroma_det_bytes / BytePerPixelInDETC / swath_width_chroma_ub;
		double chroma_time = math_floor2(chroma_lines, SwathHeightC) * LineTime / VRatioC;

		if (chroma_time - UrgentLatency <= 0) {
			*NotEnoughUrgentLatencyHiding = 1;
			*UrgentBurstFactorChroma = 1;
		} else {
			*UrgentBurstFactorChroma = chroma_time / (chroma_time - UrgentLatency);
		}
	}

#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: LinesInDETLuma = %f\n", __func__, luma_lines);
	DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
	DML_LOG_VERBOSE("DML::%s: DETBufferSizeInTimeLuma = %f\n", __func__, luma_time);
	DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, *UrgentBurstFactorLuma);
	DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, *UrgentBurstFactorChroma);
	DML_LOG_VERBOSE("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding);
#endif
}
3503
CalculateDCFCLKDeepSleepTdlut(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],unsigned int SwathWidthY[],unsigned int SwathWidthC[],unsigned int DPPPerSurface[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double Dppclk[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int ReturnBusWidth,double dispclk,unsigned int tdlut_bytes_to_deliver[],double prefetch_swath_time_us[],double * DCFClkDeepSleep)3504 static void CalculateDCFCLKDeepSleepTdlut(
3505 const struct dml2_display_cfg *display_cfg,
3506 unsigned int NumberOfActiveSurfaces,
3507 unsigned int BytePerPixelY[],
3508 unsigned int BytePerPixelC[],
3509 unsigned int SwathWidthY[],
3510 unsigned int SwathWidthC[],
3511 unsigned int DPPPerSurface[],
3512 double PSCL_THROUGHPUT[],
3513 double PSCL_THROUGHPUT_CHROMA[],
3514 double Dppclk[],
3515 double ReadBandwidthLuma[],
3516 double ReadBandwidthChroma[],
3517 unsigned int ReturnBusWidth,
3518
3519 double dispclk,
3520 unsigned int tdlut_bytes_to_deliver[],
3521 double prefetch_swath_time_us[],
3522
3523 // Output
3524 double *DCFClkDeepSleep)
3525 {
3526 double DisplayPipeLineDeliveryTimeLuma;
3527 double DisplayPipeLineDeliveryTimeChroma;
3528 double DCFClkDeepSleepPerSurface[DML2_MAX_PLANES];
3529 double ReadBandwidth = 0.0;
3530
3531 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
3532 double pixel_rate_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
3533
3534 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) {
3535 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_rate_mhz;
3536 } else {
3537 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
3538 }
3539 if (BytePerPixelC[k] == 0) {
3540 DisplayPipeLineDeliveryTimeChroma = 0;
3541 } else {
3542 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio <= 1) {
3543 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerSurface[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_rate_mhz;
3544 } else {
3545 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
3546 }
3547 }
3548
3549 if (BytePerPixelC[k] > 0) {
3550 DCFClkDeepSleepPerSurface[k] = math_max2(__DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
3551 __DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
3552 } else {
3553 DCFClkDeepSleepPerSurface[k] = __DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
3554 }
3555 DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], pixel_rate_mhz / 16);
3556
3557 // adjust for 3dlut delivery time
3558 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && tdlut_bytes_to_deliver[k] > 0) {
3559 double tdlut_required_deepsleep_dcfclk = (double) tdlut_bytes_to_deliver[k] / 64.0 / prefetch_swath_time_us[k];
3560
3561 DML_LOG_VERBOSE("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
3562 DML_LOG_VERBOSE("DML::%s: k=%d, tdlut_bytes_to_deliver = %d\n", __func__, k, tdlut_bytes_to_deliver[k]);
3563 DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_swath_time_us = %f\n", __func__, k, prefetch_swath_time_us[k]);
3564 DML_LOG_VERBOSE("DML::%s: k=%d, tdlut_required_deepsleep_dcfclk = %f\n", __func__, k, tdlut_required_deepsleep_dcfclk);
3565
3566 // increase the deepsleep dcfclk to match the original dispclk throughput rate
3567 if (tdlut_required_deepsleep_dcfclk > DCFClkDeepSleepPerSurface[k]) {
3568 DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], tdlut_required_deepsleep_dcfclk);
3569 DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], dispclk / 4.0);
3570 }
3571 }
3572
3573 #ifdef __DML_VBA_DEBUG__
3574 DML_LOG_VERBOSE("DML::%s: k=%u, PixelClock = %f\n", __func__, k, pixel_rate_mhz);
3575 DML_LOG_VERBOSE("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
3576 #endif
3577 }
3578
3579 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
3580 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
3581 }
3582
3583 *DCFClkDeepSleep = math_max2(8.0, __DML2_CALCS_DCFCLK_FACTOR__ * ReadBandwidth / (double)ReturnBusWidth);
3584
3585 #ifdef __DML_VBA_DEBUG__
3586 DML_LOG_VERBOSE("DML::%s: __DML2_CALCS_DCFCLK_FACTOR__ = %f\n", __func__, __DML2_CALCS_DCFCLK_FACTOR__);
3587 DML_LOG_VERBOSE("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
3588 DML_LOG_VERBOSE("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth);
3589 DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
3590 #endif
3591
3592 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
3593 *DCFClkDeepSleep = math_max2(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
3594 }
3595
3596 DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
3597 }
3598
CalculateDCFCLKDeepSleep(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],unsigned int SwathWidthY[],unsigned int SwathWidthC[],unsigned int DPPPerSurface[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double Dppclk[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int ReturnBusWidth,double * DCFClkDeepSleep)3599 static noinline_for_stack void CalculateDCFCLKDeepSleep(
3600 const struct dml2_display_cfg *display_cfg,
3601 unsigned int NumberOfActiveSurfaces,
3602 unsigned int BytePerPixelY[],
3603 unsigned int BytePerPixelC[],
3604 unsigned int SwathWidthY[],
3605 unsigned int SwathWidthC[],
3606 unsigned int DPPPerSurface[],
3607 double PSCL_THROUGHPUT[],
3608 double PSCL_THROUGHPUT_CHROMA[],
3609 double Dppclk[],
3610 double ReadBandwidthLuma[],
3611 double ReadBandwidthChroma[],
3612 unsigned int ReturnBusWidth,
3613
3614 // Output
3615 double *DCFClkDeepSleep)
3616 {
3617 double zero_double[DML2_MAX_PLANES];
3618 unsigned int zero_integer[DML2_MAX_PLANES];
3619
3620 memset(zero_double, 0, DML2_MAX_PLANES * sizeof(double));
3621 memset(zero_integer, 0, DML2_MAX_PLANES * sizeof(unsigned int));
3622
3623 CalculateDCFCLKDeepSleepTdlut(
3624 display_cfg,
3625 NumberOfActiveSurfaces,
3626 BytePerPixelY,
3627 BytePerPixelC,
3628 SwathWidthY,
3629 SwathWidthC,
3630 DPPPerSurface,
3631 PSCL_THROUGHPUT,
3632 PSCL_THROUGHPUT_CHROMA,
3633 Dppclk,
3634 ReadBandwidthLuma,
3635 ReadBandwidthChroma,
3636 ReturnBusWidth,
3637 0,
3638 zero_integer, //tdlut_bytes_to_deliver,
3639 zero_double, //prefetch_swath_time_us,
3640
3641 // Output
3642 DCFClkDeepSleep);
3643 }
3644
/*
 * Compute the write-back delay for one write-back configuration.
 *
 * WritebackPixelFormat and WritebackHRatio are accepted but not used.
 *
 * Returns 0 when the unclamped last output line is negative; otherwise
 * last_line * line_length + (HTotal - WritebackDestinationWidth) + 80.
 */
static double CalculateWriteBackDelay(
	enum dml2_source_format_class WritebackPixelFormat,
	double WritebackHRatio,
	double WritebackVRatio,
	unsigned int WritebackVTaps,
	unsigned int WritebackDestinationWidth,
	unsigned int WritebackDestinationHeight,
	unsigned int WritebackSourceHeight,
	unsigned int HTotal)
{
	(void)WritebackPixelFormat;
	(void)WritebackHRatio;
	double v_init = (WritebackVRatio + WritebackVTaps + 1) / 2;
	// A line is at least the destination width, or ceil(width / 6) per vertical tap.
	double line_length = math_max2((double)WritebackDestinationWidth,
		math_ceil2((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
	double last_output_line = WritebackDestinationHeight - 1
		- math_ceil2(((double)WritebackSourceHeight - v_init) / WritebackVRatio, 1.0);

	if (last_output_line < 0)
		return 0;

	return last_output_line * line_length + (HTotal - WritebackDestinationWidth) + 80;
}
3672
CalculateMaxVStartup(bool ptoi_supported,unsigned int vblank_nom_default_us,const struct dml2_timing_cfg * timing,double write_back_delay_us)3673 static unsigned int CalculateMaxVStartup(
3674 bool ptoi_supported,
3675 unsigned int vblank_nom_default_us,
3676 const struct dml2_timing_cfg *timing,
3677 double write_back_delay_us)
3678 {
3679 unsigned int vblank_size = 0;
3680 unsigned int max_vstartup_lines = 0;
3681
3682 double line_time_us = (double)timing->h_total / ((double)timing->pixel_clock_khz / 1000);
3683 unsigned int vblank_actual = timing->v_total - timing->v_active;
3684 unsigned int vblank_nom_default_in_line = (unsigned int)math_floor2((double)vblank_nom_default_us / line_time_us, 1.0);
3685 unsigned int vblank_avail = (timing->vblank_nom == 0) ? vblank_nom_default_in_line : (unsigned int)timing->vblank_nom;
3686
3687 vblank_size = (unsigned int)math_min2(vblank_actual, vblank_avail);
3688
3689 if (timing->interlaced && !ptoi_supported)
3690 max_vstartup_lines = (unsigned int)(math_floor2((vblank_size - 1) / 2.0, 1.0));
3691 else
3692 max_vstartup_lines = vblank_size - (unsigned int)math_max2(1.0, math_ceil2(write_back_delay_us / line_time_us, 1.0));
3693 #ifdef __DML_VBA_DEBUG__
3694 DML_LOG_VERBOSE("DML::%s: VBlankNom = %lu\n", __func__, timing->vblank_nom);
3695 DML_LOG_VERBOSE("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us);
3696 DML_LOG_VERBOSE("DML::%s: line_time_us = %f\n", __func__, line_time_us);
3697 DML_LOG_VERBOSE("DML::%s: vblank_actual = %u\n", __func__, vblank_actual);
3698 DML_LOG_VERBOSE("DML::%s: vblank_avail = %u\n", __func__, vblank_avail);
3699 DML_LOG_VERBOSE("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines);
3700 #endif
3701 max_vstartup_lines = (unsigned int)math_min2(max_vstartup_lines, DML_MAX_VSTARTUP_START);
3702 return max_vstartup_lines;
3703 }
3704
/*
 * Decide, for every active surface, the swath heights, request sizes and the
 * luma/chroma split of the DET, and report whether the viewport fits.
 *
 * Sequence, as implemented below:
 *  1. CalculateSwathWidth() fills the swath widths and the maximum swath
 *     heights.
 *  2. full_swath_bytes_l/c = swath_width_*_ub * BytePerPixDET* * maximum swath
 *     height, rounded up to a multiple of 256 for dml2_420_10.
 *  3. TotalActiveDPP and NoChromaOrLinear are accumulated and passed to
 *     UnboundedRequest() to set *UnboundedRequestEnabled.
 *  4. CalculateDETBufferSize() produces DETBufferSizeInKByte[] and
 *     CompressedBufferSizeInkByte.
 *  5. Per surface, full or half swath heights are chosen so that the swaths
 *     fit in half of the DET; request sizes, viewport support and
 *     DETBufferSizeY/C follow from that choice.
 *  6. compbuf_reserved_space_64b and hw_debug5 are derived.
 *
 * scratch: only scratch->CalculateDETBufferSize_locals is used, as working
 *          storage for CalculateDETBufferSize().
 * p:       input/output parameter bundle; outputs are written through the
 *          pointers/arrays it carries.
 */
static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch *scratch,
	struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *p)
{
	unsigned int MaximumSwathHeightY[DML2_MAX_PLANES] = { 0 };
	unsigned int MaximumSwathHeightC[DML2_MAX_PLANES] = { 0 };
	unsigned int RoundedUpSwathSizeBytesY[DML2_MAX_PLANES] = { 0 };
	unsigned int RoundedUpSwathSizeBytesC[DML2_MAX_PLANES] = { 0 };
	unsigned int SwathWidthSingleDPP[DML2_MAX_PLANES] = { 0 };
	unsigned int SwathWidthSingleDPPChroma[DML2_MAX_PLANES] = { 0 };

	unsigned int TotalActiveDPP = 0;
	bool NoChromaOrLinear = true;
	unsigned int SurfaceDoingUnboundedRequest = 0;
	unsigned int DETBufferSizeInKByteForSwathCalculation;

	const long TTUFIFODEPTH = 8;
	const long MAXIMUMCOMPRESSION = 4;

#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP);
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
		DML_LOG_VERBOSE("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]);
	}
#endif
	// Step 1: swath widths and maximum swath heights for every surface.
	CalculateSwathWidth(
		p->display_cfg,
		p->ForceSingleDPP,
		p->NumberOfActiveSurfaces,
		p->ODMMode,
		p->BytePerPixY,
		p->BytePerPixC,
		p->Read256BytesBlockHeightY,
		p->Read256BytesBlockHeightC,
		p->Read256BytesBlockWidthY,
		p->Read256BytesBlockWidthC,
		p->surf_linear128_l,
		p->surf_linear128_c,
		p->DPPPerSurface,

		// Output
		p->req_per_swath_ub_l,
		p->req_per_swath_ub_c,
		SwathWidthSingleDPP,
		SwathWidthSingleDPPChroma,
		p->SwathWidth,
		p->SwathWidthChroma,
		MaximumSwathHeightY,
		MaximumSwathHeightC,
		p->swath_width_luma_ub,
		p->swath_width_chroma_ub);

	// Step 2: size in bytes of one full-height swath per plane.
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
		p->full_swath_bytes_l[k] = (unsigned int)(p->swath_width_luma_ub[k] * p->BytePerPixDETY[k] * MaximumSwathHeightY[k]);
		p->full_swath_bytes_c[k] = (unsigned int)(p->swath_width_chroma_ub[k] * p->BytePerPixDETC[k] * MaximumSwathHeightC[k]);
#ifdef __DML_VBA_DEBUG__
		DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, MaximumSwathHeightY[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u swath_width_chroma_ub = %u\n", __func__, k, p->swath_width_chroma_ub[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, p->BytePerPixDETC[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, MaximumSwathHeightC[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]);
#endif
		// dml2_420_10 swaths are rounded up to a multiple of 256 bytes.
		if (p->display_cfg->plane_descriptors[k].pixel_format == dml2_420_10) {
			p->full_swath_bytes_l[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_l[k], 256));
			p->full_swath_bytes_c[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_c[k], 256));
		}
	}

	// Step 3: gather the inputs of the unbounded-request decision.
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
		TotalActiveDPP = TotalActiveDPP + (p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]);
		// Ends up holding the last surface index whose DPPPerSurface is non-zero.
		if (p->DPPPerSurface[k] > 0)
			SurfaceDoingUnboundedRequest = k;
		// Any 4:2:0, rgbe_alpha or linear surface clears NoChromaOrLinear.
		if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format) || p->display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha
			|| p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) {
			NoChromaOrLinear = false;
		}
	}

	*p->UnboundedRequestEnabled = UnboundedRequest(p->display_cfg->overrides.hw.force_unbounded_requesting.enable, p->display_cfg->overrides.hw.force_unbounded_requesting.value, TotalActiveDPP, NoChromaOrLinear);

	// Step 4: DET size per pipe and compressed buffer size.
	CalculateDETBufferSize(
		&scratch->CalculateDETBufferSize_locals,
		p->display_cfg,
		p->ForceSingleDPP,
		p->NumberOfActiveSurfaces,
		*p->UnboundedRequestEnabled,
		p->nomDETInKByte,
		p->MaxTotalDETInKByte,
		p->ConfigReturnBufferSizeInKByte,
		p->MinCompressedBufferSizeInKByte,
		p->ConfigReturnBufferSegmentSizeInkByte,
		p->CompressedBufferSegmentSizeInkByte,
		p->ReadBandwidthLuma,
		p->ReadBandwidthChroma,
		p->full_swath_bytes_l,
		p->full_swath_bytes_c,
		p->DPPPerSurface,

		// Output
		p->DETBufferSizeInKByte, // per hubp pipe
		p->CompressedBufferSizeInkByte);

#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP);
	DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte);
	DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte);
	DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled);
	DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte);
#endif

	// Step 5: per-surface swath height, request size and DET split.
	// ViewportSizeSupport starts true and is cleared by any failing surface.
	*p->ViewportSizeSupport = true;
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {

		// Phantom pipes use a fixed 1024 KB here instead of their DET allocation.
		DETBufferSizeInKByteForSwathCalculation = (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 1024 : p->DETBufferSizeInKByte[k]);
#ifdef __DML_VBA_DEBUG__
		DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation);
#endif
		if (p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) {
			// Linear surfaces: full swath height; request size is 128 or 256 per the linear128 flags.
			p->SwathHeightY[k] = MaximumSwathHeightY[k];
			p->SwathHeightC[k] = MaximumSwathHeightC[k];
			RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k];
			RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k];

			if (p->surf_linear128_l[k])
				p->request_size_bytes_luma[k] = 128;
			else
				p->request_size_bytes_luma[k] = 256;

			if (p->surf_linear128_c[k])
				p->request_size_bytes_chroma[k] = 128;
			else
				p->request_size_bytes_chroma[k] = 256;

		} else if (p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
			// Both full-height swaths fit in half the DET.
			p->SwathHeightY[k] = MaximumSwathHeightY[k];
			p->SwathHeightC[k] = MaximumSwathHeightC[k];
			RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k];
			RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k];
			p->request_size_bytes_luma[k] = 256;
			p->request_size_bytes_chroma[k] = 256;

		} else if (p->full_swath_bytes_l[k] >= 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
			// Luma dominates: halve the luma swath only.
			// Halved-plane request size is 128 when "2 bytes per pixel" and "vertical rotation"
			// are both true or both false, otherwise 64.
			p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
			p->SwathHeightC[k] = MaximumSwathHeightC[k];
			RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2;
			RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k];
			p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
			p->request_size_bytes_chroma[k] = 256;

		} else if (p->full_swath_bytes_l[k] < 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] / 2 <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
			// Chroma is relatively large: halve the chroma swath only.
			p->SwathHeightY[k] = MaximumSwathHeightY[k];
			p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
			RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k];
			RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2;
			p->request_size_bytes_luma[k] = 256;
			p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;

		} else {
			// Fallback: halve both swaths.
			p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
			p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
			RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2;
			RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2;
			p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
			p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
		}

		// A zero chroma swath height means there is no chroma request.
		if (p->SwathHeightC[k] == 0)
			p->request_size_bytes_chroma[k] = 0;

		// Unsupported when even half-height swaths exceed half the DET, or a
		// swath width exceeds its maximum (chroma checked only when SwathHeightC > 0).
		if ((p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] / 2 > DETBufferSizeInKByteForSwathCalculation * 1024 / 2) ||
			p->SwathWidth[k] > p->MaximumSwathWidthLuma[k] || (p->SwathHeightC[k] > 0 && p->SwathWidthChroma[k] > p->MaximumSwathWidthChroma[k])) {
			*p->ViewportSizeSupport = false;
			DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l=%u\n", __func__, k, p->full_swath_bytes_l[k]);
			DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c=%u\n", __func__, k, p->full_swath_bytes_c[k]);
			DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation=%u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation);
			DML_LOG_VERBOSE("DML::%s: k=%u SwathWidth=%u\n", __func__, k, p->SwathWidth[k]);
			DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, p->MaximumSwathWidthLuma[k]);
			DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthChroma=%d\n", __func__, k, p->SwathWidthChroma[k]);
			DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, p->MaximumSwathWidthChroma[k]);
			p->ViewportSizeSupportPerSurface[k] = false;
		} else {
			p->ViewportSizeSupportPerSurface[k] = true;
		}

		// Split the DET between plane0 and plane1. This uses the real
		// DETBufferSizeInKByte[k], not the phantom-pipe value used above.
		if (p->SwathHeightC[k] == 0) {
#ifdef __DML_VBA_DEBUG__
			DML_LOG_VERBOSE("DML::%s: k=%u, All DET will be used for plane0\n", __func__, k);
#endif
			p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024;
			p->DETBufferSizeC[k] = 0;
		} else if (RoundedUpSwathSizeBytesY[k] <= 1.5 * RoundedUpSwathSizeBytesC[k]) {
#ifdef __DML_VBA_DEBUG__
			DML_LOG_VERBOSE("DML::%s: k=%u, Half DET will be used for plane0, and half for plane1\n", __func__, k);
#endif
			p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
			p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
		} else {
#ifdef __DML_VBA_DEBUG__
			DML_LOG_VERBOSE("DML::%s: k=%u, 2/3 DET will be used for plane0, and 1/3 for plane1\n", __func__, k);
#endif
			// Luma gets 2/3 of the DET rounded down to a multiple of 1024 bytes; chroma gets the rest.
			p->DETBufferSizeY[k] = (unsigned int)(math_floor2(p->DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024));
			p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 - p->DETBufferSizeY[k];
		}

#ifdef __DML_VBA_DEBUG__
		DML_LOG_VERBOSE("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, RoundedUpSwathSizeBytesY[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]);
#endif

	}

	// Step 6a: reserved compressed-buffer space, in 64-byte units. The base
	// value is two pixel chunks.
	*p->compbuf_reserved_space_64b = 2 * p->pixel_chunk_size_kbytes * 1024 / 64;
	if (*p->UnboundedRequestEnabled) {
		// With unbounded requesting, raise it to at least the ROB size minus
		// TTUFIFODEPTH luma swaths of the unbounded surface (divided by
		// MAXIMUMCOMPRESSION when mrq_present), all in 64-byte units.
		*p->compbuf_reserved_space_64b = (unsigned int)math_ceil2(math_max2(*p->compbuf_reserved_space_64b,
			(double)(p->rob_buffer_size_kbytes * 1024 / 64) - (double)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / (p->mrq_present ? MAXIMUMCOMPRESSION : 1) / 64)), 1.0);
#ifdef __DML_VBA_DEBUG__
		DML_LOG_VERBOSE("DML::%s: RoundedUpSwathSizeBytesY[%d] = %u\n", __func__, SurfaceDoingUnboundedRequest, RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest]);
		DML_LOG_VERBOSE("DML::%s: rob_buffer_size_kbytes = %u\n", __func__, p->rob_buffer_size_kbytes);
#endif
	}
#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: compbuf_reserved_space_64b = %u\n", __func__, *p->compbuf_reserved_space_64b);
#endif

	// Step 6b: hw_debug5. ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE is defined at the
	// top of this file, so the first branch is the one compiled: the flag is
	// set whenever more than one surface is active.
	*p->hw_debug5 = false;
#ifdef ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE
	if (p->NumberOfActiveSurfaces > 1)
		*p->hw_debug5 = true;
#else
	// Alternative rule: set the flag for a single-DPP, DCC-enabled, bounded
	// configuration without mrq whose ROB plus compressed buffer capacity
	// exceeds TTUFIFODEPTH swaths.
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
		if (!(p->mrq_present) && (!(*p->UnboundedRequestEnabled)) && (TotalActiveDPP == 1)
			&& p->display_cfg->plane_descriptors[k].surface.dcc.enable
			&& ((p->rob_buffer_size_kbytes * 1024 * (p->mrq_present ? MAXIMUMCOMPRESSION : 1)
				+ *p->CompressedBufferSizeInkByte * MAXIMUMCOMPRESSION * 1024) > TTUFIFODEPTH * (RoundedUpSwathSizeBytesY[k] + RoundedUpSwathSizeBytesC[k])))
			*p->hw_debug5 = true;
#ifdef __DML_VBA_DEBUG__
		DML_LOG_VERBOSE("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled);
		DML_LOG_VERBOSE("DML::%s: k=%u MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION);
		DML_LOG_VERBOSE("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH);
		DML_LOG_VERBOSE("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte);
		DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5);
#endif
	}
#endif
}
3962
DecideODMMode(unsigned int HActive,double MaxDispclk,unsigned int MaximumPixelsPerLinePerDSCUnit,enum dml2_output_format_class OutFormat,bool UseDSC,unsigned int NumberOfDSCSlices,double SurfaceRequiredDISPCLKWithoutODMCombine,double SurfaceRequiredDISPCLKWithODMCombineTwoToOne,double SurfaceRequiredDISPCLKWithODMCombineThreeToOne,double SurfaceRequiredDISPCLKWithODMCombineFourToOne)3963 static enum dml2_odm_mode DecideODMMode(unsigned int HActive,
3964 double MaxDispclk,
3965 unsigned int MaximumPixelsPerLinePerDSCUnit,
3966 enum dml2_output_format_class OutFormat,
3967 bool UseDSC,
3968 unsigned int NumberOfDSCSlices,
3969 double SurfaceRequiredDISPCLKWithoutODMCombine,
3970 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne,
3971 double SurfaceRequiredDISPCLKWithODMCombineThreeToOne,
3972 double SurfaceRequiredDISPCLKWithODMCombineFourToOne)
3973 {
3974 (void)SurfaceRequiredDISPCLKWithODMCombineFourToOne;
3975 enum dml2_odm_mode MinimumRequiredODMModeForMaxDispClock;
3976 enum dml2_odm_mode MinimumRequiredODMModeForMaxDSCHActive;
3977 enum dml2_odm_mode MinimumRequiredODMModeForMax420HActive;
3978 enum dml2_odm_mode ODMMode = dml2_odm_mode_bypass;
3979
3980 MinimumRequiredODMModeForMaxDispClock =
3981 (SurfaceRequiredDISPCLKWithoutODMCombine <= MaxDispclk) ? dml2_odm_mode_bypass :
3982 (SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= MaxDispclk) ? dml2_odm_mode_combine_2to1 :
3983 (SurfaceRequiredDISPCLKWithODMCombineThreeToOne <= MaxDispclk) ? dml2_odm_mode_combine_3to1 : dml2_odm_mode_combine_4to1;
3984 if (ODMMode < MinimumRequiredODMModeForMaxDispClock)
3985 ODMMode = MinimumRequiredODMModeForMaxDispClock;
3986
3987 if (UseDSC) {
3988 MinimumRequiredODMModeForMaxDSCHActive =
3989 (HActive <= 1 * MaximumPixelsPerLinePerDSCUnit) ? dml2_odm_mode_bypass :
3990 (HActive <= 2 * MaximumPixelsPerLinePerDSCUnit) ? dml2_odm_mode_combine_2to1 :
3991 (HActive <= 3 * MaximumPixelsPerLinePerDSCUnit) ? dml2_odm_mode_combine_3to1 : dml2_odm_mode_combine_4to1;
3992 if (ODMMode < MinimumRequiredODMModeForMaxDSCHActive)
3993 ODMMode = MinimumRequiredODMModeForMaxDSCHActive;
3994 }
3995
3996 if (OutFormat == dml2_420) {
3997 MinimumRequiredODMModeForMax420HActive =
3998 (HActive <= 1 * DML2_MAX_FMT_420_BUFFER_WIDTH) ? dml2_odm_mode_bypass :
3999 (HActive <= 2 * DML2_MAX_FMT_420_BUFFER_WIDTH) ? dml2_odm_mode_combine_2to1 :
4000 (HActive <= 3 * DML2_MAX_FMT_420_BUFFER_WIDTH) ? dml2_odm_mode_combine_3to1 : dml2_odm_mode_combine_4to1;
4001 if (ODMMode < MinimumRequiredODMModeForMax420HActive)
4002 ODMMode = MinimumRequiredODMModeForMax420HActive;
4003 }
4004
4005 if (UseDSC) {
4006 if (ODMMode == dml2_odm_mode_bypass && NumberOfDSCSlices > 4)
4007 ODMMode = dml2_odm_mode_combine_2to1;
4008 if (ODMMode == dml2_odm_mode_combine_2to1 && NumberOfDSCSlices > 8)
4009 ODMMode = dml2_odm_mode_combine_3to1;
4010 if (ODMMode == dml2_odm_mode_combine_3to1 && NumberOfDSCSlices != 12)
4011 ODMMode = dml2_odm_mode_combine_4to1;
4012 }
4013
4014 return ODMMode;
4015 }
4016
CalculateODMConstraints(enum dml2_odm_mode ODMUse,double SurfaceRequiredDISPCLKWithoutODMCombine,double SurfaceRequiredDISPCLKWithODMCombineTwoToOne,double SurfaceRequiredDISPCLKWithODMCombineThreeToOne,double SurfaceRequiredDISPCLKWithODMCombineFourToOne,unsigned int MaximumPixelsPerLinePerDSCUnit,double * DISPCLKRequired,unsigned int * NumberOfDPPRequired,unsigned int * MaxHActiveForDSC,unsigned int * MaxDSCSlices,unsigned int * MaxHActiveFor420)4017 static void CalculateODMConstraints(
4018 enum dml2_odm_mode ODMUse,
4019 double SurfaceRequiredDISPCLKWithoutODMCombine,
4020 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne,
4021 double SurfaceRequiredDISPCLKWithODMCombineThreeToOne,
4022 double SurfaceRequiredDISPCLKWithODMCombineFourToOne,
4023 unsigned int MaximumPixelsPerLinePerDSCUnit,
4024 /* Output */
4025 double *DISPCLKRequired,
4026 unsigned int *NumberOfDPPRequired,
4027 unsigned int *MaxHActiveForDSC,
4028 unsigned int *MaxDSCSlices,
4029 unsigned int *MaxHActiveFor420)
4030 {
4031 switch (ODMUse) {
4032 case dml2_odm_mode_combine_2to1:
4033 *DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
4034 *NumberOfDPPRequired = 2;
4035 break;
4036 case dml2_odm_mode_combine_3to1:
4037 *DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineThreeToOne;
4038 *NumberOfDPPRequired = 3;
4039 break;
4040 case dml2_odm_mode_combine_4to1:
4041 *DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
4042 *NumberOfDPPRequired = 4;
4043 break;
4044 case dml2_odm_mode_auto:
4045 case dml2_odm_mode_split_1to2:
4046 case dml2_odm_mode_mso_1to2:
4047 case dml2_odm_mode_mso_1to4:
4048 case dml2_odm_mode_bypass:
4049 default:
4050 *DISPCLKRequired = SurfaceRequiredDISPCLKWithoutODMCombine;
4051 *NumberOfDPPRequired = 1;
4052 break;
4053 }
4054 *MaxHActiveForDSC = *NumberOfDPPRequired * MaximumPixelsPerLinePerDSCUnit;
4055 *MaxDSCSlices = *NumberOfDPPRequired * DML_MAX_NUM_OF_SLICES_PER_DSC;
4056 *MaxHActiveFor420 = *NumberOfDPPRequired * DML2_MAX_FMT_420_BUFFER_WIDTH;
4057 }
4058
ValidateODMMode(enum dml2_odm_mode ODMMode,double MaxDispclk,unsigned int HActive,enum dml2_output_format_class OutFormat,bool UseDSC,unsigned int NumberOfDSCSlices,unsigned int TotalNumberOfActiveDPP,unsigned int TotalNumberOfActiveOPP,unsigned int MaxNumDPP,unsigned int MaxNumOPP,double DISPCLKRequired,unsigned int NumberOfDPPRequired,unsigned int MaxHActiveForDSC,unsigned int MaxDSCSlices,unsigned int MaxHActiveFor420)4059 static bool ValidateODMMode(enum dml2_odm_mode ODMMode,
4060 double MaxDispclk,
4061 unsigned int HActive,
4062 enum dml2_output_format_class OutFormat,
4063 bool UseDSC,
4064 unsigned int NumberOfDSCSlices,
4065 unsigned int TotalNumberOfActiveDPP,
4066 unsigned int TotalNumberOfActiveOPP,
4067 unsigned int MaxNumDPP,
4068 unsigned int MaxNumOPP,
4069 double DISPCLKRequired,
4070 unsigned int NumberOfDPPRequired,
4071 unsigned int MaxHActiveForDSC,
4072 unsigned int MaxDSCSlices,
4073 unsigned int MaxHActiveFor420)
4074 {
4075 bool are_odm_segments_symmetrical = (ODMMode == dml2_odm_mode_combine_3to1) ? UseDSC : true;
4076 bool is_max_dsc_slice_required = (ODMMode == dml2_odm_mode_combine_3to1);
4077 unsigned int pixels_per_clock_cycle = (OutFormat == dml2_420 || OutFormat == dml2_n422) ? 2 : 1;
4078 unsigned int h_timing_div_mode =
4079 (ODMMode == dml2_odm_mode_combine_4to1 || ODMMode == dml2_odm_mode_combine_3to1) ? 4 :
4080 (ODMMode == dml2_odm_mode_combine_2to1) ? 2 : pixels_per_clock_cycle;
4081
4082 if (DISPCLKRequired > MaxDispclk)
4083 return false;
4084 if ((TotalNumberOfActiveDPP + NumberOfDPPRequired) > MaxNumDPP || (TotalNumberOfActiveOPP + NumberOfDPPRequired) > MaxNumOPP)
4085 return false;
4086 if (are_odm_segments_symmetrical) {
4087 if (HActive % (NumberOfDPPRequired * pixels_per_clock_cycle))
4088 return false;
4089 }
4090 if (HActive % h_timing_div_mode)
4091 /*
4092 * TODO - OTG_H_TOTAL, OTG_H_BLANK_START/END and
4093 * OTG_H_SYNC_A_START/END all need to be visible by h timing div
4094 * mode. This logic only checks H active.
4095 */
4096 return false;
4097
4098 if (UseDSC) {
4099 if (HActive > MaxHActiveForDSC)
4100 return false;
4101 if (NumberOfDSCSlices > MaxDSCSlices)
4102 return false;
4103 if (HActive % NumberOfDSCSlices)
4104 return false;
4105 if (NumberOfDSCSlices % NumberOfDPPRequired)
4106 return false;
4107 if (is_max_dsc_slice_required) {
4108 if (NumberOfDSCSlices != MaxDSCSlices)
4109 return false;
4110 }
4111 }
4112
4113 if (OutFormat == dml2_420) {
4114 if (HActive > MaxHActiveFor420)
4115 return false;
4116 }
4117
4118 return true;
4119 }
4120
CalculateODMMode(unsigned int MaximumPixelsPerLinePerDSCUnit,unsigned int HActive,enum dml2_output_format_class OutFormat,enum dml2_output_encoder_class Output,enum dml2_odm_mode ODMUse,double MaxDispclk,bool DSCEnable,unsigned int TotalNumberOfActiveDPP,unsigned int TotalNumberOfActiveOPP,unsigned int MaxNumDPP,unsigned int MaxNumOPP,double PixelClock,unsigned int NumberOfDSCSlices,bool * TotalAvailablePipesSupport,unsigned int * NumberOfDPP,enum dml2_odm_mode * ODMMode,double * RequiredDISPCLKPerSurface)4121 static noinline_for_stack void CalculateODMMode(
4122 unsigned int MaximumPixelsPerLinePerDSCUnit,
4123 unsigned int HActive,
4124 enum dml2_output_format_class OutFormat,
4125 enum dml2_output_encoder_class Output,
4126 enum dml2_odm_mode ODMUse,
4127 double MaxDispclk,
4128 bool DSCEnable,
4129 unsigned int TotalNumberOfActiveDPP,
4130 unsigned int TotalNumberOfActiveOPP,
4131 unsigned int MaxNumDPP,
4132 unsigned int MaxNumOPP,
4133 double PixelClock,
4134 unsigned int NumberOfDSCSlices,
4135
4136 // Output
4137 bool *TotalAvailablePipesSupport,
4138 unsigned int *NumberOfDPP,
4139 enum dml2_odm_mode *ODMMode,
4140 double *RequiredDISPCLKPerSurface)
4141 {
4142 double SurfaceRequiredDISPCLKWithoutODMCombine;
4143 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
4144 double SurfaceRequiredDISPCLKWithODMCombineThreeToOne;
4145 double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
4146 double DISPCLKRequired;
4147 unsigned int NumberOfDPPRequired;
4148 unsigned int MaxHActiveForDSC;
4149 unsigned int MaxDSCSlices;
4150 unsigned int MaxHActiveFor420;
4151 bool success;
4152 bool UseDSC = DSCEnable && (NumberOfDSCSlices > 0);
4153 enum dml2_odm_mode DecidedODMMode;
4154 bool isTMDS420 = (OutFormat == dml2_420 && Output == dml2_hdmi);
4155
4156 SurfaceRequiredDISPCLKWithoutODMCombine = CalculateRequiredDispclk(dml2_odm_mode_bypass, PixelClock, isTMDS420);
4157 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_2to1, PixelClock, isTMDS420);
4158 SurfaceRequiredDISPCLKWithODMCombineThreeToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_3to1, PixelClock, isTMDS420);
4159 SurfaceRequiredDISPCLKWithODMCombineFourToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_4to1, PixelClock, isTMDS420);
4160 #ifdef __DML_VBA_DEBUG__
4161 DML_LOG_VERBOSE("DML::%s: ODMUse = %d\n", __func__, ODMUse);
4162 DML_LOG_VERBOSE("DML::%s: Output = %d\n", __func__, Output);
4163 DML_LOG_VERBOSE("DML::%s: DSCEnable = %d\n", __func__, DSCEnable);
4164 DML_LOG_VERBOSE("DML::%s: MaxDispclk = %f\n", __func__, MaxDispclk);
4165 DML_LOG_VERBOSE("DML::%s: MaximumPixelsPerLinePerDSCUnit = %d\n", __func__, MaximumPixelsPerLinePerDSCUnit);
4166 DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithoutODMCombine = %f\n", __func__, SurfaceRequiredDISPCLKWithoutODMCombine);
4167 DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineTwoToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineTwoToOne);
4168 DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineThreeToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineThreeToOne);
4169 DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineFourToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineFourToOne);
4170 #endif
4171 if (ODMUse == dml2_odm_mode_auto)
4172 DecidedODMMode = DecideODMMode(HActive,
4173 MaxDispclk,
4174 MaximumPixelsPerLinePerDSCUnit,
4175 OutFormat,
4176 UseDSC,
4177 NumberOfDSCSlices,
4178 SurfaceRequiredDISPCLKWithoutODMCombine,
4179 SurfaceRequiredDISPCLKWithODMCombineTwoToOne,
4180 SurfaceRequiredDISPCLKWithODMCombineThreeToOne,
4181 SurfaceRequiredDISPCLKWithODMCombineFourToOne);
4182 else
4183 DecidedODMMode = ODMUse;
4184 CalculateODMConstraints(DecidedODMMode,
4185 SurfaceRequiredDISPCLKWithoutODMCombine,
4186 SurfaceRequiredDISPCLKWithODMCombineTwoToOne,
4187 SurfaceRequiredDISPCLKWithODMCombineThreeToOne,
4188 SurfaceRequiredDISPCLKWithODMCombineFourToOne,
4189 MaximumPixelsPerLinePerDSCUnit,
4190 &DISPCLKRequired,
4191 &NumberOfDPPRequired,
4192 &MaxHActiveForDSC,
4193 &MaxDSCSlices,
4194 &MaxHActiveFor420);
4195 success = ValidateODMMode(DecidedODMMode,
4196 MaxDispclk,
4197 HActive,
4198 OutFormat,
4199 UseDSC,
4200 NumberOfDSCSlices,
4201 TotalNumberOfActiveDPP,
4202 TotalNumberOfActiveOPP,
4203 MaxNumDPP,
4204 MaxNumOPP,
4205 DISPCLKRequired,
4206 NumberOfDPPRequired,
4207 MaxHActiveForDSC,
4208 MaxDSCSlices,
4209 MaxHActiveFor420);
4210
4211 *ODMMode = DecidedODMMode;
4212 *TotalAvailablePipesSupport = success;
4213 *NumberOfDPP = NumberOfDPPRequired;
4214 *RequiredDISPCLKPerSurface = success ? DISPCLKRequired : 0;
4215 #ifdef __DML_VBA_DEBUG__
4216 DML_LOG_VERBOSE("DML::%s: ODMMode = %d\n", __func__, *ODMMode);
4217 DML_LOG_VERBOSE("DML::%s: NumberOfDPP = %d\n", __func__, *NumberOfDPP);
4218 DML_LOG_VERBOSE("DML::%s: TotalAvailablePipesSupport = %d\n", __func__, *TotalAvailablePipesSupport);
4219 DML_LOG_VERBOSE("DML::%s: RequiredDISPCLKPerSurface = %f\n", __func__, *RequiredDISPCLKPerSurface);
4220 #endif
4221 }
4222
/*
 * Determine the link configuration for one output: whether DSC and FEC are
 * required, the output bpp, and which output type / link rate was selected.
 *
 * All outputs are first reset (DSC/FEC false, bpp 0, type/rate unknown).
 * The encoder class in Output then selects one of three paths:
 *
 *  - dml2_hdmi: a single TruncToValidBPP() query, never with DSC or FEC.
 *  - dml2_dp / dml2_dp2p0 / dml2_edp: a ladder of link rates tried from
 *    lowest to highest. A rate is tried only if OutputLinkDPRate is "na" or
 *    names that rate, the PHY clock reaches the rate's threshold, and no
 *    earlier rate already produced a non-zero bpp.
 *  - dml2_hdmifrl: a ladder of FRL rates, each gated on PHYCLKD18 and on no
 *    earlier rate having produced a non-zero bpp.
 *
 * A bpp of 0 from TruncToValidBPP() is treated here as "nothing valid at
 * this rate". When DSCEnable is dml2_dsc_enable_if_necessary and
 * ForcedOutputLinkBPP is 0, some tiers retry the same rate with DSC turned
 * on; for every tier except the top one that retry is additionally limited
 * to the case where the PHY clock cannot reach the next tier.
 *
 * Note that OutputType / OutputRate are written for every tier that is
 * entered, so they describe the last tier attempted even if *OutBpp is
 * still 0 afterwards.
 *
 * RequiredSlots is only passed on to TruncToValidBPP() on the DP paths; the
 * HDMI and HDMI FRL paths hand it a local scratch variable instead, so
 * *RequiredSlots is left untouched by this function for those outputs.
 *
 * s supplies the scratch storage (TruncToValidBPP_locals) used by
 * TruncToValidBPP().
 */
static noinline_for_stack void CalculateOutputLink(
	struct dml2_core_internal_scratch *s,
	double PHYCLK,
	double PHYCLKD18,
	double PHYCLKD32,
	double Downspreading,
	enum dml2_output_encoder_class Output,
	enum dml2_output_format_class OutputFormat,
	unsigned int HTotal,
	unsigned int HActive,
	double PixelClockBackEnd,
	double ForcedOutputLinkBPP,
	unsigned int DSCInputBitPerComponent,
	unsigned int NumberOfDSCSlices,
	double AudioSampleRate,
	unsigned int AudioSampleLayout,
	enum dml2_odm_mode ODMModeNoDSC,
	enum dml2_odm_mode ODMModeDSC,
	enum dml2_dsc_enable_option DSCEnable,
	unsigned int OutputLinkDPLanes,
	enum dml2_output_link_dp_rate OutputLinkDPRate,

	// Output
	bool *RequiresDSC,
	bool *RequiresFEC,
	double *OutBpp,
	enum dml2_core_internal_output_type *OutputType,
	enum dml2_core_internal_output_type_rate *OutputRate,
	unsigned int *RequiredSlots)
{
	/* Assigned on the DP and FRL paths before use; the HDMI path passes a literal false instead. */
	bool LinkDSCEnable;
	/* Sink for the slot count on paths that do not report RequiredSlots. */
	unsigned int dummy;
	*RequiresDSC = false;
	*RequiresFEC = false;
	*OutBpp = 0;

	*OutputType = dml2_core_internal_output_type_unknown;
	*OutputRate = dml2_core_internal_output_rate_unknown;

#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: DSCEnable = %u (dis, en, en_if_necessary)\n", __func__, DSCEnable);
	DML_LOG_VERBOSE("DML::%s: PHYCLK = %f\n", __func__, PHYCLK);
	DML_LOG_VERBOSE("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd);
	DML_LOG_VERBOSE("DML::%s: AudioSampleRate = %f\n", __func__, AudioSampleRate);
	DML_LOG_VERBOSE("DML::%s: HActive = %u\n", __func__, HActive);
	DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal);
	DML_LOG_VERBOSE("DML::%s: ODMModeNoDSC = %u\n", __func__, ODMModeNoDSC);
	DML_LOG_VERBOSE("DML::%s: ODMModeDSC = %u\n", __func__, ODMModeDSC);
	DML_LOG_VERBOSE("DML::%s: ForcedOutputLinkBPP = %f\n", __func__, ForcedOutputLinkBPP);
	DML_LOG_VERBOSE("DML::%s: Output (encoder) = %u\n", __func__, Output);
	DML_LOG_VERBOSE("DML::%s: OutputLinkDPRate = %u\n", __func__, OutputLinkDPRate);
#endif
	{
		if (Output == dml2_hdmi) {
			/* HDMI (TMDS): no DSC, no FEC; link rate argument is min(600, PHYCLK) * 10 over 3 lanes. */
			*RequiresDSC = false;
			*RequiresFEC = false;
			*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, math_min2(600, PHYCLK) * 10, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, false, Output,
				OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
			//OutputTypeAndRate = "HDMI";
			*OutputType = dml2_core_internal_output_type_hdmi;
		} else if (Output == dml2_dp || Output == dml2_dp2p0 || Output == dml2_edp) {
			/*
			 * Initial DSC/FEC state. DSC starts enabled only when it is
			 * forced on. FEC is required for DP and DP2.0 when DSC is
			 * forced on, and for DP2.0 regardless of DSC; eDP never
			 * requires it here.
			 */
			if (DSCEnable == dml2_dsc_enable) {
				*RequiresDSC = true;
				LinkDSCEnable = true;
				if (Output == dml2_dp || Output == dml2_dp2p0) {
					*RequiresFEC = true;
				} else {
					*RequiresFEC = false;
				}
			} else {
				*RequiresDSC = false;
				LinkDSCEnable = false;
				if (Output == dml2_dp2p0) {
					*RequiresFEC = true;
				} else {
					*RequiresFEC = false;
				}
			}
			if (Output == dml2_dp2p0) {
				/* DP2.0 ladder: UHBR10 -> UHBR13.5 -> UHBR20, gated on PHYCLKD32. */
				*OutBpp = 0;
				if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr10) && PHYCLKD32 >= 10000.0 / 32) {
					*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
						OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					/* Retry with DSC only if the PHY cannot step up to UHBR13.5. */
					if (*OutBpp == 0 && PHYCLKD32 < 13500.0 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
							OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " UHBR10";
					*OutputType = dml2_core_internal_output_type_dp2p0;
					*OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr10;
				}
				if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr13p5) && *OutBpp == 0 && PHYCLKD32 >= 13500.0 / 32) {
					*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
						OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);

					/* Retry with DSC only if the PHY cannot step up to UHBR20. */
					if (*OutBpp == 0 && PHYCLKD32 < 20000.0 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
							OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " UHBR13p5";
					*OutputType = dml2_core_internal_output_type_dp2p0;
					*OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr13p5;
				}
				if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr20) && *OutBpp == 0 && PHYCLKD32 >= 20000.0 / 32) {
					*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
						OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					/* Top tier: the DSC retry has no "next rate" condition. */
					if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
							OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " UHBR20";
					*OutputType = dml2_core_internal_output_type_dp2p0;
					*OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr20;
				}
			} else { // output is dp or edp
				/*
				 * DP / eDP ladder: HBR -> HBR2 -> HBR3, gated on PHYCLK.
				 * A DSC retry also turns FEC on, but only for dml2_dp.
				 */
				*OutBpp = 0;
				if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr) && PHYCLK >= 270) {
					*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
						OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					/* Retry with DSC only if the PHY cannot step up to HBR2. */
					if (*OutBpp == 0 && PHYCLK < 540 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						if (Output == dml2_dp) {
							*RequiresFEC = true;
						}
						*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
							OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " HBR";
					*OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp;
					*OutputRate = dml2_core_internal_output_rate_dp_rate_hbr;
				}
				if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr2) && *OutBpp == 0 && PHYCLK >= 540) {
					*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
						OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);

					/* Retry with DSC only if the PHY cannot step up to HBR3. */
					if (*OutBpp == 0 && PHYCLK < 810 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						if (Output == dml2_dp) {
							*RequiresFEC = true;
						}
						*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
							OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " HBR2";
					*OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp;
					*OutputRate = dml2_core_internal_output_rate_dp_rate_hbr2;
				}
				if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr3) && *OutBpp == 0 && PHYCLK >= 810) { // VBA_ERROR, vba code doesn't have hbr3 check
					*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
						OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);

					/* Top tier: the DSC retry has no "next rate" condition. */
					if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						if (Output == dml2_dp) {
							*RequiresFEC = true;
						}
						*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
							OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " HBR3";
					*OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp;
					*OutputRate = dml2_core_internal_output_rate_dp_rate_hbr3;
				}
			}
		} else if (Output == dml2_hdmifrl) {
			/*
			 * HDMI FRL: DSC and FEC are switched together. The ladder
			 * is 3x3, 6x3, 6x4, 8x4, 10x4, 12x4 (rate x lanes), gated
			 * on PHYCLKD18. Only the 10x4 and 12x4 tiers have a DSC
			 * retry.
			 */
			if (DSCEnable == dml2_dsc_enable) {
				*RequiresDSC = true;
				LinkDSCEnable = true;
				*RequiresFEC = true;
			} else {
				*RequiresDSC = false;
				LinkDSCEnable = false;
				*RequiresFEC = false;
			}
			*OutBpp = 0;
			if (PHYCLKD18 >= 3000.0 / 18) {
				*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 3000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
				//OutputTypeAndRate = Output & "3x3";
				*OutputType = dml2_core_internal_output_type_hdmifrl;
				*OutputRate = dml2_core_internal_output_rate_hdmi_rate_3x3;
			}
			if (*OutBpp == 0 && PHYCLKD18 >= 6000.0 / 18) {
				*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
				//OutputTypeAndRate = Output & "6x3";
				*OutputType = dml2_core_internal_output_type_hdmifrl;
				*OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x3;
			}
			/* Same PHY threshold as 6x3; only the lane count passed on differs. */
			if (*OutBpp == 0 && PHYCLKD18 >= 6000.0 / 18) {
				*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
				//OutputTypeAndRate = Output & "6x4";
				*OutputType = dml2_core_internal_output_type_hdmifrl;
				*OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x4;
			}
			if (*OutBpp == 0 && PHYCLKD18 >= 8000.0 / 18) {
				*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 8000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
				//OutputTypeAndRate = Output & "8x4";
				*OutputType = dml2_core_internal_output_type_hdmifrl;
				*OutputRate = dml2_core_internal_output_rate_hdmi_rate_8x4;
			}
			if (*OutBpp == 0 && PHYCLKD18 >= 10000.0 / 18) {
				*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
				/* Retry with DSC only if the PHY cannot step up to 12x4. */
				if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0 && PHYCLKD18 < 12000.0 / 18) {
					*RequiresDSC = true;
					LinkDSCEnable = true;
					*RequiresFEC = true;
					*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
				}
				//OutputTypeAndRate = Output & "10x4";
				*OutputType = dml2_core_internal_output_type_hdmifrl;
				*OutputRate = dml2_core_internal_output_rate_hdmi_rate_10x4;
			}
			if (*OutBpp == 0 && PHYCLKD18 >= 12000.0 / 18) {
				*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
				/* Top tier: the DSC retry has no "next rate" condition. */
				if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
					*RequiresDSC = true;
					LinkDSCEnable = true;
					*RequiresFEC = true;
					*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
				}
				//OutputTypeAndRate = Output & "12x4";
				*OutputType = dml2_core_internal_output_type_hdmifrl;
				*OutputRate = dml2_core_internal_output_rate_hdmi_rate_12x4;
			}
		}
	}
#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: RequiresDSC = %u\n", __func__, *RequiresDSC);
	DML_LOG_VERBOSE("DML::%s: RequiresFEC = %u\n", __func__, *RequiresFEC);
	DML_LOG_VERBOSE("DML::%s: OutBpp = %f\n", __func__, *OutBpp);
#endif
}
4463
/*
 * DISPCLK needed by a writeback pipe.
 *
 * Three candidate clocks are computed and the largest is returned:
 *  - a horizontal-tap term: PixelClock * ceil(HTaps / 8) / HRatio;
 *  - a vertical-tap term:
 *    PixelClock * (VTaps * ceil(DestinationWidth / 6) + 8) / HTotal;
 *  - a line-buffer term:
 *    PixelClock * VTaps * (DestinationWidth * VTaps - LineBufferSize / 57)
 *    / 6 / SourceWidth.
 *
 * WritebackPixelFormat and WritebackVRatio are accepted but not used.
 * WritebackHRatio, HTotal and WritebackSourceWidth are divisors and are not
 * checked for zero here.
 */
static double CalculateWriteBackDISPCLK(
	enum dml2_source_format_class WritebackPixelFormat,
	double PixelClock,
	double WritebackHRatio,
	double WritebackVRatio,
	unsigned int WritebackHTaps,
	unsigned int WritebackVTaps,
	unsigned int WritebackSourceWidth,
	unsigned int WritebackDestinationWidth,
	unsigned int HTotal,
	unsigned int WritebackLineBufferSize)
{
	(void)WritebackPixelFormat;
	(void)WritebackVRatio;

	const double h_tap_groups = math_ceil2((double)WritebackHTaps / 8.0, 1);
	const double dest_width_groups = math_ceil2((double)WritebackDestinationWidth / 6.0, 1);
	/* The product is formed in unsigned arithmetic before the subtraction. */
	const double hb_term = WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0;

	const double clk_h = PixelClock * h_tap_groups / WritebackHRatio;
	const double clk_v = PixelClock * (WritebackVTaps * dest_width_groups + 8.0) / (double)HTotal;
	const double clk_hb = PixelClock * WritebackVTaps * hb_term / 6.0 / (double)WritebackSourceWidth;

	return math_max3(clk_h, clk_v, clk_hb);
}
4485
RequiredDTBCLK(bool DSCEnable,double PixelClock,enum dml2_output_format_class OutputFormat,double OutputBpp,unsigned int DSCSlices,unsigned int HTotal,unsigned int HActive,unsigned int AudioRate,unsigned int AudioLayout)4486 static double RequiredDTBCLK(
4487 bool DSCEnable,
4488 double PixelClock,
4489 enum dml2_output_format_class OutputFormat,
4490 double OutputBpp,
4491 unsigned int DSCSlices,
4492 unsigned int HTotal,
4493 unsigned int HActive,
4494 unsigned int AudioRate,
4495 unsigned int AudioLayout)
4496 {
4497 if (DSCEnable != true) {
4498 return math_max2(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
4499 } else {
4500 double PixelWordRate = PixelClock / (OutputFormat == dml2_444 ? 1 : 2);
4501 double HCActive = math_ceil2(DSCSlices * math_ceil2(OutputBpp * math_ceil2(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
4502 double HCBlank = 64 + 32 * math_ceil2(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
4503 double AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
4504 double HActiveTribyteRate = PixelWordRate * HCActive / HActive;
4505 return math_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
4506 }
4507 }
4508
DSCDelayRequirement(bool DSCEnabled,enum dml2_odm_mode ODMMode,unsigned int DSCInputBitPerComponent,double OutputBpp,unsigned int HActive,unsigned int HTotal,unsigned int NumberOfDSCSlices,enum dml2_output_format_class OutputFormat,enum dml2_output_encoder_class Output,double PixelClock,double PixelClockBackEnd)4509 static unsigned int DSCDelayRequirement(
4510 bool DSCEnabled,
4511 enum dml2_odm_mode ODMMode,
4512 unsigned int DSCInputBitPerComponent,
4513 double OutputBpp,
4514 unsigned int HActive,
4515 unsigned int HTotal,
4516 unsigned int NumberOfDSCSlices,
4517 enum dml2_output_format_class OutputFormat,
4518 enum dml2_output_encoder_class Output,
4519 double PixelClock,
4520 double PixelClockBackEnd)
4521 {
4522 unsigned int DSCDelayRequirement_val = 0;
4523 unsigned int NumberOfDSCSlicesFactor = 1;
4524
4525 if (DSCEnabled == true && OutputBpp != 0) {
4526
4527 if (ODMMode == dml2_odm_mode_combine_4to1)
4528 NumberOfDSCSlicesFactor = 4;
4529 else if (ODMMode == dml2_odm_mode_combine_3to1)
4530 NumberOfDSCSlicesFactor = 3;
4531 else if (ODMMode == dml2_odm_mode_combine_2to1)
4532 NumberOfDSCSlicesFactor = 2;
4533
4534 DSCDelayRequirement_val = NumberOfDSCSlicesFactor * (dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, (unsigned int)(math_ceil2((double)HActive / (double)NumberOfDSCSlices, 1.0)),
4535 (NumberOfDSCSlices / NumberOfDSCSlicesFactor), OutputFormat, Output) + dscComputeDelay(OutputFormat, Output));
4536
4537 DSCDelayRequirement_val = (unsigned int)(DSCDelayRequirement_val + (HTotal - HActive) * math_ceil2((double)DSCDelayRequirement_val / (double)HActive, 1.0));
4538 DSCDelayRequirement_val = (unsigned int)(DSCDelayRequirement_val * PixelClock / PixelClockBackEnd);
4539
4540 } else {
4541 DSCDelayRequirement_val = 0;
4542 }
4543 #ifdef __DML_VBA_DEBUG__
4544 DML_LOG_VERBOSE("DML::%s: DSCEnabled= %u\n", __func__, DSCEnabled);
4545 DML_LOG_VERBOSE("DML::%s: ODMMode = %u\n", __func__, ODMMode);
4546 DML_LOG_VERBOSE("DML::%s: OutputBpp = %f\n", __func__, OutputBpp);
4547 DML_LOG_VERBOSE("DML::%s: HActive = %u\n", __func__, HActive);
4548 DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal);
4549 DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, PixelClock);
4550 DML_LOG_VERBOSE("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd);
4551 DML_LOG_VERBOSE("DML::%s: OutputFormat = %u\n", __func__, OutputFormat);
4552 DML_LOG_VERBOSE("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent);
4553 DML_LOG_VERBOSE("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices);
4554 DML_LOG_VERBOSE("DML::%s: DSCDelayRequirement_val = %u\n", __func__, DSCDelayRequirement_val);
4555 #endif
4556
4557 return DSCDelayRequirement_val;
4558 }
4559
CalculateSurfaceSizeInMall(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int MALLAllocatedForDCN,unsigned int BytesPerPixelY[],unsigned int BytesPerPixelC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int ReadBlockWidthY[],unsigned int ReadBlockWidthC[],unsigned int ReadBlockHeightY[],unsigned int ReadBlockHeightC[],unsigned int SurfaceSizeInMALL[],bool * ExceededMALLSize)4560 static void CalculateSurfaceSizeInMall(
4561 const struct dml2_display_cfg *display_cfg,
4562 unsigned int NumberOfActiveSurfaces,
4563 unsigned int MALLAllocatedForDCN,
4564 unsigned int BytesPerPixelY[],
4565 unsigned int BytesPerPixelC[],
4566 unsigned int Read256BytesBlockWidthY[],
4567 unsigned int Read256BytesBlockWidthC[],
4568 unsigned int Read256BytesBlockHeightY[],
4569 unsigned int Read256BytesBlockHeightC[],
4570 unsigned int ReadBlockWidthY[],
4571 unsigned int ReadBlockWidthC[],
4572 unsigned int ReadBlockHeightY[],
4573 unsigned int ReadBlockHeightC[],
4574
4575 // Output
4576 unsigned int SurfaceSizeInMALL[],
4577 bool *ExceededMALLSize)
4578 {
4579 (void)Read256BytesBlockWidthY;
4580 (void)Read256BytesBlockWidthC;
4581 (void)Read256BytesBlockHeightY;
4582 (void)Read256BytesBlockHeightC;
4583 unsigned int TotalSurfaceSizeInMALLForSS = 0;
4584 unsigned int TotalSurfaceSizeInMALLForSubVP = 0;
4585 unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024;
4586
4587 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
4588 const struct dml2_composition_cfg *composition = &display_cfg->plane_descriptors[k].composition;
4589 const struct dml2_surface_cfg *surface = &display_cfg->plane_descriptors[k].surface;
4590
4591 if (composition->viewport.stationary) {
4592 SurfaceSizeInMALL[k] = (unsigned int)(math_min2(math_ceil2((double)surface->plane0.width, ReadBlockWidthY[k]),
4593 math_floor2(composition->viewport.plane0.x_start + composition->viewport.plane0.width + ReadBlockWidthY[k] - 1, ReadBlockWidthY[k]) -
4594 math_floor2((double)composition->viewport.plane0.x_start, ReadBlockWidthY[k])) *
4595 math_min2(math_ceil2((double)surface->plane0.height, ReadBlockHeightY[k]),
4596 math_floor2((double)composition->viewport.plane0.y_start + composition->viewport.plane0.height + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
4597 math_floor2((double)composition->viewport.plane0.y_start, ReadBlockHeightY[k])) * BytesPerPixelY[k]);
4598
4599 if (ReadBlockWidthC[k] > 0) {
4600 SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] +
4601 math_min2(math_ceil2((double)surface->plane1.width, ReadBlockWidthC[k]),
4602 math_floor2((double)composition->viewport.plane1.y_start + composition->viewport.plane1.width + ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
4603 math_floor2((double)composition->viewport.plane1.y_start, ReadBlockWidthC[k])) *
4604 math_min2(math_ceil2((double)surface->plane1.height, ReadBlockHeightC[k]),
4605 math_floor2((double)composition->viewport.plane1.y_start + composition->viewport.plane1.height + ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
4606 math_floor2(composition->viewport.plane1.y_start, ReadBlockHeightC[k])) * BytesPerPixelC[k]);
4607 }
4608 } else {
4609 SurfaceSizeInMALL[k] = (unsigned int)(math_ceil2(math_min2(surface->plane0.width, composition->viewport.plane0.width + ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
4610 math_ceil2(math_min2(surface->plane0.height, composition->viewport.plane0.height + ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * BytesPerPixelY[k]);
4611 if (ReadBlockWidthC[k] > 0) {
4612 SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] +
4613 math_ceil2(math_min2(surface->plane1.width, composition->viewport.plane1.width + ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
4614 math_ceil2(math_min2(surface->plane1.height, composition->viewport.plane1.height + ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * BytesPerPixelC[k]);
4615 }
4616 }
4617 }
4618
4619 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
4620 /* SS and Subvp counted separate as they are never used at the same time */
4621 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))
4622 TotalSurfaceSizeInMALLForSubVP += SurfaceSizeInMALL[k];
4623 else if (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable)
4624 TotalSurfaceSizeInMALLForSS += SurfaceSizeInMALL[k];
4625 }
4626
4627 *ExceededMALLSize = (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) ||
4628 (TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes);
4629
4630 #ifdef __DML_VBA_DEBUG__
4631 DML_LOG_VERBOSE("DML::%s: MALLAllocatedForDCN = %u\n", __func__, MALLAllocatedForDCN * 1024 * 1024);
4632 DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALLForSubVP = %u\n", __func__, TotalSurfaceSizeInMALLForSubVP);
4633 DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALLForSS = %u\n", __func__, TotalSurfaceSizeInMALLForSS);
4634 DML_LOG_VERBOSE("DML::%s: ExceededMALLSize = %u\n", __func__, *ExceededMALLSize);
4635 #endif
4636 }
4637
calculate_tdlut_setting(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_calculate_tdlut_setting_params * p)4638 static void calculate_tdlut_setting(
4639 struct dml2_core_internal_scratch *scratch,
4640 struct dml2_core_calcs_calculate_tdlut_setting_params *p)
4641 {
4642 (void)scratch;
4643 // locals
4644 unsigned int tdlut_bpe = 8;
4645 unsigned int tdlut_width;
4646 unsigned int tdlut_pitch_bytes;
4647 unsigned int tdlut_footprint_bytes;
4648 unsigned int vmpg_bytes;
4649 unsigned int tdlut_vmpg_per_frame;
4650 unsigned int tdlut_pte_req_per_frame;
4651 unsigned int tdlut_bytes_per_line;
4652 double tdlut_drain_rate;
4653 unsigned int tdlut_mpc_width;
4654 unsigned int tdlut_bytes_per_group_simple;
4655
4656 if (!p->setup_for_tdlut) {
4657 *p->tdlut_groups_per_2row_ub = 0;
4658 *p->tdlut_opt_time = 0;
4659 *p->tdlut_drain_time = 0;
4660 *p->tdlut_bytes_to_deliver = 0;
4661 *p->tdlut_bytes_per_group = 0;
4662 *p->tdlut_pte_bytes_per_frame = 0;
4663 *p->tdlut_bytes_per_frame = 0;
4664 return;
4665 }
4666
4667 if (p->tdlut_mpc_width_flag) {
4668 tdlut_mpc_width = 33;
4669 tdlut_bytes_per_group_simple = 39*256;
4670 } else {
4671 tdlut_mpc_width = 17;
4672 tdlut_bytes_per_group_simple = 10*256;
4673 }
4674
4675 vmpg_bytes = p->gpuvm_page_size_kbytes * 1024;
4676
4677 if (p->tdlut_addressing_mode == dml2_tdlut_simple_linear) {
4678 if (p->tdlut_width_mode == dml2_tdlut_width_17_cube)
4679 tdlut_width = 4916;
4680 else
4681 tdlut_width = 35940;
4682 } else {
4683 if (p->tdlut_width_mode == dml2_tdlut_width_17_cube)
4684 tdlut_width = 17;
4685 else // dml2_tdlut_width_33_cube
4686 tdlut_width = 33;
4687 }
4688
4689 if (p->is_gfx11)
4690 tdlut_pitch_bytes = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); //256B alignment
4691 else
4692 tdlut_pitch_bytes = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 128); //128B alignment
4693
4694 if (p->tdlut_addressing_mode == dml2_tdlut_sw_linear)
4695 tdlut_footprint_bytes = tdlut_pitch_bytes * tdlut_width * tdlut_width;
4696 else
4697 tdlut_footprint_bytes = tdlut_pitch_bytes;
4698
4699 if (!p->gpuvm_enable) {
4700 tdlut_vmpg_per_frame = 0;
4701 tdlut_pte_req_per_frame = 0;
4702 } else {
4703 tdlut_vmpg_per_frame = (unsigned int)math_ceil2(tdlut_footprint_bytes - 1, vmpg_bytes) / vmpg_bytes + 1;
4704 tdlut_pte_req_per_frame = (unsigned int)math_ceil2(tdlut_vmpg_per_frame - 1, 8) / 8 + 1;
4705 }
4706 tdlut_bytes_per_line = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 64); //64b request
4707 *p->tdlut_pte_bytes_per_frame = tdlut_pte_req_per_frame * 64;
4708
4709 if (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) {
4710 //the tdlut_width is either 17 or 33 but the 33x33x33 is subsampled every other line/slice
4711 *p->tdlut_bytes_per_frame = tdlut_bytes_per_line * tdlut_mpc_width * tdlut_mpc_width;
4712 *p->tdlut_bytes_per_group = tdlut_bytes_per_line * tdlut_mpc_width;
4713 //the delivery cycles is DispClk cycles per line * number of lines * number of slices
4714 //tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width;
4715 tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / math_ceil2(tdlut_mpc_width/2.0, 1);
4716 } else {
4717 //tdlut_addressing_mode = tdlut_simple_linear, 3dlut width should be 4*1229=4916 elements
4718 *p->tdlut_bytes_per_frame = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256);
4719 *p->tdlut_bytes_per_group = tdlut_bytes_per_group_simple;
4720 //tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_width/2.0, 1);
4721 tdlut_drain_rate = 2 * tdlut_bpe * p->dispclk_mhz;
4722 }
4723
4724 //the tdlut is fetched during the 2 row times of prefetch.
4725 if (p->setup_for_tdlut) {
4726 *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1);
4727 if (*p->tdlut_bytes_per_frame > p->cursor_buffer_size * 1024)
4728 *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate;
4729 else
4730 *p->tdlut_opt_time = 0;
4731 *p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate;
4732 *p->tdlut_bytes_to_deliver = (unsigned int) (p->cursor_buffer_size * 1024.0);
4733 }
4734
4735 #ifdef __DML_VBA_DEBUG__
4736 DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %d\n", __func__, p->gpuvm_enable);
4737 DML_LOG_VERBOSE("DML::%s: vmpg_bytes = %d\n", __func__, vmpg_bytes);
4738 DML_LOG_VERBOSE("DML::%s: tdlut_vmpg_per_frame = %d\n", __func__, tdlut_vmpg_per_frame);
4739 DML_LOG_VERBOSE("DML::%s: tdlut_pte_req_per_frame = %d\n", __func__, tdlut_pte_req_per_frame);
4740
4741 DML_LOG_VERBOSE("DML::%s: dispclk_mhz = %f\n", __func__, p->dispclk_mhz);
4742 DML_LOG_VERBOSE("DML::%s: tdlut_width = %u\n", __func__, tdlut_width);
4743 DML_LOG_VERBOSE("DML::%s: tdlut_addressing_mode = %s\n", __func__, (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) ? "sw_linear" : "simple_linear");
4744 DML_LOG_VERBOSE("DML::%s: tdlut_pitch_bytes = %u\n", __func__, tdlut_pitch_bytes);
4745 DML_LOG_VERBOSE("DML::%s: tdlut_footprint_bytes = %u\n", __func__, tdlut_footprint_bytes);
4746 DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_frame = %u\n", __func__, *p->tdlut_bytes_per_frame);
4747 DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_line = %u\n", __func__, tdlut_bytes_per_line);
4748 DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_group = %u\n", __func__, *p->tdlut_bytes_per_group);
4749 DML_LOG_VERBOSE("DML::%s: tdlut_drain_rate = %f\n", __func__, tdlut_drain_rate);
4750 DML_LOG_VERBOSE("DML::%s: tdlut_delivery_cycles = %u\n", __func__, p->tdlut_addressing_mode == dml2_tdlut_sw_linear ? (unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width : (unsigned int)math_ceil2(tdlut_width/2.0, 1));
4751 DML_LOG_VERBOSE("DML::%s: tdlut_opt_time = %f\n", __func__, *p->tdlut_opt_time);
4752 DML_LOG_VERBOSE("DML::%s: tdlut_drain_time = %f\n", __func__, *p->tdlut_drain_time);
4753 DML_LOG_VERBOSE("DML::%s: tdlut_bytes_to_deliver = %d\n", __func__, *p->tdlut_bytes_to_deliver);
4754 DML_LOG_VERBOSE("DML::%s: tdlut_groups_per_2row_ub = %d\n", __func__, *p->tdlut_groups_per_2row_ub);
4755 #endif
4756 }
4757
CalculateTarb(const struct dml2_display_cfg * display_cfg,unsigned int PixelChunkSizeInKByte,unsigned int NumberOfActiveSurfaces,unsigned int NumberOfDPP[],unsigned int dpte_group_bytes[],unsigned int tdlut_bytes_per_group[],double HostVMInefficiencyFactor,double HostVMInefficiencyFactorPrefetch,unsigned int HostVMMinPageSize,double ReturnBW,unsigned int MetaChunkSize,double * Tarb,double * Tarb_prefetch)4758 static void CalculateTarb(
4759 const struct dml2_display_cfg *display_cfg,
4760 unsigned int PixelChunkSizeInKByte,
4761 unsigned int NumberOfActiveSurfaces,
4762 unsigned int NumberOfDPP[],
4763 unsigned int dpte_group_bytes[],
4764 unsigned int tdlut_bytes_per_group[],
4765 double HostVMInefficiencyFactor,
4766 double HostVMInefficiencyFactorPrefetch,
4767 unsigned int HostVMMinPageSize,
4768 double ReturnBW,
4769 unsigned int MetaChunkSize,
4770
4771 // output
4772 double *Tarb,
4773 double *Tarb_prefetch)
4774 {
4775 double extra_bytes = 0;
4776 double extra_bytes_prefetch = 0;
4777 double HostVMDynamicLevels = CalculateHostVMDynamicLevels(display_cfg->gpuvm_enable, display_cfg->hostvm_enable, HostVMMinPageSize, display_cfg->hostvm_max_non_cached_page_table_levels);
4778
4779 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
4780 extra_bytes = extra_bytes + (NumberOfDPP[k] * PixelChunkSizeInKByte * 1024);
4781
4782 if (display_cfg->plane_descriptors[k].surface.dcc.enable)
4783 extra_bytes = extra_bytes + (MetaChunkSize * 1024);
4784
4785 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut)
4786 extra_bytes = extra_bytes + tdlut_bytes_per_group[k];
4787 }
4788
4789 extra_bytes_prefetch = extra_bytes;
4790
4791 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
4792 if (display_cfg->gpuvm_enable == true) {
4793 extra_bytes = extra_bytes + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
4794 extra_bytes_prefetch = extra_bytes_prefetch + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactorPrefetch;
4795 }
4796 }
4797 *Tarb = extra_bytes / ReturnBW;
4798 *Tarb_prefetch = extra_bytes_prefetch / ReturnBW;
4799 #ifdef __DML_VBA_DEBUG__
4800 DML_LOG_VERBOSE("DML::%s: PixelChunkSizeInKByte = %d\n", __func__, PixelChunkSizeInKByte);
4801 DML_LOG_VERBOSE("DML::%s: MetaChunkSize = %d\n", __func__, MetaChunkSize);
4802 DML_LOG_VERBOSE("DML::%s: extra_bytes = %f\n", __func__, extra_bytes);
4803 DML_LOG_VERBOSE("DML::%s: extra_bytes_prefetch = %f\n", __func__, extra_bytes_prefetch);
4804 #endif
4805 }
4806
/*
 * Compute TWait as
 *   max(reserved_vblank_time_ns / 1000, g6_temp_read_blackout_us)
 *   + max(UrgentLatency, Ttrip)
 * and return it.
 */
static double CalculateTWait(
	long reserved_vblank_time_ns,
	double UrgentLatency,
	double Ttrip,
	double g6_temp_read_blackout_us)
{
	const double urgent_or_trip = math_max2(UrgentLatency, Ttrip);
	const double reserved_or_blackout = math_max2(reserved_vblank_time_ns/1000.0, g6_temp_read_blackout_us);
	const double wait_time = reserved_or_blackout + urgent_or_trip;

#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: reserved_vblank_time_ns = %ld\n", __func__, reserved_vblank_time_ns);
	DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
	DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, Ttrip);
	DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, wait_time);
#endif

	return wait_time;
}
4825
4826
/*
 * Compute the VUpdate/VReady pixel offsets and the dynamic metadata timing
 * values.
 *
 * Outputs:
 *   *VUpdateWidthPix  = ceil((14 / DCFClkDeepSleep + 12 / Dppclk + repeater delay) * PixelClock)
 *   *VReadyOffsetPix  = ceil(max(150 / Dppclk,
 *                            repeater delay + 20 / DCFClkDeepSleep + 10 / Dppclk) * PixelClock)
 *   *VUpdateOffsetPix = ceil(HTotal / 4)
 *   *TSetup           = sum of the three pixel counts above / PixelClock
 *   *Tdmbf            = DynamicMetadataTransmittedBytes / 4 / Dispclk
 *   *Tdmec            = HTotal / PixelClock
 *   *Tdmsks           = half of the vblank time when
 *                       DynamicMetadataLinesBeforeActiveRequired is 0, else that
 *                       many line times; halved again when InterlaceEnable is 1
 *                       and ProgressiveToInterlaceUnitInOPP is false.
 *
 * where repeater delay = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk).
 */
static void CalculateVUpdateAndDynamicMetadataParameters(
	unsigned int MaxInterDCNTileRepeaters,
	double Dppclk,
	double Dispclk,
	double DCFClkDeepSleep,
	double PixelClock,
	unsigned int HTotal,
	unsigned int VBlank,
	unsigned int DynamicMetadataTransmittedBytes,
	unsigned int DynamicMetadataLinesBeforeActiveRequired,
	unsigned int InterlaceEnable,
	bool ProgressiveToInterlaceUnitInOPP,

	// Output
	double *TSetup,
	double *Tdmbf,
	double *Tdmec,
	double *Tdmsks,
	unsigned int *VUpdateOffsetPix,
	unsigned int *VUpdateWidthPix,
	unsigned int *VReadyOffsetPix)
{
	const double repeater_delay = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	double metadata_skew;

	*VUpdateWidthPix = (unsigned int)(math_ceil2((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + repeater_delay) * PixelClock, 1.0));
	*VReadyOffsetPix = (unsigned int)(math_ceil2(math_max2(150.0 / Dppclk, repeater_delay + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0));
	*VUpdateOffsetPix = (unsigned int)(math_ceil2(HTotal / 4.0, 1.0));
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	metadata_skew = (DynamicMetadataLinesBeforeActiveRequired == 0) ?
		VBlank * HTotal / PixelClock / 2.0 :
		DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		metadata_skew = metadata_skew / 2;

	*Tdmsks = metadata_skew;

#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired);
	DML_LOG_VERBOSE("DML::%s: VBlank = %u\n", __func__, VBlank);
	DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal);
	DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	DML_LOG_VERBOSE("DML::%s: Dppclk = %f\n", __func__, Dppclk);
	DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep);
	DML_LOG_VERBOSE("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters);
	DML_LOG_VERBOSE("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, repeater_delay);

	DML_LOG_VERBOSE("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix);
	DML_LOG_VERBOSE("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix);
	DML_LOG_VERBOSE("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix);

	DML_LOG_VERBOSE("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
4883
get_urgent_bandwidth_required(struct dml2_core_shared_get_urgent_bandwidth_required_locals * l,const struct dml2_display_cfg * display_cfg,enum dml2_core_internal_soc_state_type state_type,enum dml2_core_internal_bw_type bw_type,bool inc_flip_bw,bool use_qual_row_bw,unsigned int NumberOfActiveSurfaces,unsigned int NumberOfDPP[],double dcc_dram_bw_nom_overhead_factor_p0[],double dcc_dram_bw_nom_overhead_factor_p1[],double dcc_dram_bw_pref_overhead_factor_p0[],double dcc_dram_bw_pref_overhead_factor_p1[],double mall_prefetch_sdp_overhead_factor[],double mall_prefetch_dram_overhead_factor[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double PrefetchBandwidthLuma[],double PrefetchBandwidthChroma[],double PrefetchBandwidthMax[],double excess_vactive_fill_bw_l[],double excess_vactive_fill_bw_c[],double cursor_bw[],double dpte_row_bw[],double meta_row_bw[],double prefetch_cursor_bw[],double prefetch_vmrow_bw[],double flip_bw[],double UrgentBurstFactorLuma[],double UrgentBurstFactorChroma[],double UrgentBurstFactorCursor[],double UrgentBurstFactorLumaPre[],double UrgentBurstFactorChromaPre[],double UrgentBurstFactorCursorPre[],double surface_required_bw[],double surface_peak_required_bw[])4884 static double get_urgent_bandwidth_required(
4885 struct dml2_core_shared_get_urgent_bandwidth_required_locals *l,
4886 const struct dml2_display_cfg *display_cfg,
4887 enum dml2_core_internal_soc_state_type state_type,
4888 enum dml2_core_internal_bw_type bw_type,
4889 bool inc_flip_bw, // including flip bw
4890 bool use_qual_row_bw,
4891 unsigned int NumberOfActiveSurfaces,
4892 unsigned int NumberOfDPP[],
4893 double dcc_dram_bw_nom_overhead_factor_p0[],
4894 double dcc_dram_bw_nom_overhead_factor_p1[],
4895 double dcc_dram_bw_pref_overhead_factor_p0[],
4896 double dcc_dram_bw_pref_overhead_factor_p1[],
4897 double mall_prefetch_sdp_overhead_factor[],
4898 double mall_prefetch_dram_overhead_factor[],
4899 double ReadBandwidthLuma[],
4900 double ReadBandwidthChroma[],
4901 double PrefetchBandwidthLuma[],
4902 double PrefetchBandwidthChroma[],
4903 double PrefetchBandwidthMax[],
4904 double excess_vactive_fill_bw_l[],
4905 double excess_vactive_fill_bw_c[],
4906 double cursor_bw[],
4907 double dpte_row_bw[],
4908 double meta_row_bw[],
4909 double prefetch_cursor_bw[],
4910 double prefetch_vmrow_bw[],
4911 double flip_bw[],
4912 double UrgentBurstFactorLuma[],
4913 double UrgentBurstFactorChroma[],
4914 double UrgentBurstFactorCursor[],
4915 double UrgentBurstFactorLumaPre[],
4916 double UrgentBurstFactorChromaPre[],
4917 double UrgentBurstFactorCursorPre[],
4918 /* outputs */
4919 double surface_required_bw[],
4920 double surface_peak_required_bw[])
4921 {
4922 // set inc_flip_bw = 0 for total_dchub_urgent_read_bw_noflip calculation, 1 for total_dchub_urgent_read_bw as described in the MAS
4923 // set use_qual_row_bw = 1 to calculate using qualified row bandwidth, used for total_flip_bw calculation
4924
4925 memset(l, 0, sizeof(struct dml2_core_shared_get_urgent_bandwidth_required_locals));
4926
4927 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
4928 l->mall_svp_prefetch_factor = (state_type == dml2_core_internal_soc_state_svp_prefetch) ? (bw_type == dml2_core_internal_bw_dram ? mall_prefetch_dram_overhead_factor[k] : mall_prefetch_sdp_overhead_factor[k]) : 1.0;
4929 l->tmp_nom_adj_factor_p0 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor;
4930 l->tmp_nom_adj_factor_p1 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor;
4931 l->tmp_pref_adj_factor_p0 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_pref_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor;
4932 l->tmp_pref_adj_factor_p1 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_pref_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor;
4933
4934 l->adj_factor_p0 = UrgentBurstFactorLuma[k] * l->tmp_nom_adj_factor_p0;
4935 l->adj_factor_p1 = UrgentBurstFactorChroma[k] * l->tmp_nom_adj_factor_p1;
4936 l->adj_factor_cur = UrgentBurstFactorCursor[k];
4937 l->adj_factor_p0_pre = UrgentBurstFactorLumaPre[k] * l->tmp_pref_adj_factor_p0;
4938 l->adj_factor_p1_pre = UrgentBurstFactorChromaPre[k] * l->tmp_pref_adj_factor_p1;
4939 l->adj_factor_cur_pre = UrgentBurstFactorCursorPre[k];
4940
4941 bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]);
4942 bool exclude_this_plane = false;
4943
4944 // Exclude phantom pipe in bw calculation for non svp prefetch state
4945 if (state_type != dml2_core_internal_soc_state_svp_prefetch && is_phantom)
4946 exclude_this_plane = true;
4947
4948 // The qualified row bandwidth, qual_row_bw, accounts for the regular non-flip row bandwidth when there is no possible immediate flip or HostVM invalidation flip.
4949 // The qual_row_bw is zero if HostVM is possible and only non-zero and equal to row_bw(i) if immediate flip is not allowed for that pipe.
4950 if (use_qual_row_bw) {
4951 if (display_cfg->hostvm_enable)
4952 l->per_plane_flip_bw[k] = 0; // qual_row_bw
4953 else if (!display_cfg->plane_descriptors[k].immediate_flip)
4954 l->per_plane_flip_bw[k] = NumberOfDPP[k] * (dpte_row_bw[k] + meta_row_bw[k]);
4955 } else {
4956 // the final_flip_bw includes the regular row_bw when immediate flip is disallowed (and no HostVM)
4957 if ((!display_cfg->plane_descriptors[k].immediate_flip && !display_cfg->hostvm_enable) || !inc_flip_bw)
4958 l->per_plane_flip_bw[k] = NumberOfDPP[k] * (dpte_row_bw[k] + meta_row_bw[k]);
4959 else
4960 l->per_plane_flip_bw[k] = NumberOfDPP[k] * flip_bw[k];
4961 }
4962
4963 if (!exclude_this_plane) {
4964 l->vm_row_bw = NumberOfDPP[k] * prefetch_vmrow_bw[k];
4965 l->flip_and_active_bw = l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur;
4966 l->flip_and_prefetch_bw = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre;
4967 l->flip_and_prefetch_bw_max = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthMax[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre;
4968 l->active_and_excess_bw = (ReadBandwidthLuma[k] + excess_vactive_fill_bw_l[k]) * l->tmp_nom_adj_factor_p0 + (ReadBandwidthChroma[k] + excess_vactive_fill_bw_c[k]) * l->tmp_nom_adj_factor_p1 + dpte_row_bw[k] + meta_row_bw[k];
4969 surface_required_bw[k] = math_max5(l->vm_row_bw, l->flip_and_active_bw, l->flip_and_prefetch_bw, l->active_and_excess_bw, l->flip_and_prefetch_bw_max);
4970
4971 /* export peak required bandwidth for the surface */
4972 surface_peak_required_bw[k] = math_max2(surface_required_bw[k], surface_peak_required_bw[k]);
4973
4974 #ifdef __DML_VBA_DEBUG__
4975 DML_LOG_VERBOSE("DML::%s: k=%d, max1: vm_row_bw=%f\n", __func__, k, l->vm_row_bw);
4976 DML_LOG_VERBOSE("DML::%s: k=%d, max2: flip_and_active_bw=%f\n", __func__, k, l->flip_and_active_bw);
4977 DML_LOG_VERBOSE("DML::%s: k=%d, max3: flip_and_prefetch_bw=%f\n", __func__, k, l->flip_and_prefetch_bw);
4978 DML_LOG_VERBOSE("DML::%s: k=%d, max4: active_and_excess_bw=%f\n", __func__, k, l->active_and_excess_bw);
4979 DML_LOG_VERBOSE("DML::%s: k=%d, surface_required_bw=%f\n", __func__, k, surface_required_bw[k]);
4980 DML_LOG_VERBOSE("DML::%s: k=%d, surface_peak_required_bw=%f\n", __func__, k, surface_peak_required_bw[k]);
4981 #endif
4982 } else {
4983 surface_required_bw[k] = 0.0;
4984 }
4985
4986 l->required_bandwidth_mbps += surface_required_bw[k];
4987
4988 #ifdef __DML_VBA_DEBUG__
4989 DML_LOG_VERBOSE("DML::%s: k=%d, NumberOfDPP=%d\n", __func__, k, NumberOfDPP[k]);
4990 DML_LOG_VERBOSE("DML::%s: k=%d, use_qual_row_bw=%d\n", __func__, k, use_qual_row_bw);
4991 DML_LOG_VERBOSE("DML::%s: k=%d, immediate_flip=%d\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip);
4992 DML_LOG_VERBOSE("DML::%s: k=%d, mall_svp_prefetch_factor=%f\n", __func__, k, l->mall_svp_prefetch_factor);
4993 DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p0=%f\n", __func__, k, l->adj_factor_p0);
4994 DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p1=%f\n", __func__, k, l->adj_factor_p1);
4995 DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_cur=%f\n", __func__, k, l->adj_factor_cur);
4996
4997 DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p0_pre=%f\n", __func__, k, l->adj_factor_p0_pre);
4998 DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p1_pre=%f\n", __func__, k, l->adj_factor_p1_pre);
4999 DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_cur_pre=%f\n", __func__, k, l->adj_factor_cur_pre);
5000
5001 DML_LOG_VERBOSE("DML::%s: k=%d, per_plane_flip_bw=%f\n", __func__, k, l->per_plane_flip_bw[k]);
5002 DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_vmrow_bw=%f\n", __func__, k, prefetch_vmrow_bw[k]);
5003 DML_LOG_VERBOSE("DML::%s: k=%d, ReadBandwidthLuma=%f\n", __func__, k, ReadBandwidthLuma[k]);
5004 DML_LOG_VERBOSE("DML::%s: k=%d, ReadBandwidthChroma=%f\n", __func__, k, ReadBandwidthChroma[k]);
5005 DML_LOG_VERBOSE("DML::%s: k=%d, excess_vactive_fill_bw_l=%f\n", __func__, k, excess_vactive_fill_bw_l[k]);
5006 DML_LOG_VERBOSE("DML::%s: k=%d, excess_vactive_fill_bw_c=%f\n", __func__, k, excess_vactive_fill_bw_c[k]);
5007 DML_LOG_VERBOSE("DML::%s: k=%d, cursor_bw=%f\n", __func__, k, cursor_bw[k]);
5008
5009 DML_LOG_VERBOSE("DML::%s: k=%d, meta_row_bw=%f\n", __func__, k, meta_row_bw[k]);
5010 DML_LOG_VERBOSE("DML::%s: k=%d, dpte_row_bw=%f\n", __func__, k, dpte_row_bw[k]);
5011 DML_LOG_VERBOSE("DML::%s: k=%d, PrefetchBandwidthLuma=%f\n", __func__, k, PrefetchBandwidthLuma[k]);
5012 DML_LOG_VERBOSE("DML::%s: k=%d, PrefetchBandwidthChroma=%f\n", __func__, k, PrefetchBandwidthChroma[k]);
5013 DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_cursor_bw=%f\n", __func__, k, prefetch_cursor_bw[k]);
5014 DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane);
5015 DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), soc_state=%s, inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, dml2_core_internal_soc_state_type_str(state_type), inc_flip_bw, is_phantom, exclude_this_plane);
5016 DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane);
5017 #endif
5018 }
5019
5020 return l->required_bandwidth_mbps;
5021 }
5022
CalculateExtraLatency(const struct dml2_display_cfg * display_cfg,unsigned int ROBBufferSizeInKByte,unsigned int RoundTripPingLatencyCycles,unsigned int ReorderingBytes,double DCFCLK,double FabricClock,unsigned int PixelChunkSizeInKByte,double ReturnBW,unsigned int NumberOfActiveSurfaces,unsigned int NumberOfDPP[],unsigned int dpte_group_bytes[],unsigned int tdlut_bytes_per_group[],double HostVMInefficiencyFactor,double HostVMInefficiencyFactorPrefetch,unsigned int HostVMMinPageSize,enum dml2_qos_param_type qos_type,bool max_outstanding_when_urgent_expected,unsigned int max_outstanding_requests,unsigned int request_size_bytes_luma[],unsigned int request_size_bytes_chroma[],unsigned int MetaChunkSize,unsigned int dchub_arb_to_ret_delay,double Ttrip,unsigned int hostvm_mode,double * ExtraLatency,double * ExtraLatency_sr,double * ExtraLatencyPrefetch)5023 static void CalculateExtraLatency(
5024 const struct dml2_display_cfg *display_cfg,
5025 unsigned int ROBBufferSizeInKByte,
5026 unsigned int RoundTripPingLatencyCycles,
5027 unsigned int ReorderingBytes,
5028 double DCFCLK,
5029 double FabricClock,
5030 unsigned int PixelChunkSizeInKByte,
5031 double ReturnBW,
5032 unsigned int NumberOfActiveSurfaces,
5033 unsigned int NumberOfDPP[],
5034 unsigned int dpte_group_bytes[],
5035 unsigned int tdlut_bytes_per_group[],
5036 double HostVMInefficiencyFactor,
5037 double HostVMInefficiencyFactorPrefetch,
5038 unsigned int HostVMMinPageSize,
5039 enum dml2_qos_param_type qos_type,
5040 bool max_outstanding_when_urgent_expected,
5041 unsigned int max_outstanding_requests,
5042 unsigned int request_size_bytes_luma[],
5043 unsigned int request_size_bytes_chroma[],
5044 unsigned int MetaChunkSize,
5045 unsigned int dchub_arb_to_ret_delay,
5046 double Ttrip,
5047 unsigned int hostvm_mode,
5048
5049 // output
5050 double *ExtraLatency, // Tex
5051 double *ExtraLatency_sr, // Tex_sr
5052 double *ExtraLatencyPrefetch)
5053
5054 {
5055 double Tarb;
5056 double Tarb_prefetch;
5057 double Tex_trips;
5058 unsigned int max_request_size_bytes = 0;
5059
5060 CalculateTarb(
5061 display_cfg,
5062 PixelChunkSizeInKByte,
5063 NumberOfActiveSurfaces,
5064 NumberOfDPP,
5065 dpte_group_bytes,
5066 tdlut_bytes_per_group,
5067 HostVMInefficiencyFactor,
5068 HostVMInefficiencyFactorPrefetch,
5069 HostVMMinPageSize,
5070 ReturnBW,
5071 MetaChunkSize,
5072 // output
5073 &Tarb,
5074 &Tarb_prefetch);
5075
5076 Tex_trips = (display_cfg->hostvm_enable && hostvm_mode == 1) ? (2.0 * Ttrip) : 0.0;
5077
5078 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
5079 if (request_size_bytes_luma[k] > max_request_size_bytes)
5080 max_request_size_bytes = request_size_bytes_luma[k];
5081 if (request_size_bytes_chroma[k] > max_request_size_bytes)
5082 max_request_size_bytes = request_size_bytes_chroma[k];
5083 }
5084
5085 if (qos_type == dml2_qos_param_type_dcn4x) {
5086 *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK;
5087 *ExtraLatency = *ExtraLatency_sr;
5088 if (max_outstanding_when_urgent_expected)
5089 *ExtraLatency = *ExtraLatency + (ROBBufferSizeInKByte * 1024 - max_outstanding_requests * max_request_size_bytes) / ReturnBW;
5090 } else {
5091 *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK + RoundTripPingLatencyCycles / FabricClock + ReorderingBytes / ReturnBW;
5092 *ExtraLatency = *ExtraLatency_sr;
5093 }
5094 *ExtraLatency = *ExtraLatency + Tex_trips;
5095 *ExtraLatencyPrefetch = *ExtraLatency + Tarb_prefetch;
5096 *ExtraLatency = *ExtraLatency + Tarb;
5097 *ExtraLatency_sr = *ExtraLatency_sr + Tarb;
5098
5099 #ifdef __DML_VBA_DEBUG__
5100 DML_LOG_VERBOSE("DML::%s: qos_type=%u\n", __func__, qos_type);
5101 DML_LOG_VERBOSE("DML::%s: hostvm_mode=%u\n", __func__, hostvm_mode);
5102 DML_LOG_VERBOSE("DML::%s: Tex_trips=%f\n", __func__, Tex_trips);
5103 DML_LOG_VERBOSE("DML::%s: max_outstanding_when_urgent_expected=%u\n", __func__, max_outstanding_when_urgent_expected);
5104 DML_LOG_VERBOSE("DML::%s: FabricClock=%f\n", __func__, FabricClock);
5105 DML_LOG_VERBOSE("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
5106 DML_LOG_VERBOSE("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
5107 DML_LOG_VERBOSE("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles);
5108 DML_LOG_VERBOSE("DML::%s: ReorderingBytes=%u\n", __func__, ReorderingBytes);
5109 DML_LOG_VERBOSE("DML::%s: Tarb=%f\n", __func__, Tarb);
5110 DML_LOG_VERBOSE("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency);
5111 DML_LOG_VERBOSE("DML::%s: ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr);
5112 DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch=%f\n", __func__, *ExtraLatencyPrefetch);
5113 #endif
5114 }
5115
CalculatePrefetchSchedule(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_CalculatePrefetchSchedule_params * p)5116 static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_CalculatePrefetchSchedule_params *p)
5117 {
5118 struct dml2_core_calcs_CalculatePrefetchSchedule_locals *s = &scratch->CalculatePrefetchSchedule_locals;
5119 bool dcc_mrq_enable;
5120
5121 unsigned int vm_bytes;
5122 unsigned int extra_tdpe_bytes;
5123 unsigned int tdlut_row_bytes;
5124 unsigned int Lo;
5125
5126 s->NoTimeToPrefetch = false;
5127 s->DPPCycles = 0;
5128 s->DISPCLKCycles = 0;
5129 s->DSTTotalPixelsAfterScaler = 0.0;
5130 s->LineTime = 0.0;
5131 s->dst_y_prefetch_equ = 0.0;
5132 s->prefetch_bw_oto = 0.0;
5133 s->Tvm_oto = 0.0;
5134 s->Tr0_oto = 0.0;
5135 s->Tvm_oto_lines = 0.0;
5136 s->Tr0_oto_lines = 0.0;
5137 s->dst_y_prefetch_oto = 0.0;
5138 s->TimeForFetchingVM = 0.0;
5139 s->TimeForFetchingRowInVBlank = 0.0;
5140 s->LinesToRequestPrefetchPixelData = 0.0;
5141 s->HostVMDynamicLevelsTrips = 0;
5142 s->trip_to_mem = 0.0;
5143 *p->Tvm_trips = 0.0;
5144 *p->Tr0_trips = 0.0;
5145 s->Tvm_trips_rounded = 0.0;
5146 s->Tr0_trips_rounded = 0.0;
5147 s->max_Tsw = 0.0;
5148 s->Lsw_oto = 0.0;
5149 *p->Tpre_rounded = 0.0;
5150 s->prefetch_bw_equ = 0.0;
5151 s->Tvm_equ = 0.0;
5152 s->Tr0_equ = 0.0;
5153 s->Tdmbf = 0.0;
5154 s->Tdmec = 0.0;
5155 s->Tdmsks = 0.0;
5156 *p->prefetch_sw_bytes = 0.0;
5157 s->prefetch_bw_pr = 0.0;
5158 s->bytes_pp = 0.0;
5159 s->dep_bytes = 0.0;
5160 s->min_Lsw_oto = 0.0;
5161 s->min_Lsw_equ = 0.0;
5162 s->Tsw_est1 = 0.0;
5163 s->Tsw_est2 = 0.0;
5164 s->Tsw_est3 = 0.0;
5165 s->cursor_prefetch_bytes = 0;
5166 *p->prefetch_cursor_bw = 0;
5167 *p->RequiredPrefetchBWMax = 0.0;
5168
5169 dcc_mrq_enable = (p->dcc_enable && p->mrq_present);
5170
5171 s->TWait_p = p->TWait - p->Ttrip; // TWait includes max(Turg, Ttrip) and Ttrip here is already max(Turg, Ttrip)
5172
5173 if (p->display_cfg->gpuvm_enable == true && p->display_cfg->hostvm_enable == true) {
5174 s->HostVMDynamicLevelsTrips = p->display_cfg->hostvm_max_non_cached_page_table_levels;
5175 } else {
5176 s->HostVMDynamicLevelsTrips = 0;
5177 }
5178 #ifdef __DML_VBA_DEBUG__
5179 DML_LOG_VERBOSE("DML::%s: dcc_enable = %u\n", __func__, p->dcc_enable);
5180 DML_LOG_VERBOSE("DML::%s: mrq_present = %u\n", __func__, p->mrq_present);
5181 DML_LOG_VERBOSE("DML::%s: dcc_mrq_enable = %u\n", __func__, dcc_mrq_enable);
5182 DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->display_cfg->gpuvm_enable);
5183 DML_LOG_VERBOSE("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels);
5184 DML_LOG_VERBOSE("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable);
5185 DML_LOG_VERBOSE("DML::%s: VStartup = %u\n", __func__, p->VStartup);
5186 DML_LOG_VERBOSE("DML::%s: HostVMEnable = %u\n", __func__, p->display_cfg->hostvm_enable);
5187 DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
5188 DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, p->TWait);
5189 DML_LOG_VERBOSE("DML::%s: TWait_p = %f\n", __func__, s->TWait_p);
5190 DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, p->Ttrip);
5191 DML_LOG_VERBOSE("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk);
5192 DML_LOG_VERBOSE("DML::%s: myPipe->Dispclk = %f\n", __func__, p->myPipe->Dispclk);
5193 #endif
5194 CalculateVUpdateAndDynamicMetadataParameters(
5195 p->MaxInterDCNTileRepeaters,
5196 p->myPipe->Dppclk,
5197 p->myPipe->Dispclk,
5198 p->myPipe->DCFClkDeepSleep,
5199 p->myPipe->PixelClock,
5200 p->myPipe->HTotal,
5201 p->myPipe->VBlank,
5202 p->DynamicMetadataTransmittedBytes,
5203 p->DynamicMetadataLinesBeforeActiveRequired,
5204 p->myPipe->InterlaceEnable,
5205 p->myPipe->ProgressiveToInterlaceUnitInOPP,
5206 p->TSetup,
5207
5208 // Output
5209 &s->Tdmbf,
5210 &s->Tdmec,
5211 &s->Tdmsks,
5212 p->VUpdateOffsetPix,
5213 p->VUpdateWidthPix,
5214 p->VReadyOffsetPix);
5215
5216 s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock;
5217 s->trip_to_mem = p->Ttrip;
5218 *p->Tvm_trips = p->ExtraLatencyPrefetch + math_max2(s->trip_to_mem * (p->display_cfg->gpuvm_max_page_table_levels * (s->HostVMDynamicLevelsTrips + 1)), p->Turg);
5219 if (dcc_mrq_enable)
5220 *p->Tvm_trips_flip = *p->Tvm_trips;
5221 else
5222 *p->Tvm_trips_flip = *p->Tvm_trips - s->trip_to_mem;
5223
5224 *p->Tr0_trips_flip = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1);
5225 *p->Tr0_trips = math_max2(*p->Tr0_trips_flip, p->tdlut_opt_time / 2);
5226
5227 if (p->DynamicMetadataVMEnabled == true) {
5228 *p->Tdmdl_vm = s->TWait_p + *p->Tvm_trips;
5229 *p->Tdmdl = *p->Tdmdl_vm + p->Ttrip;
5230 } else {
5231 *p->Tdmdl_vm = 0;
5232 *p->Tdmdl = s->TWait_p + p->ExtraLatencyPrefetch + p->Ttrip; // Tex
5233 }
5234
5235 if (p->DynamicMetadataEnable == true) {
5236 if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) {
5237 *p->NotEnoughTimeForDynamicMetadata = true;
5238 DML_LOG_VERBOSE("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
5239 DML_LOG_VERBOSE("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
5240 DML_LOG_VERBOSE("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
5241 DML_LOG_VERBOSE("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
5242 DML_LOG_VERBOSE("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
5243 } else {
5244 *p->NotEnoughTimeForDynamicMetadata = false;
5245 }
5246 } else {
5247 *p->NotEnoughTimeForDynamicMetadata = false;
5248 }
5249
5250 if (p->myPipe->ScalerEnabled)
5251 s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCL);
5252 else
5253 s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCLLBOnly);
5254
5255 s->DPPCycles = (unsigned int)(s->DPPCycles + p->myPipe->NumberOfCursors * p->DPPCLKDelayCNVCCursor);
5256
5257 s->DISPCLKCycles = (unsigned int)p->DISPCLKDelaySubtotal;
5258
5259 if (p->myPipe->Dppclk == 0.0 || p->myPipe->Dispclk == 0.0)
5260 return true;
5261
5262 *p->DSTXAfterScaler = (unsigned int)math_round(s->DPPCycles * p->myPipe->PixelClock / p->myPipe->Dppclk + s->DISPCLKCycles * p->myPipe->PixelClock / p->myPipe->Dispclk + p->DSCDelay);
5263 *p->DSTXAfterScaler = (unsigned int)math_round(*p->DSTXAfterScaler + (p->myPipe->ODMMode != dml2_odm_mode_bypass ? 18 : 0) + (p->myPipe->DPPPerSurface - 1) * p->DPP_RECOUT_WIDTH +
5264 ((p->myPipe->ODMMode == dml2_odm_mode_split_1to2 || p->myPipe->ODMMode == dml2_odm_mode_mso_1to2) ? (double)p->myPipe->HActive / 2.0 : 0) +
5265 ((p->myPipe->ODMMode == dml2_odm_mode_mso_1to4) ? (double)p->myPipe->HActive * 3.0 / 4.0 : 0));
5266
5267 #ifdef __DML_VBA_DEBUG__
5268 DML_LOG_VERBOSE("DML::%s: DynamicMetadataVMEnabled = %u\n", __func__, p->DynamicMetadataVMEnabled);
5269 DML_LOG_VERBOSE("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles);
5270 DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock);
5271 DML_LOG_VERBOSE("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk);
5272 DML_LOG_VERBOSE("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles);
5273 DML_LOG_VERBOSE("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk);
5274 DML_LOG_VERBOSE("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay);
5275 DML_LOG_VERBOSE("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode);
5276 DML_LOG_VERBOSE("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH);
5277 DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler);
5278
5279 DML_LOG_VERBOSE("DML::%s: setup_for_tdlut = %u\n", __func__, p->setup_for_tdlut);
5280 DML_LOG_VERBOSE("DML::%s: tdlut_opt_time = %f\n", __func__, p->tdlut_opt_time);
5281 DML_LOG_VERBOSE("DML::%s: tdlut_pte_bytes_per_frame = %u\n", __func__, p->tdlut_pte_bytes_per_frame);
5282 DML_LOG_VERBOSE("DML::%s: tdlut_drain_time = %f\n", __func__, p->tdlut_drain_time);
5283 #endif
5284
5285 if (p->OutputFormat == dml2_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP))
5286 *p->DSTYAfterScaler = 1;
5287 else
5288 *p->DSTYAfterScaler = 0;
5289
5290 s->DSTTotalPixelsAfterScaler = *p->DSTYAfterScaler * p->myPipe->HTotal + *p->DSTXAfterScaler;
5291 *p->DSTYAfterScaler = (unsigned int)(math_floor2(s->DSTTotalPixelsAfterScaler / p->myPipe->HTotal, 1));
5292 *p->DSTXAfterScaler = (unsigned int)(s->DSTTotalPixelsAfterScaler - ((double)(*p->DSTYAfterScaler * p->myPipe->HTotal)));
5293 #ifdef __DML_VBA_DEBUG__
5294 DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler);
5295 DML_LOG_VERBOSE("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler);
5296 #endif
5297
5298 #ifdef __DML_VBA_DEBUG__
5299 DML_LOG_VERBOSE("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips);
5300 DML_LOG_VERBOSE("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips);
5301 DML_LOG_VERBOSE("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem);
5302 DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch);
5303 DML_LOG_VERBOSE("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels);
5304 DML_LOG_VERBOSE("DML::%s: HostVMDynamicLevelsTrips = %u\n", __func__, s->HostVMDynamicLevelsTrips);
5305 #endif
5306 if (p->display_cfg->gpuvm_enable) {
5307 s->Tvm_trips_rounded = math_ceil2(4.0 * *p->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
5308 *p->Tvm_trips_flip_rounded = math_ceil2(4.0 * *p->Tvm_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime;
5309 } else {
5310 if (p->DynamicMetadataEnable || dcc_mrq_enable || p->setup_for_tdlut)
5311 s->Tvm_trips_rounded = math_max2(s->LineTime * math_ceil2(4.0*math_max3(p->ExtraLatencyPrefetch, p->Turg, s->trip_to_mem)/s->LineTime, 1)/4, s->LineTime/4.0);
5312 else
5313 s->Tvm_trips_rounded = s->LineTime / 4.0;
5314 *p->Tvm_trips_flip_rounded = s->LineTime / 4.0;
5315 }
5316
5317 s->Tvm_trips_rounded = math_max2(s->Tvm_trips_rounded, s->LineTime / 4.0);
5318 *p->Tvm_trips_flip_rounded = math_max2(*p->Tvm_trips_flip_rounded, s->LineTime / 4.0);
5319
5320 if (p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable) {
5321 s->Tr0_trips_rounded = math_ceil2(4.0 * *p->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
5322 *p->Tr0_trips_flip_rounded = math_ceil2(4.0 * *p->Tr0_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime;
5323 } else {
5324 s->Tr0_trips_rounded = s->LineTime / 4.0;
5325 *p->Tr0_trips_flip_rounded = s->LineTime / 4.0;
5326 }
5327 s->Tr0_trips_rounded = math_max2(s->Tr0_trips_rounded, s->LineTime / 4.0);
5328 *p->Tr0_trips_flip_rounded = math_max2(*p->Tr0_trips_flip_rounded, s->LineTime / 4.0);
5329
5330 if (p->display_cfg->gpuvm_enable == true) {
5331 if (p->display_cfg->gpuvm_max_page_table_levels >= 3) {
5332 *p->Tno_bw = p->ExtraLatencyPrefetch + s->trip_to_mem * (double)((p->display_cfg->gpuvm_max_page_table_levels - 2) * (s->HostVMDynamicLevelsTrips + 1));
5333 } else if (p->display_cfg->gpuvm_max_page_table_levels == 1 && !dcc_mrq_enable && !p->setup_for_tdlut) {
5334 *p->Tno_bw = p->ExtraLatencyPrefetch;
5335 } else {
5336 *p->Tno_bw = 0;
5337 }
5338 } else {
5339 *p->Tno_bw = 0;
5340 }
5341
5342 if (p->mrq_present || p->display_cfg->gpuvm_max_page_table_levels >= 3)
5343 *p->Tno_bw_flip = *p->Tno_bw;
5344 else
5345 *p->Tno_bw_flip = 0; //because there is no 3DLUT for iFlip
5346
5347 if (dml_is_420(p->myPipe->SourcePixelFormat)) {
5348 s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4.0;
5349 } else {
5350 s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC;
5351 }
5352
5353 *p->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC;
5354 *p->prefetch_sw_bytes = *p->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor;
5355
5356 vm_bytes = p->vm_bytes; // vm_bytes is dpde0_bytes_per_frame_ub_l + dpde0_bytes_per_frame_ub_c + 2*extra_dpde_bytes;
5357 extra_tdpe_bytes = (unsigned int)math_max2(0, (p->display_cfg->gpuvm_max_page_table_levels - 1) * 128);
5358
5359 if (p->setup_for_tdlut)
5360 vm_bytes = vm_bytes + p->tdlut_pte_bytes_per_frame + (p->display_cfg->gpuvm_enable ? extra_tdpe_bytes : 0);
5361
5362 tdlut_row_bytes = (unsigned long) math_ceil2(p->tdlut_bytes_per_frame/2.0, 1.0);
5363
5364 s->min_Lsw_oto = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_OTO__;
5365 s->min_Lsw_oto = math_max2(s->min_Lsw_oto, p->tdlut_drain_time / s->LineTime);
5366 s->min_Lsw_oto = math_max2(s->min_Lsw_oto, 2.0);
5367
5368 // use vactive swath bw for prefetch oto and also cap prefetch_bw_oto to max_vratio_oto
5369 // Note: in prefetch calculation, acounting is done mostly per-pipe.
5370 // vactive swath bw represents the per-surface (aka per dml plane) bw to move vratio_l/c lines of bytes_l/c per line time
5371 s->per_pipe_vactive_sw_bw = p->vactive_sw_bw_l / (double)p->myPipe->DPPPerSurface;
5372
5373 // one-to-one prefetch bw as one line of bytes per line time (as per vratio_pre_l/c = 1)
5374 s->prefetch_bw_oto = (p->swath_width_luma_ub * p->myPipe->BytePerPixelY) / s->LineTime;
5375
5376 if (p->myPipe->BytePerPixelC > 0) {
5377 s->per_pipe_vactive_sw_bw += p->vactive_sw_bw_c / (double)p->myPipe->DPPPerSurface;
5378 s->prefetch_bw_oto += (p->swath_width_chroma_ub * p->myPipe->BytePerPixelC) / s->LineTime;
5379 }
5380
5381 /* oto prefetch bw should be always be less than total vactive bw */
5382 //DML_ASSERT(s->prefetch_bw_oto < s->per_pipe_vactive_sw_bw * p->myPipe->DPPPerSurface);
5383
5384 s->prefetch_bw_oto = math_max2(s->per_pipe_vactive_sw_bw, s->prefetch_bw_oto) * p->mall_prefetch_sdp_overhead_factor;
5385
5386 s->prefetch_bw_oto = math_min2(s->prefetch_bw_oto, *p->prefetch_sw_bytes/(s->min_Lsw_oto*s->LineTime));
5387
5388 s->Lsw_oto = math_ceil2(4.0 * *p->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, 1.0) / 4.0;
5389
5390 s->prefetch_bw_oto = math_max3(s->prefetch_bw_oto,
5391 p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw,
5392 (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime));
5393
5394 /* oto bw needs to be outputted even if the oto schedule isn't being used to avoid ms/mp mismatch.
5395 * mp will fail if ms decides to use equ schedule and mp decides to use oto schedule
5396 * and the required bandwidth increases when going from ms to mp
5397 */
5398 *p->RequiredPrefetchBWMax = s->prefetch_bw_oto;
5399
5400 #ifdef __DML_VBA_DEBUG__
5401 DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_l = %f\n", __func__, p->vactive_sw_bw_l);
5402 DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_c = %f\n", __func__, p->vactive_sw_bw_c);
5403 DML_LOG_VERBOSE("DML::%s: per_pipe_vactive_sw_bw = %f\n", __func__, s->per_pipe_vactive_sw_bw);
5404 #endif
5405
5406 if (p->display_cfg->gpuvm_enable == true) {
5407 s->Tvm_oto = math_max3(
5408 *p->Tvm_trips,
5409 *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto,
5410 s->LineTime / 4.0);
5411
5412 #ifdef __DML_VBA_DEBUG__
5413 DML_LOG_VERBOSE("DML::%s: Tvm_oto max0 = %f\n", __func__, *p->Tvm_trips);
5414 DML_LOG_VERBOSE("DML::%s: Tvm_oto max1 = %f\n", __func__, *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto);
5415 DML_LOG_VERBOSE("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4.0);
5416 #endif
5417 } else {
5418 s->Tvm_oto = s->Tvm_trips_rounded;
5419 }
5420
5421 if ((p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable)) {
5422 s->Tr0_oto = math_max3(
5423 *p->Tr0_trips,
5424 (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto,
5425 s->LineTime / 4.0);
5426 #ifdef __DML_VBA_DEBUG__
5427 DML_LOG_VERBOSE("DML::%s: Tr0_oto max0 = %f\n", __func__, *p->Tr0_trips);
5428 DML_LOG_VERBOSE("DML::%s: Tr0_oto max1 = %f\n", __func__, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto);
5429 DML_LOG_VERBOSE("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime / 4);
5430 #endif
5431 } else
5432 s->Tr0_oto = s->LineTime / 4.0;
5433
5434 s->Tvm_oto_lines = math_ceil2(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0;
5435 s->Tr0_oto_lines = math_ceil2(4.0 * s->Tr0_oto / s->LineTime, 1) / 4.0;
5436 s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto;
5437
5438 #ifdef DML_GLOBAL_PREFETCH_CHECK
5439 DML_LOG_VERBOSE("DML::%s: impacted_Tpre = %f\n", __func__, p->impacted_dst_y_pre);
5440 if (p->impacted_dst_y_pre > 0) {
5441 DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto);
5442 s->dst_y_prefetch_oto = math_max2(s->dst_y_prefetch_oto, p->impacted_dst_y_pre);
5443 DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f (impacted)\n", __func__, s->dst_y_prefetch_oto);
5444 }
5445 #endif
5446 *p->Tpre_oto = s->dst_y_prefetch_oto * s->LineTime;
5447
5448 //To (time for delay after scaler) in line time
5449 Lo = (unsigned int)(*p->DSTYAfterScaler + (double)*p->DSTXAfterScaler / (double)p->myPipe->HTotal);
5450
5451 s->min_Lsw_equ = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_EQU__;
5452 s->min_Lsw_equ = math_max2(s->min_Lsw_equ, p->tdlut_drain_time / s->LineTime);
5453 s->min_Lsw_equ = math_max2(s->min_Lsw_equ, 2.0);
5454 //Tpre_equ in line time
5455 if (p->DynamicMetadataVMEnabled && p->DynamicMetadataEnable)
5456 s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, *p->Tvm_trips) + s->TWait_p) / s->LineTime - Lo;
5457 else
5458 s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, p->ExtraLatencyPrefetch) + s->TWait_p) / s->LineTime - Lo;
5459
5460 #ifdef DML_GLOBAL_PREFETCH_CHECK
5461 s->dst_y_prefetch_equ_impacted = math_max2(p->impacted_dst_y_pre, s->dst_y_prefetch_equ);
5462
5463 s->dst_y_prefetch_equ_impacted = math_min2(s->dst_y_prefetch_equ_impacted, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
5464
5465 if (s->dst_y_prefetch_equ_impacted > s->dst_y_prefetch_equ)
5466 s->dst_y_prefetch_equ -= s->dst_y_prefetch_equ_impacted - s->dst_y_prefetch_equ;
5467 #endif
5468
5469 s->dst_y_prefetch_equ = math_min2(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
5470
5471 #ifdef __DML_VBA_DEBUG__
5472 DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal);
5473 DML_LOG_VERBOSE("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto);
5474 DML_LOG_VERBOSE("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ);
5475 DML_LOG_VERBOSE("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw);
5476 DML_LOG_VERBOSE("DML::%s: Tno_bw_flip = %f\n", __func__, *p->Tno_bw_flip);
5477 DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch);
5478 DML_LOG_VERBOSE("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem);
5479 DML_LOG_VERBOSE("DML::%s: mall_prefetch_sdp_overhead_factor = %f\n", __func__, p->mall_prefetch_sdp_overhead_factor);
5480 DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
5481 DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
5482 DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
5483 DML_LOG_VERBOSE("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC);
5484 DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
5485 DML_LOG_VERBOSE("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub);
5486 DML_LOG_VERBOSE("DML::%s: prefetch_sw_bytes = %f\n", __func__, *p->prefetch_sw_bytes);
5487 DML_LOG_VERBOSE("DML::%s: max_Tsw = %f\n", __func__, s->max_Tsw);
5488 DML_LOG_VERBOSE("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp);
5489 DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, vm_bytes);
5490 DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
5491 DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
5492 DML_LOG_VERBOSE("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips);
5493 DML_LOG_VERBOSE("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips);
5494 DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip = %f\n", __func__, *p->Tvm_trips_flip);
5495 DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip = %f\n", __func__, *p->Tr0_trips_flip);
5496 DML_LOG_VERBOSE("DML::%s: prefetch_bw_pr = %f\n", __func__, s->prefetch_bw_pr);
5497 DML_LOG_VERBOSE("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto);
5498 DML_LOG_VERBOSE("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto);
5499 DML_LOG_VERBOSE("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto);
5500 DML_LOG_VERBOSE("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines);
5501 DML_LOG_VERBOSE("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines);
5502 DML_LOG_VERBOSE("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto);
5503 DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto);
5504 DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ);
5505 DML_LOG_VERBOSE("DML::%s: tdlut_row_bytes = %d\n", __func__, tdlut_row_bytes);
5506 DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %d\n", __func__, p->meta_row_bytes);
5507 #endif
5508 s->dst_y_prefetch_equ = math_floor2(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0;
5509 *p->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime;
5510
5511 #ifdef __DML_VBA_DEBUG__
5512 DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ);
5513 DML_LOG_VERBOSE("DML::%s: LineTime: %f\n", __func__, s->LineTime);
5514 DML_LOG_VERBOSE("DML::%s: VStartup: %u\n", __func__, p->VStartup);
5515 DML_LOG_VERBOSE("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime);
5516 DML_LOG_VERBOSE("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup);
5517 DML_LOG_VERBOSE("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc);
5518 DML_LOG_VERBOSE("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait);
5519 DML_LOG_VERBOSE("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
5520 DML_LOG_VERBOSE("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
5521 DML_LOG_VERBOSE("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
5522 DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, p->TWait);
5523 DML_LOG_VERBOSE("DML::%s: TWait_p = %f\n", __func__, s->TWait_p);
5524 DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, p->Ttrip);
5525 DML_LOG_VERBOSE("DML::%s: Tex = %f\n", __func__, p->ExtraLatencyPrefetch);
5526 DML_LOG_VERBOSE("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm);
5527 DML_LOG_VERBOSE("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
5528 DML_LOG_VERBOSE("DML::%s: TWait_p: %fus\n", __func__, s->TWait_p);
5529 DML_LOG_VERBOSE("DML::%s: Ttrip: %fus\n", __func__, p->Ttrip);
5530 DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler);
5531 DML_LOG_VERBOSE("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler);
5532 DML_LOG_VERBOSE("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes*p->HostVMInefficiencyFactor);
5533 DML_LOG_VERBOSE("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, p->PixelPTEBytesPerRow*p->HostVMInefficiencyFactor+p->meta_row_bytes+tdlut_row_bytes);
5534 DML_LOG_VERBOSE("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
5535 DML_LOG_VERBOSE("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, (s->dst_y_prefetch_equ * s->LineTime), *p->Tpre_rounded, (*p->Tpre_rounded - (s->dst_y_prefetch_equ * s->LineTime)));
5536 DML_LOG_VERBOSE("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips));
5537 #endif
5538
5539 *p->dst_y_per_vm_vblank = 0;
5540 *p->dst_y_per_row_vblank = 0;
5541 *p->VRatioPrefetchY = 0;
5542 *p->VRatioPrefetchC = 0;
5543 *p->RequiredPrefetchPixelDataBWLuma = 0;
5544
5545 // Derive bandwidth by finding how much data to move within the time constraint
5546 // Tpre_rounded is Tpre rounding to 2-bit fraction
5547 // Tvm_trips_rounded is Tvm_trips ceiling to 1/4 line time
5548 // Tr0_trips_rounded is Tr0_trips ceiling to 1/4 line time
5549 // So that means prefetch bw calculated can be higher since the total time available for prefetch is less
5550 bool min_Lsw_equ_ok = *p->Tpre_rounded >= s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded + s->min_Lsw_equ*s->LineTime;
5551 bool tpre_gt_req_latency = true;
5552 #if 0
5553 // Check that Tpre_rounded is big enough if all of the stages of the prefetch are time constrained.
5554 // The terms Tvm_trips_rounded and Tr0_trips_rounded represent the min time constraints for the VM and row stages.
5555 // Normally, these terms cover the overall time constraint for Tpre >= (Tex + max{Ttrip, Turg}), but if these terms are at their minimum, an explicit check is necessary.
5556 tpre_gt_req_latency = *p->Tpre_rounded > (math_max2(p->Turg, s->trip_to_mem) + p->ExtraLatencyPrefetch);
5557 #endif
5558
5559 if (s->dst_y_prefetch_equ > 1 && min_Lsw_equ_ok && tpre_gt_req_latency) {
5560 s->prefetch_bw1 = 0.;
5561 s->prefetch_bw2 = 0.;
5562 s->prefetch_bw3 = 0.;
5563 s->prefetch_bw4 = 0.;
5564
5565 // prefetch_bw1: VM + 2*R0 + SW
5566 if (*p->Tpre_rounded - *p->Tno_bw > 0) {
5567 s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor
5568 + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)
5569 + *p->prefetch_sw_bytes)
5570 / (*p->Tpre_rounded - *p->Tno_bw);
5571 s->Tsw_est1 = *p->prefetch_sw_bytes / s->prefetch_bw1;
5572 } else
5573 s->prefetch_bw1 = 0;
5574
5575 DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1);
5576 if ((s->Tsw_est1 < s->min_Lsw_equ * s->LineTime) && (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) {
5577 s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) /
5578 (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw);
5579 #ifdef __DML_VBA_DEBUG__
5580 DML_LOG_VERBOSE("DML::%s: vm and 2 rows bytes = %f\n", __func__, (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)));
5581 DML_LOG_VERBOSE("DML::%s: Tpre_rounded = %f\n", __func__, *p->Tpre_rounded);
5582 DML_LOG_VERBOSE("DML::%s: minus term = %f\n", __func__, s->min_Lsw_equ * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw);
5583 DML_LOG_VERBOSE("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ);
5584 DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime);
5585 DML_LOG_VERBOSE("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw);
5586 DML_LOG_VERBOSE("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw));
5587 DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1);
5588 #endif
5589 }
5590
5591 // prefetch_bw2: VM + SW
5592 if (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded > 0) {
5593 s->prefetch_bw2 = (vm_bytes * p->HostVMInefficiencyFactor + *p->prefetch_sw_bytes) /
5594 (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded);
5595 s->Tsw_est2 = *p->prefetch_sw_bytes / s->prefetch_bw2;
5596 } else
5597 s->prefetch_bw2 = 0;
5598
5599 DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f\n", __func__, s->prefetch_bw2);
5600 if ((s->Tsw_est2 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime) > 0)) {
5601 s->prefetch_bw2 = vm_bytes * p->HostVMInefficiencyFactor / (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime);
5602 DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f (updated)\n", __func__, s->prefetch_bw2);
5603 }
5604
5605 // prefetch_bw3: 2*R0 + SW
5606 if (*p->Tpre_rounded - s->Tvm_trips_rounded > 0) {
5607 s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + *p->prefetch_sw_bytes) /
5608 (*p->Tpre_rounded - s->Tvm_trips_rounded);
5609 s->Tsw_est3 = *p->prefetch_sw_bytes / s->prefetch_bw3;
5610 } else
5611 s->prefetch_bw3 = 0;
5612
5613 DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3);
5614 if ((s->Tsw_est3 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) {
5615 s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded);
5616 DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f (updated)\n", __func__, s->prefetch_bw3);
5617 }
5618
5619 // prefetch_bw4: SW
5620 if (*p->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0)
5621 s->prefetch_bw4 = *p->prefetch_sw_bytes / (*p->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded);
5622 else
5623 s->prefetch_bw4 = 0;
5624
5625 #ifdef __DML_VBA_DEBUG__
5626 DML_LOG_VERBOSE("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
5627 DML_LOG_VERBOSE("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, s->dst_y_prefetch_equ * s->LineTime, *p->Tpre_rounded, (*p->Tpre_rounded - (s->dst_y_prefetch_equ * s->LineTime)));
5628 DML_LOG_VERBOSE("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips));
5629 DML_LOG_VERBOSE("DML::%s: Tr0_trips=%f Tr0_trips_rounded: %f, delta=%f\n", __func__, *p->Tr0_trips, s->Tr0_trips_rounded, (s->Tr0_trips_rounded - *p->Tr0_trips));
5630 DML_LOG_VERBOSE("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1);
5631 DML_LOG_VERBOSE("DML::%s: Tsw_est2: %f\n", __func__, s->Tsw_est2);
5632 DML_LOG_VERBOSE("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3);
5633 DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f (final)\n", __func__, s->prefetch_bw1);
5634 DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f (final)\n", __func__, s->prefetch_bw2);
5635 DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f (final)\n", __func__, s->prefetch_bw3);
5636 DML_LOG_VERBOSE("DML::%s: prefetch_bw4: %f (final)\n", __func__, s->prefetch_bw4);
5637 #endif
5638 {
5639 bool Case1OK = false;
5640 bool Case2OK = false;
5641 bool Case3OK = false;
5642
5643 // get "equalized" bw among all stages (vm, r0, sw): the base case is all 3 stages just above the latency-based requirement,
5644 // so that no particular stage is disproportionately favored; next is r0 more aggressive, then vm more aggressive, and the worst case is all stages aggressive
5645 // vs the latency-based number
5646
5647 // prefetch_bw1: VM + 2*R0 + SW
5648 // so prefetch_bw1 will have enough bw to transfer the necessary data within Tpre_rounded - Tno_bw (Tpre is the worst-case latency-based time to fetch the data)
5649 // the checks below make sure the equ bw won't be more aggressive than the latency-based requirement.
5650 // check vm time >= vm_trips
5651 // check r0 time >= r0_trips
5652
5653 double total_row_bytes = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes);
5654
5655 DML_LOG_VERBOSE("DML::%s: Tvm_trips_rounded = %f\n", __func__, s->Tvm_trips_rounded);
5656 DML_LOG_VERBOSE("DML::%s: Tr0_trips_rounded = %f\n", __func__, s->Tr0_trips_rounded);
5657
5658 if (s->prefetch_bw1 > 0) {
5659 double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw1;
5660 double row_transfer_time = total_row_bytes / s->prefetch_bw1;
5661 DML_LOG_VERBOSE("DML::%s: Case1: vm_transfer_time = %f\n", __func__, vm_transfer_time);
5662 DML_LOG_VERBOSE("DML::%s: Case1: row_transfer_time = %f\n", __func__, row_transfer_time);
5663 if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) {
5664 Case1OK = true;
5665 }
5666 }
5667
5668 // prefetch_bw2: VM + SW
5669 // prefetch_bw2 will be enough bw to transfer VM and SW data within (Tpre_rounded - Tr0_trips_rounded - Tno_bw)
5670 // check vm time >= vm_trips
5671 // check r0 time < r0_trips
5672 if (s->prefetch_bw2 > 0) {
5673 double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw2;
5674 double row_transfer_time = total_row_bytes / s->prefetch_bw2;
5675 DML_LOG_VERBOSE("DML::%s: Case2: vm_transfer_time = %f\n", __func__, vm_transfer_time);
5676 DML_LOG_VERBOSE("DML::%s: Case2: row_transfer_time = %f\n", __func__, row_transfer_time);
5677 if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time < s->Tr0_trips_rounded) {
5678 Case2OK = true;
5679 }
5680 }
5681
5682 // prefetch_bw3: 2*R0 + SW
5683 // check vm time < vm_trips
5684 // check r0 time >= r0_trips
5685 if (s->prefetch_bw3 > 0) {
5686 double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw3;
5687 double row_transfer_time = total_row_bytes / s->prefetch_bw3;
5688 DML_LOG_VERBOSE("DML::%s: Case3: vm_transfer_time = %f\n", __func__, vm_transfer_time);
5689 DML_LOG_VERBOSE("DML::%s: Case3: row_transfer_time = %f\n", __func__, row_transfer_time);
5690 if (vm_transfer_time < s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) {
5691 Case3OK = true;
5692 }
5693 }
5694
5695 if (Case1OK) {
5696 s->prefetch_bw_equ = s->prefetch_bw1;
5697 } else if (Case2OK) {
5698 s->prefetch_bw_equ = s->prefetch_bw2;
5699 } else if (Case3OK) {
5700 s->prefetch_bw_equ = s->prefetch_bw3;
5701 } else {
5702 s->prefetch_bw_equ = s->prefetch_bw4;
5703 }
5704
5705 s->prefetch_bw_equ = math_max3(s->prefetch_bw_equ,
5706 p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw,
5707 (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime));
5708 #ifdef __DML_VBA_DEBUG__
5709 DML_LOG_VERBOSE("DML::%s: Case1OK: %u\n", __func__, Case1OK);
5710 DML_LOG_VERBOSE("DML::%s: Case2OK: %u\n", __func__, Case2OK);
5711 DML_LOG_VERBOSE("DML::%s: Case3OK: %u\n", __func__, Case3OK);
5712 DML_LOG_VERBOSE("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ);
5713 #endif
5714
5715 if (s->prefetch_bw_equ > 0) {
5716 if (p->display_cfg->gpuvm_enable == true) {
5717 s->Tvm_equ = math_max3(*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_equ, *p->Tvm_trips, s->LineTime / 4);
5718 } else {
5719 s->Tvm_equ = s->LineTime / 4;
5720 }
5721
5722 if (p->display_cfg->gpuvm_enable == true || dcc_mrq_enable || p->setup_for_tdlut) {
5723 s->Tr0_equ = math_max3((p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_equ, // PixelPTEBytesPerRow is dpte_row_bytes
5724 *p->Tr0_trips,
5725 s->LineTime / 4);
5726 } else {
5727 s->Tr0_equ = s->LineTime / 4;
5728 }
5729 } else {
5730 s->Tvm_equ = 0;
5731 s->Tr0_equ = 0;
5732 DML_LOG_VERBOSE("DML::%s: prefetch_bw_equ equals 0!\n", __func__);
5733 }
5734 }
5735 #ifdef __DML_VBA_DEBUG__
5736 DML_LOG_VERBOSE("DML::%s: Tvm_equ = %f\n", __func__, s->Tvm_equ);
5737 DML_LOG_VERBOSE("DML::%s: Tr0_equ = %f\n", __func__, s->Tr0_equ);
5738 #endif
5739 // Use the more stressful prefetch schedule
5740 if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) {
5741 *p->dst_y_prefetch = s->dst_y_prefetch_oto;
5742 s->TimeForFetchingVM = s->Tvm_oto;
5743 s->TimeForFetchingRowInVBlank = s->Tr0_oto;
5744
5745 *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0;
5746 *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
5747 #ifdef __DML_VBA_DEBUG__
5748 DML_LOG_VERBOSE("DML::%s: Using oto scheduling for prefetch\n", __func__);
5749 #endif
5750
5751 } else {
5752 *p->dst_y_prefetch = s->dst_y_prefetch_equ;
5753
5754 if (s->dst_y_prefetch_equ < s->dst_y_prefetch_equ_impacted)
5755 *p->dst_y_prefetch = s->dst_y_prefetch_equ_impacted;
5756
5757 s->TimeForFetchingVM = s->Tvm_equ;
5758 s->TimeForFetchingRowInVBlank = s->Tr0_equ;
5759
5760 *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0;
5761 *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
5762
5763 /* equ bw should be propagated so a ceiling of the equ bw is accounted for prior to mode programming.
5764 * Overall bandwidth may be lower when going from mode support to mode programming but final pixel data
5765 * bandwidth may end up higher than what was calculated in mode support.
5766 */
5767 *p->RequiredPrefetchBWMax = math_max2(s->prefetch_bw_equ, *p->RequiredPrefetchBWMax);
5768
5769 #ifdef __DML_VBA_DEBUG__
5770 DML_LOG_VERBOSE("DML::%s: Using equ bw scheduling for prefetch\n", __func__);
5771 #endif
5772 }
5773
5774 // Lsw = dst_y_prefetch - (dst_y_per_vm_vblank + 2*dst_y_per_row_vblank)
5775 s->LinesToRequestPrefetchPixelData = *p->dst_y_prefetch - *p->dst_y_per_vm_vblank - 2 * *p->dst_y_per_row_vblank; // Lsw
5776
5777 s->cursor_prefetch_bytes = (unsigned int)math_max2(p->cursor_bytes_per_chunk, 4 * p->cursor_bytes_per_line);
5778 *p->prefetch_cursor_bw = p->num_cursors * s->cursor_prefetch_bytes / (s->LinesToRequestPrefetchPixelData * s->LineTime);
5779 *p->prefetch_swath_time_us = (s->LinesToRequestPrefetchPixelData * s->LineTime);
5780
5781 #ifdef __DML_VBA_DEBUG__
5782 DML_LOG_VERBOSE("DML::%s: TimeForFetchingVM = %f\n", __func__, s->TimeForFetchingVM);
5783 DML_LOG_VERBOSE("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank);
5784 DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime);
5785 DML_LOG_VERBOSE("DML::%s: dst_y_prefetch = %f\n", __func__, *p->dst_y_prefetch);
5786 DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank);
5787 DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank);
5788 DML_LOG_VERBOSE("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData);
5789 DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
5790 DML_LOG_VERBOSE("DML::%s: prefetch_swath_time_us = %f\n", __func__, *p->prefetch_swath_time_us);
5791
5792 DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, p->cursor_bytes_per_chunk);
5793 DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_line = %d\n", __func__, p->cursor_bytes_per_line);
5794 DML_LOG_VERBOSE("DML::%s: cursor_prefetch_bytes = %d\n", __func__, s->cursor_prefetch_bytes);
5795 DML_LOG_VERBOSE("DML::%s: prefetch_cursor_bw = %f\n", __func__, *p->prefetch_cursor_bw);
5796 #endif
5797 DML_ASSERT(*p->dst_y_prefetch < 64);
5798
5799 unsigned int min_lsw_required = (unsigned int)math_max2(2, p->tdlut_drain_time / s->LineTime);
5800 if (s->LinesToRequestPrefetchPixelData >= min_lsw_required && s->prefetch_bw_equ > 0) {
5801 *p->VRatioPrefetchY = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData;
5802 *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY, 1.0);
5803 #ifdef __DML_VBA_DEBUG__
5804 DML_LOG_VERBOSE("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
5805 DML_LOG_VERBOSE("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY);
5806 DML_LOG_VERBOSE("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY);
5807 #endif
5808 if ((p->SwathHeightY > 4) && (p->VInitPreFillY > 3)) {
5809 if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillY - 3.0) / 2.0) {
5810 *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY,
5811 (double)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0));
5812 } else {
5813 s->NoTimeToPrefetch = true;
5814 DML_LOG_VERBOSE("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY);
5815 *p->VRatioPrefetchY = 0;
5816 }
5817 #ifdef __DML_VBA_DEBUG__
5818 DML_LOG_VERBOSE("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
5819 DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
5820 DML_LOG_VERBOSE("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY);
5821 #endif
5822 }
5823
5824 *p->VRatioPrefetchC = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData;
5825 *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, 1.0);
5826
5827 #ifdef __DML_VBA_DEBUG__
5828 DML_LOG_VERBOSE("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
5829 DML_LOG_VERBOSE("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC);
5830 DML_LOG_VERBOSE("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC);
5831 #endif
5832 if ((p->SwathHeightC > 4) && (p->VInitPreFillC > 3)) {
5833 if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillC - 3.0) / 2.0) {
5834 *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, (double)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0));
5835 } else {
5836 s->NoTimeToPrefetch = true;
5837 DML_LOG_VERBOSE("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC);
5838 *p->VRatioPrefetchC = 0;
5839 }
5840 #ifdef __DML_VBA_DEBUG__
5841 DML_LOG_VERBOSE("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
5842 DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
5843 DML_LOG_VERBOSE("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC);
5844 #endif
5845 }
5846
5847 *p->RequiredPrefetchPixelDataBWLuma = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelY * p->swath_width_luma_ub / s->LineTime;
5848 *p->RequiredPrefetchPixelDataBWChroma = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelC * p->swath_width_chroma_ub / s->LineTime;
5849
5850 #ifdef __DML_VBA_DEBUG__
5851 DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
5852 DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
5853 DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime);
5854 DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWLuma);
5855 DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWChroma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWChroma);
5856 #endif
5857 } else {
5858 s->NoTimeToPrefetch = true;
5859 DML_LOG_VERBOSE("DML::%s: No time to prefetch!, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required);
5860 DML_LOG_VERBOSE("DML::%s: No time to prefetch!, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ);
5861 *p->VRatioPrefetchY = 0;
5862 *p->VRatioPrefetchC = 0;
5863 *p->RequiredPrefetchPixelDataBWLuma = 0;
5864 *p->RequiredPrefetchPixelDataBWChroma = 0;
5865 }
5866 DML_LOG_VERBOSE("DML: Tpre: %fus - sum of time to request 2 x data pte, swaths\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingVM);
5867 DML_LOG_VERBOSE("DML: Tvm: %fus - time to fetch vm\n", s->TimeForFetchingVM);
5868 DML_LOG_VERBOSE("DML: Tr0: %fus - time to fetch first row of data pagetables\n", s->TimeForFetchingRowInVBlank);
5869 DML_LOG_VERBOSE("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime);
5870 DML_LOG_VERBOSE("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime);
5871 DML_LOG_VERBOSE("DML: Tvstartup - TSetup - Tcalc - TWait - Tpre - To > 0\n");
5872 DML_LOG_VERBOSE("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingVM - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup);
5873 DML_LOG_VERBOSE("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow);
5874
5875 } else {
5876 DML_LOG_VERBOSE("DML::%s: No time to prefetch! dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ);
5877 DML_LOG_VERBOSE("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n",
5878 __func__, min_Lsw_equ_ok, *p->Tpre_rounded, s->Tvm_trips_rounded, 2.0*s->Tr0_trips_rounded, s->min_Lsw_equ*s->LineTime);
5879 s->NoTimeToPrefetch = true;
5880 s->TimeForFetchingVM = 0;
5881 s->TimeForFetchingRowInVBlank = 0;
5882 *p->dst_y_per_vm_vblank = 0;
5883 *p->dst_y_per_row_vblank = 0;
5884 s->LinesToRequestPrefetchPixelData = 0;
5885 *p->VRatioPrefetchY = 0;
5886 *p->VRatioPrefetchC = 0;
5887 *p->RequiredPrefetchPixelDataBWLuma = 0;
5888 *p->RequiredPrefetchPixelDataBWChroma = 0;
5889 }
5890
5891 {
5892 double prefetch_vm_bw;
5893 double prefetch_row_bw;
5894
5895 if (vm_bytes == 0) {
5896 prefetch_vm_bw = 0;
5897 } else if (*p->dst_y_per_vm_vblank > 0) {
5898 #ifdef __DML_VBA_DEBUG__
5899 DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
5900 DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank);
5901 DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime);
5902 #endif
5903 prefetch_vm_bw = vm_bytes * p->HostVMInefficiencyFactor / (*p->dst_y_per_vm_vblank * s->LineTime);
5904 #ifdef __DML_VBA_DEBUG__
5905 DML_LOG_VERBOSE("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
5906 #endif
5907 } else {
5908 prefetch_vm_bw = 0;
5909 s->NoTimeToPrefetch = true;
5910 DML_LOG_VERBOSE("DML::%s: No time to prefetch!. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank);
5911 }
5912
5913 if (p->PixelPTEBytesPerRow == 0 && tdlut_row_bytes == 0) {
5914 prefetch_row_bw = 0;
5915 } else if (*p->dst_y_per_row_vblank > 0) {
5916 prefetch_row_bw = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + tdlut_row_bytes) / (*p->dst_y_per_row_vblank * s->LineTime);
5917
5918 #ifdef __DML_VBA_DEBUG__
5919 DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
5920 DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank);
5921 DML_LOG_VERBOSE("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
5922 #endif
5923 } else {
5924 prefetch_row_bw = 0;
5925 s->NoTimeToPrefetch = true;
5926 DML_LOG_VERBOSE("DML::%s: No time to prefetch!. dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank);
5927 }
5928
5929 *p->prefetch_vmrow_bw = math_max2(prefetch_vm_bw, prefetch_row_bw);
5930 }
5931
5932 if (s->NoTimeToPrefetch) {
5933 s->TimeForFetchingVM = 0;
5934 s->TimeForFetchingRowInVBlank = 0;
5935 *p->dst_y_per_vm_vblank = 0;
5936 *p->dst_y_per_row_vblank = 0;
5937 *p->dst_y_prefetch = 0;
5938 s->LinesToRequestPrefetchPixelData = 0;
5939 *p->VRatioPrefetchY = 0;
5940 *p->VRatioPrefetchC = 0;
5941 *p->RequiredPrefetchPixelDataBWLuma = 0;
5942 *p->RequiredPrefetchPixelDataBWChroma = 0;
5943 *p->prefetch_vmrow_bw = 0;
5944 }
5945
5946 DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f (final)\n", __func__, *p->dst_y_per_vm_vblank);
5947 DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f (final)\n", __func__, *p->dst_y_per_row_vblank);
5948 DML_LOG_VERBOSE("DML::%s: prefetch_vmrow_bw = %f (final)\n", __func__, *p->prefetch_vmrow_bw);
5949 DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWLuma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWLuma);
5950 DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWChroma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWChroma);
5951 DML_LOG_VERBOSE("DML::%s: NoTimeToPrefetch=%d\n", __func__, s->NoTimeToPrefetch);
5952
5953 return s->NoTimeToPrefetch;
5954 }
5955
/*
 * Compute how many source lines fit in the line buffer for one pipe.
 *
 * The per-pipe input width is the viewport width divided evenly by num_pipes;
 * the viewport height is used instead when dml_is_vertical_rotation() reports
 * a vertical rotation.  That width is divided by h_ratio when h_ratio exceeds
 * 1.0.  The line buffer capacity is evaluated at a fixed 57.0 bits per pixel,
 * floored to whole lines and capped at max_line_buffer_lines.
 *
 * num_pipes is used as an integer divisor, so callers must pass a non-zero
 * value.
 */
static unsigned int get_num_lb_source_lines(unsigned int max_line_buffer_lines,
	unsigned int line_buffer_size_bits,
	unsigned int num_pipes,
	unsigned int vp_width,
	unsigned int vp_height,
	double h_ratio,
	enum dml2_rotation_angle rotation_angle)
{
	const double lb_bit_per_pixel = 57.0;
	unsigned int recin_width;
	double lb_pixels;
	double recin_pixels_per_line;
	double lines_in_lb;

	/* A vertical rotation swaps which viewport dimension is split across the pipes. */
	if (dml_is_vertical_rotation(rotation_angle))
		recin_width = vp_height / num_pipes;
	else
		recin_width = vp_width / num_pipes;

	lb_pixels = line_buffer_size_bits / lb_bit_per_pixel;
	recin_pixels_per_line = recin_width / math_max2(h_ratio, 1.0);
	lines_in_lb = math_floor2(lb_pixels / recin_pixels_per_line, 1.0);

	return (unsigned int) math_min2((double) max_line_buffer_lines, lines_in_lb);
}
5976
/*
 * Find the plane, other than this_plane_idx, with the largest entry in
 * Trpd_dcfclk_cycles[].
 *
 * @this_plane_idx: index of the plane being evaluated; it is never selected
 *                  as its own worst impactor
 * @num_planes: number of valid entries in Trpd_dcfclk_cycles[]
 * @Trpd_dcfclk_cycles: per-plane cycle counts to compare
 *
 * Returns the index of the largest entry among the other planes; on ties the
 * lowest index wins.  When there is no other plane (e.g. num_planes <= 1),
 * this_plane_idx is returned.
 */
static unsigned int find_max_impact_plane(unsigned int this_plane_idx, unsigned int num_planes, unsigned int Trpd_dcfclk_cycles[])
{
	bool found = false;
	unsigned int max_value = 0;
	/* Fallback for the "no other plane" case. */
	unsigned int max_idx = this_plane_idx;

	for (unsigned int i = 0; i < num_planes; i++) {
		if (i == this_plane_idx)
			continue;

		/*
		 * The first candidate is always accepted, so index 0 is a
		 * perfectly valid answer and a count of 0 still selects a plane.
		 * Counts are compared as unsigned values.
		 */
		if (!found || Trpd_dcfclk_cycles[i] > max_value) {
			max_value = Trpd_dcfclk_cycles[i];
			max_idx = i;
			found = true;
		}
	}

	return max_idx;
}
5994
/*
 * Sum prefetch_swath_bytes[] over all planes except exclude_plane_idx and
 * divide the total by bw_mbps.
 *
 * @exclude_plane_idx: plane left out of the sum (an index >= num_planes
 *                     excludes nothing)
 * @num_planes: number of valid entries in prefetch_swath_bytes[]
 * @prefetch_swath_bytes: per-plane prefetch swath byte counts
 * @bw_mbps: bandwidth used as the divisor; not checked against zero here
 *
 * Returns total bytes / bw_mbps.
 */
static double calculate_impacted_Tsw(unsigned int exclude_plane_idx, unsigned int num_planes, double *prefetch_swath_bytes, double bw_mbps)
{
	double total_bytes = 0.0;
	unsigned int plane;

	for (plane = 0; plane < num_planes; plane++) {
		if (plane == exclude_plane_idx)
			continue;

		total_bytes += prefetch_swath_bytes[plane];
	}

	return total_bytes / bw_mbps;
}
6005
6006 // a global check against the aggregate effect of the per plane prefetch schedule
CheckGlobalPrefetchAdmissibility(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params * p)6007 static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core_internal_scratch *scratch,
6008 struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *p)
6009 {
6010 struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals *s = &scratch->CheckGlobalPrefetchAdmissibility_locals;
6011 unsigned int i, k;
6012
6013 memset(s, 0, sizeof(struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals));
6014
6015 *p->recalc_prefetch_schedule = 0;
6016 s->prefetch_global_check_passed = 1;
6017 // worst case if the rob and cdb is fully hogged
6018 s->max_Trpd_dcfclk_cycles = (unsigned int) math_ceil2((p->rob_buffer_size_kbytes*1024 + p->compressed_buffer_size_kbytes*DML_MAX_COMPRESSION_RATIO*1024)/64.0, 1.0);
6019 #ifdef __DML_VBA_DEBUG__
6020 DML_LOG_VERBOSE("DML::%s: num_active_planes = %d\n", __func__, p->num_active_planes);
6021 DML_LOG_VERBOSE("DML::%s: rob_buffer_size_kbytes = %d\n", __func__, p->rob_buffer_size_kbytes);
6022 DML_LOG_VERBOSE("DML::%s: compressed_buffer_size_kbytes = %d\n", __func__, p->compressed_buffer_size_kbytes);
6023 DML_LOG_VERBOSE("DML::%s: estimated_urg_bandwidth_required_mbps = %f\n", __func__, p->estimated_urg_bandwidth_required_mbps);
6024 DML_LOG_VERBOSE("DML::%s: estimated_dcfclk_mhz = %f\n", __func__, p->estimated_dcfclk_mhz);
6025 DML_LOG_VERBOSE("DML::%s: max_Trpd_dcfclk_cycles = %u\n", __func__, s->max_Trpd_dcfclk_cycles);
6026 #endif
6027
6028 // calculate the return impact from each plane, request is 256B per dcfclk
6029 for (i = 0; i < p->num_active_planes; i++) {
6030 s->src_detile_buf_size_bytes_l[i] = p->detile_buffer_size_bytes_l[i];
6031 s->src_detile_buf_size_bytes_c[i] = p->detile_buffer_size_bytes_c[i];
6032 s->src_swath_bytes_l[i] = p->full_swath_bytes_l[i];
6033 s->src_swath_bytes_c[i] = p->full_swath_bytes_c[i];
6034
6035 if (p->pixel_format[i] == dml2_420_10) {
6036 s->src_detile_buf_size_bytes_l[i] = (unsigned int) (s->src_detile_buf_size_bytes_l[i] * 1.5);
6037 s->src_detile_buf_size_bytes_c[i] = (unsigned int) (s->src_detile_buf_size_bytes_c[i] * 1.5);
6038 s->src_swath_bytes_l[i] = (unsigned int) (s->src_swath_bytes_l[i] * 1.5);
6039 s->src_swath_bytes_c[i] = (unsigned int) (s->src_swath_bytes_c[i] * 1.5);
6040 }
6041
6042 s->burst_bytes_to_fill_det = (unsigned int) (math_floor2(s->src_detile_buf_size_bytes_l[i] / p->chunk_bytes_l, 1) * p->chunk_bytes_l);
6043 s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_l[i] / p->swath_height_l[i], 1) * s->src_swath_bytes_l[i]);
6044
6045 #ifdef __DML_VBA_DEBUG__
6046 DML_LOG_VERBOSE("DML::%s: i=%u pixel_format = %d\n", __func__, i, p->pixel_format[i]);
6047 DML_LOG_VERBOSE("DML::%s: i=%u chunk_bytes_l = %d\n", __func__, i, p->chunk_bytes_l);
6048 DML_LOG_VERBOSE("DML::%s: i=%u lb_source_lines_l = %d\n", __func__, i, p->lb_source_lines_l[i]);
6049 DML_LOG_VERBOSE("DML::%s: i=%u src_detile_buf_size_bytes_l=%d\n", __func__, i, s->src_detile_buf_size_bytes_l[i]);
6050 DML_LOG_VERBOSE("DML::%s: i=%u src_swath_bytes_l=%d\n", __func__, i, s->src_swath_bytes_l[i]);
6051 DML_LOG_VERBOSE("DML::%s: i=%u burst_bytes_to_fill_det=%d (luma)\n", __func__, i, s->burst_bytes_to_fill_det);
6052 #endif
6053
6054 if (s->src_swath_bytes_c[i] > 0) { // dual_plane
6055 s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(s->src_detile_buf_size_bytes_c[i] / p->chunk_bytes_c, 1) * p->chunk_bytes_c);
6056
6057 if (p->pixel_format[i] == dml2_422_planar_8 || p->pixel_format[i] == dml2_422_planar_10 || p->pixel_format[i] == dml2_422_planar_12) {
6058 s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_c[i] / p->swath_height_c[i], 1) * s->src_swath_bytes_c[i]);
6059 }
6060
6061 #ifdef __DML_VBA_DEBUG__
6062 DML_LOG_VERBOSE("DML::%s: i=%u chunk_bytes_c = %d\n", __func__, i, p->chunk_bytes_c);
6063 DML_LOG_VERBOSE("DML::%s: i=%u lb_source_lines_c = %d\n", __func__, i, p->lb_source_lines_c[i]);
6064 DML_LOG_VERBOSE("DML::%s: i=%u src_detile_buf_size_bytes_c=%d\n", __func__, i, s->src_detile_buf_size_bytes_c[i]);
6065 DML_LOG_VERBOSE("DML::%s: i=%u src_swath_bytes_c=%d\n", __func__, i, s->src_swath_bytes_c[i]);
6066 #endif
6067 }
6068
6069 s->time_to_fill_det_us = (double) s->burst_bytes_to_fill_det / (256 * p->estimated_dcfclk_mhz); // fill time assume full burst at request rate
6070 s->accumulated_return_path_dcfclk_cycles[i] = (unsigned int) math_ceil2(((DML_MAX_COMPRESSION_RATIO-1) * 64 * p->estimated_dcfclk_mhz) * s->time_to_fill_det_us / 64.0, 1.0); //for 64B per DCFClk
6071
6072 #ifdef __DML_VBA_DEBUG__
6073 DML_LOG_VERBOSE("DML::%s: i=%u burst_bytes_to_fill_det=%d\n", __func__, i, s->burst_bytes_to_fill_det);
6074 DML_LOG_VERBOSE("DML::%s: i=%u time_to_fill_det_us=%f\n", __func__, i, s->time_to_fill_det_us);
6075 DML_LOG_VERBOSE("DML::%s: i=%u accumulated_return_path_dcfclk_cycles=%u\n", __func__, i, s->accumulated_return_path_dcfclk_cycles[i]);
6076 #endif
6077 // clamping to worst case delay which is one which occupy the full rob+cdb
6078 if (s->accumulated_return_path_dcfclk_cycles[i] > s->max_Trpd_dcfclk_cycles)
6079 s->accumulated_return_path_dcfclk_cycles[i] = s->max_Trpd_dcfclk_cycles;
6080 }
6081
6082 // Figure out the impacted prefetch time for each plane
6083 // if impacted_Tre is > equ bw Tpre, we need to fail the prefetch schedule as we need a higher state to support the bw
6084 for (i = 0; i < p->num_active_planes; i++) {
6085 k = find_max_impact_plane(i, p->num_active_planes, s->accumulated_return_path_dcfclk_cycles); // plane k causes most impact to plane i
6086 // the rest of planes (except for k) complete for bw
6087 p->impacted_dst_y_pre[i] = s->accumulated_return_path_dcfclk_cycles[k]/p->estimated_dcfclk_mhz;
6088 p->impacted_dst_y_pre[i] += calculate_impacted_Tsw(k, p->num_active_planes, p->prefetch_sw_bytes, p->estimated_urg_bandwidth_required_mbps);
6089 p->impacted_dst_y_pre[i] = math_ceil2(p->impacted_dst_y_pre[i] / p->line_time[i], 0.25);
6090
6091 #ifdef __DML_VBA_DEBUG__
6092 DML_LOG_VERBOSE("DML::%s: i=%u impacted_Tpre=%f (k=%u)\n", __func__, i, p->impacted_dst_y_pre[i], k);
6093 #endif
6094 }
6095
6096 if (p->Tpre_rounded != NULL && p->Tpre_oto != NULL) {
6097 for (i = 0; i < p->num_active_planes; i++) {
6098 if (p->impacted_dst_y_pre[i] > p->dst_y_prefetch[i]) {
6099 s->prefetch_global_check_passed = 0;
6100 *p->recalc_prefetch_schedule = 1;
6101 }
6102 #ifdef __DML_VBA_DEBUG__
6103 DML_LOG_VERBOSE("DML::%s: i=%u Tpre_rounded=%f\n", __func__, i, p->Tpre_rounded[i]);
6104 DML_LOG_VERBOSE("DML::%s: i=%u Tpre_oto=%f\n", __func__, i, p->Tpre_oto[i]);
6105 #endif
6106 }
6107 } else {
6108 // likely a mode programming calls, assume support, and no recalc - not used anyways
6109 s->prefetch_global_check_passed = 1;
6110 *p->recalc_prefetch_schedule = 0;
6111 }
6112
6113 #ifdef __DML_VBA_DEBUG__
6114 DML_LOG_VERBOSE("DML::%s: prefetch_global_check_passed=%u\n", __func__, s->prefetch_global_check_passed);
6115 DML_LOG_VERBOSE("DML::%s: recalc_prefetch_schedule=%u\n", __func__, *p->recalc_prefetch_schedule);
6116 #endif
6117
6118 return s->prefetch_global_check_passed;
6119 }
6120
calculate_peak_bandwidth_required(struct dml2_core_internal_scratch * s,struct dml2_core_calcs_calculate_peak_bandwidth_required_params * p)6121 static void calculate_peak_bandwidth_required(
6122 struct dml2_core_internal_scratch *s,
6123 struct dml2_core_calcs_calculate_peak_bandwidth_required_params *p)
6124 {
6125 unsigned int n;
6126 unsigned int m;
6127
6128 struct dml2_core_shared_calculate_peak_bandwidth_required_locals *l = &s->calculate_peak_bandwidth_required_locals;
6129
6130 memset(l, 0, sizeof(struct dml2_core_shared_calculate_peak_bandwidth_required_locals));
6131
6132 #ifdef __DML_VBA_DEBUG__
6133 DML_LOG_VERBOSE("DML::%s: inc_flip_bw = %d\n", __func__, p->inc_flip_bw);
6134 DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, p->num_active_planes);
6135 #endif
6136
6137 for (unsigned int k = 0; k < p->num_active_planes; ++k) {
6138 l->unity_array[k] = 1.0;
6139 l->zero_array[k] = 0.0;
6140 }
6141
6142 for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
6143 for (n = 0; n < dml2_core_internal_bw_max; n++) {
6144 get_urgent_bandwidth_required(
6145 &s->get_urgent_bandwidth_required_locals,
6146 p->display_cfg,
6147 m,
6148 n,
6149 0, //inc_flip_bw,
6150 0, //use_qual_row_bw
6151 p->num_active_planes,
6152 p->num_of_dpp,
6153 p->dcc_dram_bw_nom_overhead_factor_p0,
6154 p->dcc_dram_bw_nom_overhead_factor_p1,
6155 p->dcc_dram_bw_pref_overhead_factor_p0,
6156 p->dcc_dram_bw_pref_overhead_factor_p1,
6157 p->mall_prefetch_sdp_overhead_factor,
6158 p->mall_prefetch_dram_overhead_factor,
6159 p->surface_read_bandwidth_l,
6160 p->surface_read_bandwidth_c,
6161 l->zero_array, //PrefetchBandwidthLuma,
6162 l->zero_array, //PrefetchBandwidthChroma,
6163 l->zero_array, //PrefetchBWMax
6164 l->zero_array,
6165 l->zero_array,
6166 l->zero_array,
6167 p->dpte_row_bw,
6168 p->meta_row_bw,
6169 l->zero_array, //prefetch_cursor_bw,
6170 l->zero_array, //prefetch_vmrow_bw,
6171 l->zero_array, //flip_bw,
6172 l->zero_array,
6173 l->zero_array,
6174 l->zero_array,
6175 l->zero_array,
6176 l->zero_array,
6177 l->zero_array,
6178 p->surface_avg_vactive_required_bw[m][n],
6179 p->surface_peak_required_bw[m][n]);
6180
6181 p->urg_vactive_bandwidth_required[m][n] = get_urgent_bandwidth_required(
6182 &s->get_urgent_bandwidth_required_locals,
6183 p->display_cfg,
6184 m,
6185 n,
6186 0, //inc_flip_bw,
6187 0, //use_qual_row_bw
6188 p->num_active_planes,
6189 p->num_of_dpp,
6190 p->dcc_dram_bw_nom_overhead_factor_p0,
6191 p->dcc_dram_bw_nom_overhead_factor_p1,
6192 p->dcc_dram_bw_pref_overhead_factor_p0,
6193 p->dcc_dram_bw_pref_overhead_factor_p1,
6194 p->mall_prefetch_sdp_overhead_factor,
6195 p->mall_prefetch_dram_overhead_factor,
6196 p->surface_read_bandwidth_l,
6197 p->surface_read_bandwidth_c,
6198 l->zero_array, //PrefetchBandwidthLuma,
6199 l->zero_array, //PrefetchBandwidthChroma,
6200 l->zero_array, //PrefetchBWMax
6201 p->excess_vactive_fill_bw_l,
6202 p->excess_vactive_fill_bw_c,
6203 p->cursor_bw,
6204 p->dpte_row_bw,
6205 p->meta_row_bw,
6206 l->zero_array, //prefetch_cursor_bw,
6207 l->zero_array, //prefetch_vmrow_bw,
6208 l->zero_array, //flip_bw,
6209 p->urgent_burst_factor_l,
6210 p->urgent_burst_factor_c,
6211 p->urgent_burst_factor_cursor,
6212 p->urgent_burst_factor_prefetch_l,
6213 p->urgent_burst_factor_prefetch_c,
6214 p->urgent_burst_factor_prefetch_cursor,
6215 l->surface_dummy_bw,
6216 p->surface_peak_required_bw[m][n]);
6217
6218 p->urg_bandwidth_required[m][n] = get_urgent_bandwidth_required(
6219 &s->get_urgent_bandwidth_required_locals,
6220 p->display_cfg,
6221 m,
6222 n,
6223 p->inc_flip_bw,
6224 0, //use_qual_row_bw
6225 p->num_active_planes,
6226 p->num_of_dpp,
6227 p->dcc_dram_bw_nom_overhead_factor_p0,
6228 p->dcc_dram_bw_nom_overhead_factor_p1,
6229 p->dcc_dram_bw_pref_overhead_factor_p0,
6230 p->dcc_dram_bw_pref_overhead_factor_p1,
6231 p->mall_prefetch_sdp_overhead_factor,
6232 p->mall_prefetch_dram_overhead_factor,
6233 p->surface_read_bandwidth_l,
6234 p->surface_read_bandwidth_c,
6235 p->prefetch_bandwidth_l,
6236 p->prefetch_bandwidth_c,
6237 p->prefetch_bandwidth_max, // to prevent ms/mp mismatches where mp prefetch bw > ms prefetch bw
6238 p->excess_vactive_fill_bw_l,
6239 p->excess_vactive_fill_bw_c,
6240 p->cursor_bw,
6241 p->dpte_row_bw,
6242 p->meta_row_bw,
6243 p->prefetch_cursor_bw,
6244 p->prefetch_vmrow_bw,
6245 p->flip_bw,
6246 p->urgent_burst_factor_l,
6247 p->urgent_burst_factor_c,
6248 p->urgent_burst_factor_cursor,
6249 p->urgent_burst_factor_prefetch_l,
6250 p->urgent_burst_factor_prefetch_c,
6251 p->urgent_burst_factor_prefetch_cursor,
6252 l->surface_dummy_bw,
6253 p->surface_peak_required_bw[m][n]);
6254
6255 p->urg_bandwidth_required_qual[m][n] = get_urgent_bandwidth_required(
6256 &s->get_urgent_bandwidth_required_locals,
6257 p->display_cfg,
6258 m,
6259 n,
6260 0, //inc_flip_bw
6261 1, //use_qual_row_bw
6262 p->num_active_planes,
6263 p->num_of_dpp,
6264 p->dcc_dram_bw_nom_overhead_factor_p0,
6265 p->dcc_dram_bw_nom_overhead_factor_p1,
6266 p->dcc_dram_bw_pref_overhead_factor_p0,
6267 p->dcc_dram_bw_pref_overhead_factor_p1,
6268 p->mall_prefetch_sdp_overhead_factor,
6269 p->mall_prefetch_dram_overhead_factor,
6270 p->surface_read_bandwidth_l,
6271 p->surface_read_bandwidth_c,
6272 p->prefetch_bandwidth_l,
6273 p->prefetch_bandwidth_c,
6274 p->prefetch_bandwidth_max, // to prevent ms/mp mismatch where mp prefetch bw > ms prefetch bw
6275 p->excess_vactive_fill_bw_l,
6276 p->excess_vactive_fill_bw_c,
6277 p->cursor_bw,
6278 p->dpte_row_bw,
6279 p->meta_row_bw,
6280 p->prefetch_cursor_bw,
6281 p->prefetch_vmrow_bw,
6282 p->flip_bw,
6283 p->urgent_burst_factor_l,
6284 p->urgent_burst_factor_c,
6285 p->urgent_burst_factor_cursor,
6286 p->urgent_burst_factor_prefetch_l,
6287 p->urgent_burst_factor_prefetch_c,
6288 p->urgent_burst_factor_prefetch_cursor,
6289 l->surface_dummy_bw,
6290 p->surface_peak_required_bw[m][n]);
6291
6292 p->non_urg_bandwidth_required[m][n] = get_urgent_bandwidth_required(
6293 &s->get_urgent_bandwidth_required_locals,
6294 p->display_cfg,
6295 m,
6296 n,
6297 p->inc_flip_bw,
6298 0, //use_qual_row_bw
6299 p->num_active_planes,
6300 p->num_of_dpp,
6301 p->dcc_dram_bw_nom_overhead_factor_p0,
6302 p->dcc_dram_bw_nom_overhead_factor_p1,
6303 p->dcc_dram_bw_pref_overhead_factor_p0,
6304 p->dcc_dram_bw_pref_overhead_factor_p1,
6305 p->mall_prefetch_sdp_overhead_factor,
6306 p->mall_prefetch_dram_overhead_factor,
6307 p->surface_read_bandwidth_l,
6308 p->surface_read_bandwidth_c,
6309 p->prefetch_bandwidth_l,
6310 p->prefetch_bandwidth_c,
6311 p->prefetch_bandwidth_max, // to prevent ms/mp mismatches where mp prefetch bw > ms prefetch bw
6312 p->excess_vactive_fill_bw_l,
6313 p->excess_vactive_fill_bw_c,
6314 p->cursor_bw,
6315 p->dpte_row_bw,
6316 p->meta_row_bw,
6317 p->prefetch_cursor_bw,
6318 p->prefetch_vmrow_bw,
6319 p->flip_bw,
6320 l->unity_array,
6321 l->unity_array,
6322 l->unity_array,
6323 l->unity_array,
6324 l->unity_array,
6325 l->unity_array,
6326 l->surface_dummy_bw,
6327 p->surface_peak_required_bw[m][n]);
6328
6329 #ifdef __DML_VBA_DEBUG__
6330 DML_LOG_VERBOSE("DML::%s: urg_vactive_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_vactive_bandwidth_required[m][n]);
6331 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]);
6332 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_qual[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]);
6333 DML_LOG_VERBOSE("DML::%s: non_urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->non_urg_bandwidth_required[m][n]);
6334 #endif
6335 DML_ASSERT(p->urg_bandwidth_required[m][n] >= p->non_urg_bandwidth_required[m][n]);
6336 }
6337 }
6338 }
6339
check_urgent_bandwidth_support(double * frac_urg_bandwidth_nom,double * frac_urg_bandwidth_mall,bool * vactive_bandwidth_support_ok,bool * bandwidth_support_ok,unsigned int mall_allocated_for_dcn_mbytes,double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])6340 static void check_urgent_bandwidth_support(
6341 double *frac_urg_bandwidth_nom,
6342 double *frac_urg_bandwidth_mall,
6343 bool *vactive_bandwidth_support_ok, // vactive ok
6344 bool *bandwidth_support_ok,// max of vm, prefetch, vactive all ok
6345
6346 unsigned int mall_allocated_for_dcn_mbytes,
6347 double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
6348 double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
6349 double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
6350 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])
6351 {
6352 double frac_urg_bandwidth_nom_sdp = non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] / urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
6353 double frac_urg_bandwidth_nom_dram = non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] / urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
6354 double frac_urg_bandwidth_mall_sdp;
6355 double frac_urg_bandwidth_mall_dram;
6356 if (urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] > 0)
6357 frac_urg_bandwidth_mall_sdp = non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] / urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
6358 else
6359 frac_urg_bandwidth_mall_sdp = 0.0;
6360 if (urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] > 0)
6361 frac_urg_bandwidth_mall_dram = non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] / urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
6362 else
6363 frac_urg_bandwidth_mall_dram = 0.0;
6364
6365 *bandwidth_support_ok = 1;
6366 *vactive_bandwidth_support_ok = 1;
6367
6368 // Check urgent bandwidth required at sdp vs urgent bandwidth avail at sdp -> FractionOfUrgentBandwidth
6369 // Check urgent bandwidth required at dram vs urgent bandwidth avail at dram
6370 // Check urgent bandwidth required at sdp vs urgent bandwidth avail at sdp, svp_prefetch -> FractionOfUrgentBandwidthMALL
6371 // Check urgent bandwidth required at dram vs urgent bandwidth avail at dram, svp_prefetch
6372
6373 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
6374 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
6375
6376 if (mall_allocated_for_dcn_mbytes > 0) {
6377 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
6378 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
6379 }
6380
6381 *frac_urg_bandwidth_nom = math_max2(frac_urg_bandwidth_nom_sdp, frac_urg_bandwidth_nom_dram);
6382 *frac_urg_bandwidth_mall = math_max2(frac_urg_bandwidth_mall_sdp, frac_urg_bandwidth_mall_dram);
6383
6384 *bandwidth_support_ok &= (*frac_urg_bandwidth_nom <= 1.0);
6385
6386 if (mall_allocated_for_dcn_mbytes > 0)
6387 *bandwidth_support_ok &= (*frac_urg_bandwidth_mall <= 1.0);
6388
6389 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
6390 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
6391 if (mall_allocated_for_dcn_mbytes > 0) {
6392 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
6393 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
6394 }
6395
6396 #ifdef __DML_VBA_DEBUG__
6397 DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom_sdp = %f\n", __func__, frac_urg_bandwidth_nom_sdp);
6398 DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom_dram = %f\n", __func__, frac_urg_bandwidth_nom_dram);
6399 DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom = %f\n", __func__, *frac_urg_bandwidth_nom);
6400
6401 DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall_sdp = %f\n", __func__, frac_urg_bandwidth_mall_sdp);
6402 DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall_dram = %f\n", __func__, frac_urg_bandwidth_mall_dram);
6403 DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall = %f\n", __func__, *frac_urg_bandwidth_mall);
6404 DML_LOG_VERBOSE("DML::%s: bandwidth_support_ok = %d\n", __func__, *bandwidth_support_ok);
6405
6406 for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) {
6407 for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) {
6408 DML_LOG_VERBOSE("DML::%s: state:%s bw_type:%s urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n",
6409 __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n),
6410 urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required[m][n]) ? "<" : ">=", urg_bandwidth_required[m][n]);
6411 }
6412 }
6413 #endif
6414 }
6415
get_bandwidth_available_for_immediate_flip(enum dml2_core_internal_soc_state_type eval_state,double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])6416 static double get_bandwidth_available_for_immediate_flip(enum dml2_core_internal_soc_state_type eval_state,
6417 double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], // no flip
6418 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])
6419 {
6420 double flip_bw_available_mbps;
6421 double flip_bw_available_sdp_mbps;
6422 double flip_bw_available_dram_mbps;
6423
6424 flip_bw_available_sdp_mbps = urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp] - urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp];
6425 flip_bw_available_dram_mbps = urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram] - urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram];
6426 flip_bw_available_mbps = flip_bw_available_sdp_mbps < flip_bw_available_dram_mbps ? flip_bw_available_sdp_mbps : flip_bw_available_dram_mbps;
6427
6428 #ifdef __DML_VBA_DEBUG__
6429 DML_LOG_VERBOSE("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state));
6430 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_sdp_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp]);
6431 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_dram_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram]);
6432 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_sdp_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp]);
6433 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_dram_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram]);
6434 DML_LOG_VERBOSE("DML::%s: flip_bw_available_sdp_mbps = %f\n", __func__, flip_bw_available_sdp_mbps);
6435 DML_LOG_VERBOSE("DML::%s: flip_bw_available_dram_mbps = %f\n", __func__, flip_bw_available_dram_mbps);
6436 DML_LOG_VERBOSE("DML::%s: flip_bw_available_mbps = %f\n", __func__, flip_bw_available_mbps);
6437 #endif
6438
6439 return flip_bw_available_mbps;
6440 }
6441
calculate_immediate_flip_bandwidth_support(double * frac_urg_bandwidth_flip,bool * flip_bandwidth_support_ok,enum dml2_core_internal_soc_state_type eval_state,double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])6442 static void calculate_immediate_flip_bandwidth_support(
6443 // Output
6444 double *frac_urg_bandwidth_flip,
6445 bool *flip_bandwidth_support_ok,
6446
6447 // Input
6448 enum dml2_core_internal_soc_state_type eval_state,
6449 double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
6450 double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
6451 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])
6452 {
6453 double frac_urg_bw_flip_sdp = non_urg_bandwidth_required_flip[eval_state][dml2_core_internal_bw_sdp] / urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp];
6454 double frac_urg_bw_flip_dram = non_urg_bandwidth_required_flip[eval_state][dml2_core_internal_bw_dram] / urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram];
6455
6456 *flip_bandwidth_support_ok = true;
6457 for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { // check sdp and dram
6458 *flip_bandwidth_support_ok &= urg_bandwidth_available[eval_state][n] >= urg_bandwidth_required_flip[eval_state][n];
6459
6460 #ifdef __DML_VBA_DEBUG__
6461 DML_LOG_VERBOSE("DML::%s: n = %s\n", __func__, dml2_core_internal_bw_type_str(n));
6462 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available = %f\n", __func__, urg_bandwidth_available[eval_state][n]);
6463 DML_LOG_VERBOSE("DML::%s: non_urg_bandwidth_required_flip = %f\n", __func__, non_urg_bandwidth_required_flip[eval_state][n]);
6464 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_flip = %f\n", __func__, urg_bandwidth_required_flip[eval_state][n]);
6465 DML_LOG_VERBOSE("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok);
6466 #endif
6467 DML_ASSERT(urg_bandwidth_required_flip[eval_state][n] >= non_urg_bandwidth_required_flip[eval_state][n]);
6468 }
6469
6470 *frac_urg_bandwidth_flip = (frac_urg_bw_flip_sdp > frac_urg_bw_flip_dram) ? frac_urg_bw_flip_sdp : frac_urg_bw_flip_dram;
6471 *flip_bandwidth_support_ok &= (*frac_urg_bandwidth_flip <= 1.0);
6472
6473 #ifdef __DML_VBA_DEBUG__
6474 DML_LOG_VERBOSE("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state));
6475 DML_LOG_VERBOSE("DML::%s: frac_urg_bw_flip_sdp = %f\n", __func__, frac_urg_bw_flip_sdp);
6476 DML_LOG_VERBOSE("DML::%s: frac_urg_bw_flip_dram = %f\n", __func__, frac_urg_bw_flip_dram);
6477 DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_flip = %f\n", __func__, *frac_urg_bandwidth_flip);
6478 DML_LOG_VERBOSE("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok);
6479
6480 for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) {
6481 for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) {
6482 DML_LOG_VERBOSE("DML::%s: state:%s bw_type:%s, urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n",
6483 __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n),
6484 urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required_flip[m][n]) ? "<" : ">=", urg_bandwidth_required_flip[m][n]);
6485 }
6486 }
6487 #endif
6488 }
6489
CalculateFlipSchedule(struct dml2_core_internal_scratch * s,bool iflip_enable,bool use_lb_flip_bw,double HostVMInefficiencyFactor,double Tvm_trips_flip,double Tr0_trips_flip,double Tvm_trips_flip_rounded,double Tr0_trips_flip_rounded,bool GPUVMEnable,double vm_bytes,double DPTEBytesPerRow,double BandwidthAvailableForImmediateFlip,unsigned int TotImmediateFlipBytes,enum dml2_source_format_class SourcePixelFormat,double LineTime,double VRatio,double VRatioChroma,double Tno_bw_flip,unsigned int dpte_row_height,unsigned int dpte_row_height_chroma,bool use_one_row_for_frame_flip,unsigned int max_flip_time_us,unsigned int max_flip_time_lines,unsigned int per_pipe_flip_bytes,unsigned int meta_row_bytes,unsigned int meta_row_height,unsigned int meta_row_height_chroma,bool dcc_mrq_enable,double * dst_y_per_vm_flip,double * dst_y_per_row_flip,double * final_flip_bw,bool * ImmediateFlipSupportedForPipe)6490 static void CalculateFlipSchedule(
6491 struct dml2_core_internal_scratch *s,
6492 bool iflip_enable,
6493 bool use_lb_flip_bw,
6494 double HostVMInefficiencyFactor,
6495 double Tvm_trips_flip,
6496 double Tr0_trips_flip,
6497 double Tvm_trips_flip_rounded,
6498 double Tr0_trips_flip_rounded,
6499 bool GPUVMEnable,
6500 double vm_bytes, // vm_bytes
6501 double DPTEBytesPerRow, // dpte_row_bytes
6502 double BandwidthAvailableForImmediateFlip,
6503 unsigned int TotImmediateFlipBytes,
6504 enum dml2_source_format_class SourcePixelFormat,
6505 double LineTime,
6506 double VRatio,
6507 double VRatioChroma,
6508 double Tno_bw_flip,
6509 unsigned int dpte_row_height,
6510 unsigned int dpte_row_height_chroma,
6511 bool use_one_row_for_frame_flip,
6512 unsigned int max_flip_time_us,
6513 unsigned int max_flip_time_lines,
6514 unsigned int per_pipe_flip_bytes,
6515 unsigned int meta_row_bytes,
6516 unsigned int meta_row_height,
6517 unsigned int meta_row_height_chroma,
6518 bool dcc_mrq_enable,
6519
6520 // Output
6521 double *dst_y_per_vm_flip,
6522 double *dst_y_per_row_flip,
6523 double *final_flip_bw,
6524 bool *ImmediateFlipSupportedForPipe)
6525 {
6526 (void)use_one_row_for_frame_flip;
6527 struct dml2_core_shared_CalculateFlipSchedule_locals *l = &s->CalculateFlipSchedule_locals;
6528
6529 l->dual_plane = dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha;
6530 l->dpte_row_bytes = DPTEBytesPerRow;
6531
6532 #ifdef __DML_VBA_DEBUG__
6533 DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
6534 DML_LOG_VERBOSE("DML::%s: ip.max_flip_time_us = %d\n", __func__, max_flip_time_us);
6535 DML_LOG_VERBOSE("DML::%s: ip.max_flip_time_lines = %d\n", __func__, max_flip_time_lines);
6536 DML_LOG_VERBOSE("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
6537 DML_LOG_VERBOSE("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes);
6538 DML_LOG_VERBOSE("DML::%s: use_lb_flip_bw = %u\n", __func__, use_lb_flip_bw);
6539 DML_LOG_VERBOSE("DML::%s: iflip_enable = %u\n", __func__, iflip_enable);
6540 DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
6541 DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, LineTime);
6542 DML_LOG_VERBOSE("DML::%s: Tno_bw_flip = %f\n", __func__, Tno_bw_flip);
6543 DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip = %f\n", __func__, Tvm_trips_flip);
6544 DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip = %f\n", __func__, Tr0_trips_flip);
6545 DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip_rounded = %f\n", __func__, Tvm_trips_flip_rounded);
6546 DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip_rounded = %f\n", __func__, Tr0_trips_flip_rounded);
6547 DML_LOG_VERBOSE("DML::%s: vm_bytes = %f\n", __func__, vm_bytes);
6548 DML_LOG_VERBOSE("DML::%s: DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow);
6549 DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %d\n", __func__, meta_row_bytes);
6550 DML_LOG_VERBOSE("DML::%s: dpte_row_bytes = %f\n", __func__, l->dpte_row_bytes);
6551 DML_LOG_VERBOSE("DML::%s: dpte_row_height = %d\n", __func__, dpte_row_height);
6552 DML_LOG_VERBOSE("DML::%s: meta_row_height = %d\n", __func__, meta_row_height);
6553 DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio);
6554 #endif
6555
6556 if (TotImmediateFlipBytes > 0 && (GPUVMEnable || dcc_mrq_enable)) {
6557 if (l->dual_plane) {
6558 if (dcc_mrq_enable & GPUVMEnable) {
6559 l->min_row_height = math_min2(dpte_row_height, meta_row_height);
6560 l->min_row_height_chroma = math_min2(dpte_row_height_chroma, meta_row_height_chroma);
6561 } else if (GPUVMEnable) {
6562 l->min_row_height = dpte_row_height;
6563 l->min_row_height_chroma = dpte_row_height_chroma;
6564 } else {
6565 l->min_row_height = meta_row_height;
6566 l->min_row_height_chroma = meta_row_height_chroma;
6567 }
6568 l->min_row_time = math_min2(l->min_row_height * LineTime / VRatio, l->min_row_height_chroma * LineTime / VRatioChroma);
6569 } else {
6570 if (dcc_mrq_enable & GPUVMEnable)
6571 l->min_row_height = math_min2(dpte_row_height, meta_row_height);
6572 else if (GPUVMEnable)
6573 l->min_row_height = dpte_row_height;
6574 else
6575 l->min_row_height = meta_row_height;
6576
6577 l->min_row_time = l->min_row_height * LineTime / VRatio;
6578 }
6579 #ifdef __DML_VBA_DEBUG__
6580 DML_LOG_VERBOSE("DML::%s: min_row_time = %f\n", __func__, l->min_row_time);
6581 #endif
6582 DML_ASSERT(l->min_row_time > 0);
6583
6584 if (use_lb_flip_bw) {
6585 // For mode check, calculation the flip bw requirement with worst case flip time
6586 l->max_flip_time = math_min2(math_min2(l->min_row_time, (double)max_flip_time_lines * LineTime / VRatio),
6587 math_max2(Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded, (double)max_flip_time_us));
6588
6589 //The lower bound on flip bandwidth
6590 // Note: The get_urgent_bandwidth_required already consider dpte_row_bw and meta_row_bw in bandwidth calculation, so leave final_flip_bw = 0 if iflip not required
6591 l->lb_flip_bw = 0;
6592
6593 if (iflip_enable) {
6594 l->hvm_scaled_vm_bytes = vm_bytes * HostVMInefficiencyFactor;
6595 l->num_rows = 2;
6596 l->hvm_scaled_row_bytes = (l->num_rows * l->dpte_row_bytes * HostVMInefficiencyFactor + l->num_rows * meta_row_bytes);
6597 l->hvm_scaled_vm_row_bytes = l->hvm_scaled_vm_bytes + l->hvm_scaled_row_bytes;
6598 l->lb_flip_bw = math_max3(
6599 l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip),
6600 l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded),
6601 l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded));
6602 #ifdef __DML_VBA_DEBUG__
6603 DML_LOG_VERBOSE("DML::%s: max_flip_time = %f\n", __func__, l->max_flip_time);
6604 DML_LOG_VERBOSE("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_bytes);
6605 DML_LOG_VERBOSE("DML::%s: total row bytes (%f row, hvm ineff scaled) = %f\n", __func__, l->num_rows, l->hvm_scaled_row_bytes);
6606 DML_LOG_VERBOSE("DML::%s: total vm+row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_row_bytes);
6607 DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm and row = %f\n", __func__, l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip));
6608 DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm = %f\n", __func__, l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded));
6609 DML_LOG_VERBOSE("DML::%s: lb_flip_bw for row = %f\n", __func__, l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded));
6610
6611 if (l->lb_flip_bw > 0) {
6612 DML_LOG_VERBOSE("DML::%s: mode_support est Tvm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw);
6613 DML_LOG_VERBOSE("DML::%s: mode_support est Tr0_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / l->num_rows);
6614 DML_LOG_VERBOSE("DML::%s: mode_support est dst_y_per_vm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw / LineTime);
6615 DML_LOG_VERBOSE("DML::%s: mode_support est dst_y_per_row_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / LineTime / l->num_rows);
6616 DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip_rounded + 2*Tr0_trips_flip_rounded = %f\n", __func__, (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded));
6617 }
6618 #endif
6619 l->lb_flip_bw = math_max3(l->lb_flip_bw,
6620 l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip,
6621 (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime));
6622
6623 #ifdef __DML_VBA_DEBUG__
6624 DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm reg limit = %f\n", __func__, l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip);
6625 DML_LOG_VERBOSE("DML::%s: lb_flip_bw for row reg limit = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime));
6626 #endif
6627 }
6628
6629 *final_flip_bw = l->lb_flip_bw;
6630
6631 *dst_y_per_vm_flip = 1; // not used
6632 *dst_y_per_row_flip = 1; // not used
6633 *ImmediateFlipSupportedForPipe = l->min_row_time >= (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded);
6634 } else {
6635 if (iflip_enable) {
6636 l->ImmediateFlipBW = (double)per_pipe_flip_bytes * BandwidthAvailableForImmediateFlip / (double)TotImmediateFlipBytes; // flip_bw(i)
6637
6638 #ifdef __DML_VBA_DEBUG__
6639 DML_LOG_VERBOSE("DML::%s: per_pipe_flip_bytes = %d\n", __func__, per_pipe_flip_bytes);
6640 DML_LOG_VERBOSE("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
6641 DML_LOG_VERBOSE("DML::%s: ImmediateFlipBW = %f\n", __func__, l->ImmediateFlipBW);
6642 DML_LOG_VERBOSE("DML::%s: portion of flip bw = %f\n", __func__, (double)per_pipe_flip_bytes / (double)TotImmediateFlipBytes);
6643 #endif
6644 if (l->ImmediateFlipBW == 0) {
6645 l->Tvm_flip = 0;
6646 l->Tr0_flip = 0;
6647 } else {
6648 l->Tvm_flip = math_max3(Tvm_trips_flip,
6649 Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW,
6650 LineTime / 4.0);
6651
6652 l->Tr0_flip = math_max3(Tr0_trips_flip,
6653 (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW,
6654 LineTime / 4.0);
6655 }
6656 #ifdef __DML_VBA_DEBUG__
6657 DML_LOG_VERBOSE("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, vm_bytes * HostVMInefficiencyFactor);
6658 DML_LOG_VERBOSE("DML::%s: total row bytes (hvm ineff scaled, one row) = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes));
6659
6660 DML_LOG_VERBOSE("DML::%s: Tvm_flip = %f (bw-based), Tvm_trips_flip = %f (latency-based)\n", __func__, Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW, Tvm_trips_flip);
6661 DML_LOG_VERBOSE("DML::%s: Tr0_flip = %f (bw-based), Tr0_trips_flip = %f (latency-based)\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW, Tr0_trips_flip);
6662 #endif
6663 *dst_y_per_vm_flip = math_ceil2(4.0 * (l->Tvm_flip / LineTime), 1.0) / 4.0;
6664 *dst_y_per_row_flip = math_ceil2(4.0 * (l->Tr0_flip / LineTime), 1.0) / 4.0;
6665
6666 *final_flip_bw = math_max2(vm_bytes * HostVMInefficiencyFactor / (*dst_y_per_vm_flip * LineTime),
6667 (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (*dst_y_per_row_flip * LineTime));
6668
6669 if (*dst_y_per_vm_flip >= 32 || *dst_y_per_row_flip >= 16 || l->Tvm_flip + 2 * l->Tr0_flip > l->min_row_time) {
6670 *ImmediateFlipSupportedForPipe = false;
6671 } else {
6672 *ImmediateFlipSupportedForPipe = iflip_enable;
6673 }
6674 } else {
6675 l->Tvm_flip = 0;
6676 l->Tr0_flip = 0;
6677 *dst_y_per_vm_flip = 0;
6678 *dst_y_per_row_flip = 0;
6679 *final_flip_bw = 0;
6680 *ImmediateFlipSupportedForPipe = iflip_enable;
6681 }
6682 }
6683 } else {
6684 l->Tvm_flip = 0;
6685 l->Tr0_flip = 0;
6686 *dst_y_per_vm_flip = 0;
6687 *dst_y_per_row_flip = 0;
6688 *final_flip_bw = 0;
6689 *ImmediateFlipSupportedForPipe = iflip_enable;
6690 }
6691
6692 #ifdef __DML_VBA_DEBUG__
6693 if (!use_lb_flip_bw) {
6694 DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_flip = %f (should be < 32)\n", __func__, *dst_y_per_vm_flip);
6695 DML_LOG_VERBOSE("DML::%s: dst_y_per_row_flip = %f (should be < 16)\n", __func__, *dst_y_per_row_flip);
6696 DML_LOG_VERBOSE("DML::%s: Tvm_flip = %f (final)\n", __func__, l->Tvm_flip);
6697 DML_LOG_VERBOSE("DML::%s: Tr0_flip = %f (final)\n", __func__, l->Tr0_flip);
6698 DML_LOG_VERBOSE("DML::%s: Tvm_flip + 2*Tr0_flip = %f (should be <= min_row_time=%f)\n", __func__, l->Tvm_flip + 2 * l->Tr0_flip, l->min_row_time);
6699 }
6700 DML_LOG_VERBOSE("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw);
6701 DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe);
6702 #endif
6703 }
6704
/*
 * Compute the latency watermarks and the per-surface clock-change support.
 *
 * Steps, in order:
 *  1. Fill p->Watermark (urgent, USR retraining, DRAM clock change, FCLK
 *     change, stutter / Z8 stutter, temp-read, and the writeback variants)
 *     from p->mmSOCParameters.
 *  2. Sum the pixel bandwidth of all active surfaces into s->TotalPixelBW.
 *  3. For every active surface k, derive how much latency the line buffer
 *     and DET can hide, subtract the watermarks to get margins, and from
 *     those margins decide p->FCLKChangeSupport[k] and
 *     p->DRAMClockChangeSupport[k]; also compute
 *     p->MaxActiveDRAMClockChangeLatencySupported[k] and
 *     p->SubViewportLinesNeededInMALL[k].
 *  4. Derive *p->g6_temp_read_support and
 *     *p->MaxActiveFCLKChangeLatencySupported over the non-phantom surfaces.
 *
 * @scratch: provides the locals block used as working storage (s).
 * @p:       inputs and output pointers; p->VActiveLatencyHidingMargin and
 *           p->VActiveLatencyHidingUs are optional (written only when
 *           non-NULL).
 *
 * NOTE(review): *p->MaxActiveFCLKChangeLatencySupported is only written when
 * at least one non-phantom surface exists; otherwise it is left untouched.
 */
static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
	struct dml2_core_internal_scratch *scratch,
	struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *p)
{
	struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals *s = &scratch->CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals;

	enum dml2_uclk_pstate_change_strategy uclk_pstate_change_strategy;
	double reserved_vblank_time_us;
	bool FoundCriticalSurface = false;

	s->TotalActiveWriteback = 0;
	/* The urgent watermark is the base that most other watermarks add to. */
	p->Watermark->UrgentWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency;

#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark);
#endif

	p->Watermark->USRRetrainingWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency + p->mmSOCParameters.USRRetrainingLatency + p->mmSOCParameters.SMNLatency;
	p->Watermark->DRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->Watermark->UrgentWatermark;
	p->Watermark->FCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->Watermark->UrgentWatermark;
	/*
	 * All four stutter watermarks share the same shape: SoC exit (or
	 * enter+exit) time + ExtraLatency_sr + 10 / DCFClkDeepSleep.
	 * NOTE(review): the last term looks like 10 deep-sleep DCFCLK cycles
	 * converted to time -- confirm units against the caller.
	 */
	p->Watermark->StutterExitWatermark = p->mmSOCParameters.SRExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
	p->Watermark->StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
	p->Watermark->Z8StutterExitWatermark = p->mmSOCParameters.SRExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
	p->Watermark->Z8StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
	/* With the dcn4x QoS parameter set, stutter watermarks additionally include the max urgent latency and DF response time. */
	if (p->mmSOCParameters.qos_type == dml2_qos_param_type_dcn4x) {
		p->Watermark->StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
		p->Watermark->StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
		p->Watermark->Z8StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
		p->Watermark->Z8StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
	}
	p->Watermark->temp_read_or_ppt_watermark_us = p->mmSOCParameters.g6_temp_read_blackout_us + p->Watermark->UrgentWatermark;

#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency);
	DML_LOG_VERBOSE("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency);
	DML_LOG_VERBOSE("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency);
	DML_LOG_VERBOSE("DML::%s: SREnterPlusExitZ8Time = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitZ8Time);
	DML_LOG_VERBOSE("DML::%s: SREnterPlusExitTime = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitTime);
	DML_LOG_VERBOSE("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark);
	DML_LOG_VERBOSE("DML::%s: USRRetrainingWatermark = %f\n", __func__, p->Watermark->USRRetrainingWatermark);
	DML_LOG_VERBOSE("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark);
	DML_LOG_VERBOSE("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark);
	DML_LOG_VERBOSE("DML::%s: StutterExitWatermark = %f\n", __func__, p->Watermark->StutterExitWatermark);
	DML_LOG_VERBOSE("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark);
	DML_LOG_VERBOSE("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark);
	DML_LOG_VERBOSE("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark);
	DML_LOG_VERBOSE("DML::%s: temp_read_or_ppt_watermark_us = %f\n", __func__, p->Watermark->temp_read_or_ppt_watermark_us);
#endif

	/*
	 * Count the active surfaces whose stream has at least one active
	 * writeback (the counter was already zeroed at the top of the function).
	 */
	s->TotalActiveWriteback = 0;
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
		if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
			s->TotalActiveWriteback = s->TotalActiveWriteback + 1;
		}
	}

	/* With more than one writeback counted, add the time to move one writeback chunk at SOCCLK. */
	if (s->TotalActiveWriteback <= 1) {
		p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency;
	} else {
		p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
	}
	if (p->USRRetrainingRequired)
		p->Watermark->WritebackUrgentWatermark = p->Watermark->WritebackUrgentWatermark + p->mmSOCParameters.USRRetrainingLatency;

	if (s->TotalActiveWriteback <= 1) {
		p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency;
		p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency;
	} else {
		p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
		/* NOTE(review): integer literals here (1024 / 32) versus 1024.0 / 32.0 on the line above. */
		p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024 / 32 / p->SOCCLK;
	}

	if (p->USRRetrainingRequired)
		p->Watermark->WritebackDRAMClockChangeWatermark = p->Watermark->WritebackDRAMClockChangeWatermark + p->mmSOCParameters.USRRetrainingLatency;

	if (p->USRRetrainingRequired)
		p->Watermark->WritebackFCLKChangeWatermark = p->Watermark->WritebackFCLKChangeWatermark + p->mmSOCParameters.USRRetrainingLatency;

#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark);
	DML_LOG_VERBOSE("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark);
	DML_LOG_VERBOSE("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark);
	DML_LOG_VERBOSE("DML::%s: USRRetrainingRequired = %u\n", __func__, p->USRRetrainingRequired);
	DML_LOG_VERBOSE("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency);
#endif

	/*
	 * Total pixel bandwidth over all active surfaces: bytes per line (luma
	 * plus chroma, scaled by the vertical ratios) divided by the line time
	 * (h_total / pixel clock in MHz), times the DPP count of the surface.
	 */
	s->TotalPixelBW = 0.0;
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
		double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total;
		double pixel_clock_mhz = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0;
		double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
		double v_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
		s->TotalPixelBW = s->TotalPixelBW + p->DPPPerSurface[k]
			* (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio + p->SwathWidthC[k] * p->BytePerPixelDETC[k] * v_ratio_c) / (h_total / pixel_clock_mhz);
	}

	/* Start optimistic; any surface that ends up unsupported clears the global flag below. */
	*p->global_fclk_change_supported = true;
	*p->global_dram_clock_change_supported = true;

	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
		double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total;
		double pixel_clock_mhz = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0;
		double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
		double v_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
		double v_taps = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
		double v_taps_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
		double h_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio;
		double h_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio;
		/* Fixed bits-per-pixel figure used to size the line buffer in this calculation. */
		double LBBitPerPixel = 57;

		/*
		 * Source lines the line buffer can hold, capped at
		 * MaxLineBufferLines, minus the (v_taps - 1) lines the vertical
		 * scaler filter needs.
		 */
		s->LBLatencyHidingSourceLinesY[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthY[k] / math_max2(h_ratio, 1.0)), 1)) - (v_taps - 1));
		s->LBLatencyHidingSourceLinesC[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthC[k] / math_max2(h_ratio_c, 1.0)), 1)) - (v_taps_c - 1));

#ifdef __DML_VBA_DEBUG__
		DML_LOG_VERBOSE("DML::%s: k=%u, MaxLineBufferLines = %u\n", __func__, k, p->MaxLineBufferLines);
		DML_LOG_VERBOSE("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize);
		DML_LOG_VERBOSE("DML::%s: k=%u, LBBitPerPixel = %f\n", __func__, k, LBBitPerPixel);
		DML_LOG_VERBOSE("DML::%s: k=%u, HRatio = %f\n", __func__, k, h_ratio);
		DML_LOG_VERBOSE("DML::%s: k=%u, VTaps = %f\n", __func__, k, v_taps);
#endif

		/* Convert line-buffer source lines to time: lines / v_ratio * line time. */
		s->EffectiveLBLatencyHidingY = s->LBLatencyHidingSourceLinesY[k] / v_ratio * (h_total / pixel_clock_mhz);
		s->EffectiveLBLatencyHidingC = s->LBLatencyHidingSourceLinesC[k] / v_ratio_c * (h_total / pixel_clock_mhz);

		/*
		 * With unbounded requests enabled, the luma DET size is extended by
		 * this surface's luma-bandwidth share of the compressed buffer.
		 */
		s->EffectiveDETBufferSizeY = p->DETBufferSizeY[k];
		if (p->UnboundedRequestEnabled) {
			s->EffectiveDETBufferSizeY = s->EffectiveDETBufferSizeY + p->CompressedBufferSizeInkByte * 1024 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio) / (h_total / pixel_clock_mhz) / s->TotalPixelBW;
		}

		/* Lines held in DET, rounded down to whole swaths, then converted to buffering time. */
		s->LinesInDETY[k] = (double)s->EffectiveDETBufferSizeY / p->BytePerPixelDETY[k] / p->SwathWidthY[k];
		s->LinesInDETYRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETY[k], p->SwathHeightY[k]));
		s->FullDETBufferingTimeY = s->LinesInDETYRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio;

		/* Total hiding = line buffer + DET buffering, less the after-scaler delay (DSTX/DSTY) expressed in time. */
		s->ActiveClockChangeLatencyHidingY = s->EffectiveLBLatencyHidingY + s->FullDETBufferingTimeY - ((double)p->DSTXAfterScaler[k] / h_total + (double)p->DSTYAfterScaler[k]) * h_total / pixel_clock_mhz;

		/* With several active surfaces, subtract a (1 - 1/N) fraction of one swath's worth of time. */
		if (p->NumberOfActiveSurfaces > 1) {
			s->ActiveClockChangeLatencyHidingY = s->ActiveClockChangeLatencyHidingY - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightY[k] * (double)h_total / pixel_clock_mhz / v_ratio;
		}

		/* Chroma plane present: repeat the DET computation for chroma and take the smaller hiding time. */
		if (p->BytePerPixelDETC[k] > 0) {
			s->LinesInDETC[k] = p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k];
			s->LinesInDETCRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETC[k], p->SwathHeightC[k]));
			s->FullDETBufferingTimeC = s->LinesInDETCRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio_c;
			s->ActiveClockChangeLatencyHidingC = s->EffectiveLBLatencyHidingC + s->FullDETBufferingTimeC - ((double)p->DSTXAfterScaler[k] / (double)h_total + (double)p->DSTYAfterScaler[k]) * (double)h_total / pixel_clock_mhz;
			if (p->NumberOfActiveSurfaces > 1) {
				s->ActiveClockChangeLatencyHidingC = s->ActiveClockChangeLatencyHidingC - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightC[k] * (double)h_total / pixel_clock_mhz / v_ratio_c;
			}
			s->ActiveClockChangeLatencyHiding = math_min2(s->ActiveClockChangeLatencyHidingY, s->ActiveClockChangeLatencyHidingC);
		} else {
			s->ActiveClockChangeLatencyHiding = s->ActiveClockChangeLatencyHidingY;
		}

		/*
		 * Margins: hiding time minus the matching watermark.
		 * USRRetrainingLatencyMargin is stored but not consumed in this function.
		 */
		s->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->DRAMClockChangeWatermark;
		s->ActiveFCLKChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->FCLKChangeWatermark;
		s->USRRetrainingLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->USRRetrainingWatermark;
		s->g6_temp_read_latency_margin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->temp_read_or_ppt_watermark_us;

		/* Optional outputs; reported before the writeback margins are folded in below. */
		if (p->VActiveLatencyHidingMargin)
			p->VActiveLatencyHidingMargin[k] = s->ActiveDRAMClockChangeLatencyMargin[k];

		if (p->VActiveLatencyHidingUs)
			p->VActiveLatencyHidingUs[k] = s->ActiveClockChangeLatencyHiding;

		/*
		 * If the surface's stream has writeback active, compute the
		 * writeback latency hiding from writeback_stream[0] and clamp the
		 * DRAM/FCLK margins to the writeback margins.
		 */
		if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
			s->WritebackLatencyHiding = (double)p->WritebackInterfaceBufferSize * 1024.0
				/ ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height
					* (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width
					/ ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height * (double)h_total / pixel_clock_mhz) * 4.0);
			/* The dml2_444_64 format halves the hiding time. */
			if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format == dml2_444_64) {
				s->WritebackLatencyHiding = s->WritebackLatencyHiding / 2;
			}
			s->WritebackDRAMClockChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackDRAMClockChangeWatermark;

			s->WritebackFCLKChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackFCLKChangeWatermark;

			s->ActiveDRAMClockChangeLatencyMargin[k] = math_min2(s->ActiveDRAMClockChangeLatencyMargin[k], s->WritebackDRAMClockChangeLatencyMargin);
			s->ActiveFCLKChangeLatencyMargin[k] = math_min2(s->ActiveFCLKChangeLatencyMargin[k], s->WritebackFCLKChangeLatencyMargin);
		}
		/* Phantom pipes report 0; others report margin plus the DRAM clock change latency. */
		p->MaxActiveDRAMClockChangeLatencySupported[k] = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 0 : (s->ActiveDRAMClockChangeLatencyMargin[k] + p->mmSOCParameters.DRAMClockChangeLatency);

		uclk_pstate_change_strategy = p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy;
		reserved_vblank_time_us = (double)p->display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns / 1000;

		/*
		 * FCLK change: vactive when the margin is positive, otherwise
		 * vblank when the reserved vblank time covers the FCLK change
		 * latency, otherwise unsupported.
		 */
		p->FCLKChangeSupport[k] = dml2_pstate_change_unsupported;
		if (s->ActiveFCLKChangeLatencyMargin[k] > 0)
			p->FCLKChangeSupport[k] = dml2_pstate_change_vactive;
		else if (reserved_vblank_time_us >= p->mmSOCParameters.FCLKChangeLatency)
			p->FCLKChangeSupport[k] = dml2_pstate_change_vblank;

		if (p->FCLKChangeSupport[k] == dml2_pstate_change_unsupported)
			*p->global_fclk_change_supported = false;

		/*
		 * DRAM clock change: in "auto" the mode is picked from the margin
		 * and the reserved vblank time (or vblank_and_vactive when all
		 * streams are blanked). The force_vactive / force_vblank strategies
		 * still require their condition to hold, else the surface stays
		 * unsupported. force_drr and both force_mall strategies are applied
		 * unconditionally.
		 */
		p->DRAMClockChangeSupport[k] = dml2_pstate_change_unsupported;
		if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_auto) {
			if (p->display_cfg->overrides.all_streams_blanked ||
				(s->ActiveDRAMClockChangeLatencyMargin[k] > 0 && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency))
				p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank_and_vactive;
			else if (s->ActiveDRAMClockChangeLatencyMargin[k] > 0)
				p->DRAMClockChangeSupport[k] = dml2_pstate_change_vactive;
			else if (reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)
				p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank;
		} else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vactive && s->ActiveDRAMClockChangeLatencyMargin[k] > 0)
			p->DRAMClockChangeSupport[k] = dml2_pstate_change_vactive;
		else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vblank && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)
			p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank;
		else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_drr)
			p->DRAMClockChangeSupport[k] = dml2_pstate_change_drr;
		else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_svp)
			p->DRAMClockChangeSupport[k] = dml2_pstate_change_mall_svp;
		else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame)
			p->DRAMClockChangeSupport[k] = dml2_pstate_change_mall_full_frame;

		if (p->DRAMClockChangeSupport[k] == dml2_pstate_change_unsupported)
			*p->global_dram_clock_change_supported = false;

		/*
		 * Sub-viewport lines for MALL (luma): destination lines covering
		 * DRAM clock change + urgent latency, converted to source lines
		 * (rounded up to a swath), plus the lines already buffered in DET
		 * and the line buffer, plus one meta row.
		 */
		s->dst_y_pstate = (unsigned int)(math_ceil2((p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.UrgentLatency) / (h_total / pixel_clock_mhz), 1));
		s->src_y_pstate_l = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio, p->SwathHeightY[k]));
		s->src_y_ahead_l = (unsigned int)(math_floor2(p->DETBufferSizeY[k] / p->BytePerPixelDETY[k] / p->SwathWidthY[k], p->SwathHeightY[k]) + s->LBLatencyHidingSourceLinesY[k]);
		s->sub_vp_lines_l = s->src_y_pstate_l + s->src_y_ahead_l + p->meta_row_height_l[k];

#ifdef __DML_VBA_DEBUG__
		DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate);
		DML_LOG_VERBOSE("DML::%s: k=%u, src_y_pstate_l = %u\n", __func__, k, s->src_y_pstate_l);
		DML_LOG_VERBOSE("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l);
		DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_height_l = %u\n", __func__, k, p->meta_row_height_l[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l);
#endif
		p->SubViewportLinesNeededInMALL[k] = s->sub_vp_lines_l;

		/* With a chroma plane, take the max of luma and chroma lines; 4:2:0 formats count chroma lines twice. */
		if (p->BytePerPixelDETC[k] > 0) {
			s->src_y_pstate_c = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio_c, p->SwathHeightC[k]));
			s->src_y_ahead_c = (unsigned int)(math_floor2(p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k], p->SwathHeightC[k]) + s->LBLatencyHidingSourceLinesC[k]);
			s->sub_vp_lines_c = s->src_y_pstate_c + s->src_y_ahead_c + p->meta_row_height_c[k];

			if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format))
				p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, 2 * s->sub_vp_lines_c));
			else
				p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, s->sub_vp_lines_c));

#ifdef __DML_VBA_DEBUG__
			DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, k, p->meta_row_height_c[k]);
			DML_LOG_VERBOSE("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c);
			DML_LOG_VERBOSE("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c);
			DML_LOG_VERBOSE("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c);
#endif
		}
	}

	/* Temp-read is supported only if no non-phantom surface has a negative temp-read margin. */
	*p->g6_temp_read_support = true;
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
		if ((!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) &&
			(s->g6_temp_read_latency_margin[k] < 0)) {
			*p->g6_temp_read_support = false;
		}
	}

	/*
	 * Minimum of (FCLK margin + FCLK change latency) over the non-phantom
	 * surfaces; the first non-phantom surface seeds the value.
	 */
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
		if ((!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) && ((!FoundCriticalSurface)
			|| ((s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency) < *p->MaxActiveFCLKChangeLatencySupported))) {
			FoundCriticalSurface = true;
			*p->MaxActiveFCLKChangeLatencySupported = s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency;
		}
	}

#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->global_dram_clock_change_supported);
	DML_LOG_VERBOSE("DML::%s: FCLKChangeSupport = %u\n", __func__, *p->global_fclk_change_supported);
	DML_LOG_VERBOSE("DML::%s: MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported);
	/* NOTE(review): *p->USRRetrainingSupport is only read here; this function never writes it. */
	DML_LOG_VERBOSE("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport);
#endif
}
6981
calculate_bytes_to_fetch_required_to_hide_latency(struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params * p)6982 static void calculate_bytes_to_fetch_required_to_hide_latency(
6983 struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *p)
6984 {
6985 unsigned int dst_lines_to_hide;
6986 unsigned int src_lines_to_hide_l;
6987 unsigned int src_lines_to_hide_c;
6988 unsigned int plane_index;
6989 unsigned int stream_index;
6990
6991 for (plane_index = 0; plane_index < p->num_active_planes; plane_index++) {
6992 if (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[plane_index]))
6993 continue;
6994
6995 stream_index = p->display_cfg->plane_descriptors[plane_index].stream_index;
6996
6997 dst_lines_to_hide = (unsigned int)math_ceil(p->latency_to_hide_us[0] /
6998 ((double)p->display_cfg->stream_descriptors[stream_index].timing.h_total /
6999 (double)p->display_cfg->stream_descriptors[stream_index].timing.pixel_clock_khz * 1000.0));
7000
7001 src_lines_to_hide_l = (unsigned int)math_ceil2(p->display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane0.v_ratio * dst_lines_to_hide,
7002 p->swath_height_l[plane_index]);
7003 p->bytes_required_l[plane_index] = src_lines_to_hide_l * p->num_of_dpp[plane_index] * p->swath_width_l[plane_index] * p->byte_per_pix_l[plane_index];
7004
7005 src_lines_to_hide_c = (unsigned int)math_ceil2(p->display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane1.v_ratio * dst_lines_to_hide,
7006 p->swath_height_c[plane_index]);
7007 p->bytes_required_c[plane_index] = src_lines_to_hide_c * p->num_of_dpp[plane_index] * p->swath_width_c[plane_index] * p->byte_per_pix_c[plane_index];
7008
7009 if (p->display_cfg->plane_descriptors[plane_index].surface.dcc.enable && p->mrq_present) {
7010 p->bytes_required_l[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_l / p->meta_row_height_l[plane_index]) * p->meta_row_bytes_per_row_ub_l[plane_index];
7011 if (p->meta_row_height_c[plane_index]) {
7012 p->bytes_required_c[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_c / p->meta_row_height_c[plane_index]) * p->meta_row_bytes_per_row_ub_c[plane_index];
7013 }
7014 }
7015
7016 if (p->display_cfg->gpuvm_enable == true) {
7017 p->bytes_required_l[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_l / p->dpte_row_height_l[plane_index]) * p->dpte_bytes_per_row_l[plane_index];
7018 if (p->dpte_row_height_c[plane_index]) {
7019 p->bytes_required_c[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_c / p->dpte_row_height_c[plane_index]) * p->dpte_bytes_per_row_c[plane_index];
7020 }
7021 }
7022 }
7023 }
7024
calculate_vactive_det_fill_latency(const struct dml2_display_cfg * display_cfg,unsigned int num_active_planes,unsigned int bytes_required_l[],unsigned int bytes_required_c[],double dcc_dram_bw_nom_overhead_factor_p0[],double dcc_dram_bw_nom_overhead_factor_p1[],double surface_read_bw_l[],double surface_read_bw_c[],double (* surface_avg_vactive_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],double (* surface_peak_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],double vactive_det_fill_delay_us[])7025 static noinline_for_stack void calculate_vactive_det_fill_latency(
7026 const struct dml2_display_cfg *display_cfg,
7027 unsigned int num_active_planes,
7028 unsigned int bytes_required_l[],
7029 unsigned int bytes_required_c[],
7030 double dcc_dram_bw_nom_overhead_factor_p0[],
7031 double dcc_dram_bw_nom_overhead_factor_p1[],
7032 double surface_read_bw_l[],
7033 double surface_read_bw_c[],
7034 double (*surface_avg_vactive_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],
7035 double (*surface_peak_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],
7036 /* output */
7037 double vactive_det_fill_delay_us[])
7038 {
7039 double effective_excess_bandwidth;
7040 double effective_excess_bandwidth_l;
7041 double effective_excess_bandwidth_c;
7042 double adj_factor;
7043 unsigned int plane_index;
7044 unsigned int soc_state;
7045 unsigned int bw_type;
7046
7047 for (plane_index = 0; plane_index < num_active_planes; plane_index++) {
7048 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_index]))
7049 continue;
7050
7051 vactive_det_fill_delay_us[plane_index] = 0.0;
7052 for (soc_state = 0; soc_state < dml2_core_internal_soc_state_max; soc_state++) {
7053 for (bw_type = 0; bw_type < dml2_core_internal_bw_max; bw_type++) {
7054 effective_excess_bandwidth = (surface_peak_required_bw[soc_state][bw_type][plane_index] - surface_avg_vactive_required_bw[soc_state][bw_type][plane_index]);
7055
7056 /* luma */
7057 adj_factor = bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p0[plane_index] : 1.0;
7058
7059 effective_excess_bandwidth_l = effective_excess_bandwidth * surface_read_bw_l[plane_index] / (surface_read_bw_l[plane_index] + surface_read_bw_c[plane_index]) / adj_factor;
7060 if (effective_excess_bandwidth_l > 0.0) {
7061 vactive_det_fill_delay_us[plane_index] = math_max2(vactive_det_fill_delay_us[plane_index], bytes_required_l[plane_index] / effective_excess_bandwidth_l);
7062 }
7063
7064 /* chroma */
7065 adj_factor = bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p1[plane_index] : 1.0;
7066
7067 effective_excess_bandwidth_c = effective_excess_bandwidth * surface_read_bw_c[plane_index] / (surface_read_bw_l[plane_index] + surface_read_bw_c[plane_index]) / adj_factor;
7068 if (effective_excess_bandwidth_c > 0.0) {
7069 vactive_det_fill_delay_us[plane_index] = math_max2(vactive_det_fill_delay_us[plane_index], bytes_required_c[plane_index] / effective_excess_bandwidth_c);
7070 }
7071 }
7072 }
7073 }
7074 }
7075
calculate_excess_vactive_bandwidth_required(const struct dml2_display_cfg * display_cfg,unsigned int num_active_planes,unsigned int bytes_required_l[],unsigned int bytes_required_c[],double excess_vactive_fill_bw_l[],double excess_vactive_fill_bw_c[])7076 static void calculate_excess_vactive_bandwidth_required(
7077 const struct dml2_display_cfg *display_cfg,
7078 unsigned int num_active_planes,
7079 unsigned int bytes_required_l[],
7080 unsigned int bytes_required_c[],
7081 /* outputs */
7082 double excess_vactive_fill_bw_l[],
7083 double excess_vactive_fill_bw_c[])
7084 {
7085 unsigned int plane_index;
7086
7087 for (plane_index = 0; plane_index < num_active_planes; plane_index++) {
7088 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_index]))
7089 continue;
7090
7091 excess_vactive_fill_bw_l[plane_index] = 0.0;
7092 excess_vactive_fill_bw_c[plane_index] = 0.0;
7093
7094 if (display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us[dml2_pstate_type_uclk] > 0) {
7095 excess_vactive_fill_bw_l[plane_index] = (double)bytes_required_l[plane_index] / (double)display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us[dml2_pstate_type_uclk];
7096 excess_vactive_fill_bw_c[plane_index] = (double)bytes_required_c[plane_index] / (double)display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us[dml2_pstate_type_uclk];
7097 }
7098 }
7099 }
7100
uclk_khz_to_dram_bw_mbps(unsigned long uclk_khz,const struct dml2_dram_params * dram_config,const struct dml2_mcg_dram_bw_to_min_clk_table * dram_bw_table)7101 static double uclk_khz_to_dram_bw_mbps(unsigned long uclk_khz, const struct dml2_dram_params *dram_config, const struct dml2_mcg_dram_bw_to_min_clk_table *dram_bw_table)
7102 {
7103 double bw_mbps = 0;
7104 unsigned int i;
7105
7106 if (!dram_config->alt_clock_bw_conversion)
7107 bw_mbps = ((double)uclk_khz * dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0;
7108 else
7109 for (i = 0; i < dram_bw_table->num_entries; i++)
7110 if (dram_bw_table->entries[i].min_uclk_khz >= uclk_khz) {
7111 bw_mbps = (double)dram_bw_table->entries[i].pre_derate_dram_bw_kbps / 1000.0;
7112 break;
7113 }
7114
7115 DML_ASSERT(bw_mbps > 0);
7116
7117 return bw_mbps;
7118 }
7119
dram_bw_kbps_to_uclk_mhz(unsigned long long bw_kbps,const struct dml2_dram_params * dram_config)7120 static double dram_bw_kbps_to_uclk_mhz(unsigned long long bw_kbps, const struct dml2_dram_params *dram_config)
7121 {
7122 double uclk_mhz = 0;
7123
7124 uclk_mhz = (double)bw_kbps / (dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0;
7125
7126 return uclk_mhz;
7127 }
7128
get_qos_param_index(unsigned long uclk_freq_khz,const struct dml2_dcn4_uclk_dpm_dependent_qos_params * per_uclk_dpm_params)7129 static unsigned int get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params)
7130 {
7131 unsigned int i;
7132 unsigned int index = 0;
7133
7134 for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) {
7135 DML_LOG_VERBOSE("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %ld\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz);
7136
7137 if (i == 0)
7138 index = 0;
7139 else
7140 index = i - 1;
7141
7142 if (uclk_freq_khz < per_uclk_dpm_params[i].minimum_uclk_khz ||
7143 per_uclk_dpm_params[i].minimum_uclk_khz == 0) {
7144 break;
7145 }
7146 }
7147 DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz);
7148 DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, index);
7149 return index;
7150 }
7151
get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz,const struct dml2_soc_state_table * clk_table)7152 static unsigned int get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table)
7153 {
7154 unsigned int i;
7155 bool clk_entry_found = false;
7156
7157 for (i = 0; i < clk_table->uclk.num_clk_values; i++) {
7158 DML_LOG_VERBOSE("DML::%s: clk_table.uclk.clk_values_khz[%d] = %ld\n", __func__, i, clk_table->uclk.clk_values_khz[i]);
7159
7160 if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) {
7161 clk_entry_found = true;
7162 break;
7163 }
7164 }
7165
7166 if (!clk_entry_found)
7167 DML_ASSERT(clk_entry_found);
7168 #if defined(__DML_VBA_DEBUG__)
7169 DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz);
7170 DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, i);
7171 #endif
7172 return i;
7173 }
7174
/*
 * Number of bytes a pipe fetches for a flip.
 *
 * Sum of two terms, each truncated to unsigned int on its own:
 *   - vm_bytes scaled by hostvm_inefficiency_factor, plus two meta rows
 *   - two DPTE rows scaled by hostvm_inefficiency_factor
 */
static unsigned int get_pipe_flip_bytes(
		double hostvm_inefficiency_factor,
		unsigned int vm_bytes,
		unsigned int dpte_row_bytes,
		unsigned int meta_row_bytes)
{
	const unsigned int vm_and_meta_bytes =
		(unsigned int) ((vm_bytes * hostvm_inefficiency_factor) + 2*meta_row_bytes);
	const unsigned int dpte_bytes =
		(unsigned int) (2*dpte_row_bytes * hostvm_inefficiency_factor);

	return vm_and_meta_bytes + dpte_bytes;
}
7188
/*
 * Compute the HostVM inefficiency factors.
 *
 * Both outputs are 1 unless GPUVM and HostVM are both enabled. In that case
 * the factor is urg_bandwidth_avail_active_pixel_and_vm divided by
 * urg_bandwidth_avail_active_vm_only. The prefetch factor equals that ratio,
 * except that it is raised to 4 when the ratio is below 4 and
 * remote_iommu_outstanding_translations is below max_outstanding_reqs.
 */
static void calculate_hostvm_inefficiency_factor(
		double *HostVMInefficiencyFactor,
		double *HostVMInefficiencyFactorPrefetch,

		bool gpuvm_enable,
		bool hostvm_enable,
		unsigned int remote_iommu_outstanding_translations,
		unsigned int max_outstanding_reqs,
		double urg_bandwidth_avail_active_pixel_and_vm,
		double urg_bandwidth_avail_active_vm_only)
{
	double ratio;

	*HostVMInefficiencyFactor = 1;
	*HostVMInefficiencyFactorPrefetch = 1;

	if (!gpuvm_enable || !hostvm_enable)
		return;

	ratio = urg_bandwidth_avail_active_pixel_and_vm / urg_bandwidth_avail_active_vm_only;

	*HostVMInefficiencyFactor = ratio;
	if ((ratio < 4) && (remote_iommu_outstanding_translations < max_outstanding_reqs))
		*HostVMInefficiencyFactorPrefetch = 4;
	else
		*HostVMInefficiencyFactorPrefetch = ratio;
#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: urg_bandwidth_avail_active_pixel_and_vm = %f\n", __func__, urg_bandwidth_avail_active_pixel_and_vm);
	DML_LOG_VERBOSE("DML::%s: urg_bandwidth_avail_active_vm_only = %f\n", __func__, urg_bandwidth_avail_active_vm_only);
	DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, *HostVMInefficiencyFactor);
	DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactorPrefetch = %f\n", __func__, *HostVMInefficiencyFactorPrefetch);
#endif
}
7217
7218 struct dml2_core_internal_g6_temp_read_blackouts_table {
7219 struct {
7220 unsigned int uclk_khz;
7221 unsigned int blackout_us;
7222 } entries[DML_MAX_CLK_TABLE_SIZE];
7223 };
7224
7225 struct dml2_core_internal_g6_temp_read_blackouts_table core_dcn4_g6_temp_read_blackout_table = {
7226 .entries = {
7227 {
7228 .uclk_khz = 96000,
7229 .blackout_us = 23,
7230 },
7231 {
7232 .uclk_khz = 435000,
7233 .blackout_us = 10,
7234 },
7235 {
7236 .uclk_khz = 521000,
7237 .blackout_us = 10,
7238 },
7239 {
7240 .uclk_khz = 731000,
7241 .blackout_us = 8,
7242 },
7243 {
7244 .uclk_khz = 822000,
7245 .blackout_us = 8,
7246 },
7247 {
7248 .uclk_khz = 962000,
7249 .blackout_us = 5,
7250 },
7251 {
7252 .uclk_khz = 1069000,
7253 .blackout_us = 5,
7254 },
7255 {
7256 .uclk_khz = 1187000,
7257 .blackout_us = 5,
7258 },
7259 },
7260 };
7261
get_g6_temp_read_blackout_us(struct dml2_soc_bb * soc,unsigned int uclk_freq_khz,unsigned int min_clk_index)7262 static double get_g6_temp_read_blackout_us(
7263 struct dml2_soc_bb *soc,
7264 unsigned int uclk_freq_khz,
7265 unsigned int min_clk_index)
7266 {
7267 unsigned int i;
7268 unsigned int blackout_us = core_dcn4_g6_temp_read_blackout_table.entries[0].blackout_us;
7269
7270 if (soc->power_management_parameters.g6_temp_read_blackout_us[0] > 0.0) {
7271 /* overrides are present in the SoC BB */
7272 return soc->power_management_parameters.g6_temp_read_blackout_us[min_clk_index];
7273 }
7274
7275 /* use internal table */
7276 blackout_us = core_dcn4_g6_temp_read_blackout_table.entries[0].blackout_us;
7277
7278 for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) {
7279 if (uclk_freq_khz < core_dcn4_g6_temp_read_blackout_table.entries[i].uclk_khz ||
7280 core_dcn4_g6_temp_read_blackout_table.entries[i].uclk_khz == 0) {
7281 break;
7282 }
7283
7284 blackout_us = core_dcn4_g6_temp_read_blackout_table.entries[i].blackout_us;
7285 }
7286
7287 return (double)blackout_us;
7288 }
7289
get_max_urgent_latency_us(struct dml2_dcn4x_soc_qos_params * dcn4x,double uclk_freq_mhz,double FabricClock,unsigned int min_clk_index)7290 static double get_max_urgent_latency_us(
7291 struct dml2_dcn4x_soc_qos_params *dcn4x,
7292 double uclk_freq_mhz,
7293 double FabricClock,
7294 unsigned int min_clk_index)
7295 {
7296 double latency;
7297 latency = dcn4x->per_uclk_dpm_params[min_clk_index].maximum_latency_when_urgent_uclk_cycles / uclk_freq_mhz
7298 * (1 + dcn4x->umc_max_latency_margin / 100.0)
7299 + dcn4x->mall_overhead_fclk_cycles / FabricClock
7300 + dcn4x->max_round_trip_to_furthest_cs_fclk_cycles / FabricClock
7301 * (1 + dcn4x->fabric_max_transport_latency_margin / 100.0);
7302 return latency;
7303 }
7304
calculate_pstate_keepout_dst_lines(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_watermarks * watermarks,unsigned int pstate_keepout_dst_lines[])7305 static void calculate_pstate_keepout_dst_lines(
7306 const struct dml2_display_cfg *display_cfg,
7307 const struct dml2_core_internal_watermarks *watermarks,
7308 unsigned int pstate_keepout_dst_lines[])
7309 {
7310 const struct dml2_stream_parameters *stream_descriptor;
7311 unsigned int i;
7312
7313 for (i = 0; i < display_cfg->num_planes; i++) {
7314 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[i])) {
7315 stream_descriptor = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[i].stream_index];
7316
7317 pstate_keepout_dst_lines[i] =
7318 (unsigned int)math_ceil(watermarks->DRAMClockChangeWatermark / ((double)stream_descriptor->timing.h_total * 1000.0 / (double)stream_descriptor->timing.pixel_clock_khz));
7319
7320 if (pstate_keepout_dst_lines[i] > stream_descriptor->timing.v_total - 1) {
7321 pstate_keepout_dst_lines[i] = stream_descriptor->timing.v_total - 1;
7322 }
7323 }
7324 }
7325 }
7326
dml_core_ms_prefetch_check(struct dml2_core_internal_display_mode_lib * mode_lib,const struct dml2_display_cfg * display_cfg)7327 static noinline_for_stack void dml_core_ms_prefetch_check(struct dml2_core_internal_display_mode_lib *mode_lib,
7328 const struct dml2_display_cfg *display_cfg)
7329 {
7330 struct dml2_core_calcs_mode_support_locals *s = &mode_lib->scratch.dml_core_mode_support_locals;
7331 struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params;
7332 struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
7333 struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params;
7334 #ifdef DML_GLOBAL_PREFETCH_CHECK
7335 struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params;
7336 #endif
7337 struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
7338
7339 double min_return_bw_for_latency;
7340 unsigned int k;
7341
7342 mode_lib->ms.TimeCalc = 24 / mode_lib->ms.dcfclk_deepsleep;
7343
7344 calculate_hostvm_inefficiency_factor(
7345 &s->HostVMInefficiencyFactor,
7346 &s->HostVMInefficiencyFactorPrefetch,
7347
7348 display_cfg->gpuvm_enable,
7349 display_cfg->hostvm_enable,
7350 mode_lib->ip.remote_iommu_outstanding_translations,
7351 mode_lib->soc.max_outstanding_reqs,
7352 mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active],
7353 mode_lib->ms.support.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);
7354
7355 mode_lib->ms.Total3dlutActive = 0;
7356 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7357 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut)
7358 mode_lib->ms.Total3dlutActive = mode_lib->ms.Total3dlutActive + 1;
7359
7360 // Calculate tdlut schedule related terms
7361 calculate_tdlut_setting_params->dispclk_mhz = mode_lib->ms.RequiredDISPCLK;
7362 calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
7363 calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode;
7364 calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode;
7365 calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size;
7366 calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
7367 calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
7368 calculate_tdlut_setting_params->tdlut_mpc_width_flag = display_cfg->plane_descriptors[k].tdlut.tdlut_mpc_width_flag;
7369 calculate_tdlut_setting_params->is_gfx11 = dml_get_gfx_version(display_cfg->plane_descriptors[k].surface.tiling);
7370
7371 // output
7372 calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k];
7373 calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k];
7374 calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k];
7375 calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k];
7376 calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k];
7377 calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k];
7378 calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k];
7379
7380 calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params);
7381 }
7382
7383 min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active];
7384
7385 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3)
7386 s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes,
7387 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
7388 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes));
7389
7390 CalculateExtraLatency(
7391 display_cfg,
7392 mode_lib->ip.rob_buffer_size_kbytes,
7393 mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles,
7394 s->ReorderingBytes,
7395 mode_lib->ms.DCFCLK,
7396 mode_lib->ms.FabricClock,
7397 mode_lib->ip.pixel_chunk_size_kbytes,
7398 min_return_bw_for_latency,
7399 mode_lib->ms.num_active_planes,
7400 mode_lib->ms.NoOfDPP,
7401 mode_lib->ms.dpte_group_bytes,
7402 s->tdlut_bytes_per_group,
7403 s->HostVMInefficiencyFactor,
7404 s->HostVMInefficiencyFactorPrefetch,
7405 mode_lib->soc.hostvm_min_page_size_kbytes * 1024,
7406 mode_lib->soc.qos_parameters.qos_type,
7407 !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable),
7408 mode_lib->soc.max_outstanding_reqs,
7409 mode_lib->ms.support.request_size_bytes_luma,
7410 mode_lib->ms.support.request_size_bytes_chroma,
7411 mode_lib->ip.meta_chunk_size_kbytes,
7412 mode_lib->ip.dchub_arb_to_ret_delay,
7413 mode_lib->ms.TripToMemory,
7414 mode_lib->ip.hostvm_mode,
7415
7416 // output
7417 &mode_lib->ms.ExtraLatency,
7418 &mode_lib->ms.ExtraLatency_sr,
7419 &mode_lib->ms.ExtraLatencyPrefetch);
7420
7421 for (k = 0; k < mode_lib->ms.num_active_planes; k++)
7422 s->impacted_dst_y_pre[k] = 0;
7423
7424 s->recalc_prefetch_schedule = 0;
7425 s->recalc_prefetch_done = 0;
7426 do {
7427 mode_lib->ms.support.PrefetchSupported = true;
7428
7429 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7430 s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
7431 s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format;
7432
7433 s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
7434 mode_lib->ms.NoOfDPP[k],
7435 display_cfg->plane_descriptors[k].composition.viewport.plane0.width,
7436 display_cfg->plane_descriptors[k].composition.viewport.plane0.height,
7437 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
7438 display_cfg->plane_descriptors[k].composition.rotation_angle);
7439
7440 s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
7441 mode_lib->ms.NoOfDPP[k],
7442 display_cfg->plane_descriptors[k].composition.viewport.plane1.width,
7443 display_cfg->plane_descriptors[k].composition.viewport.plane1.height,
7444 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
7445 display_cfg->plane_descriptors[k].composition.rotation_angle);
7446
7447 struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe;
7448
7449 mode_lib->ms.TWait[k] = CalculateTWait(
7450 display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns,
7451 mode_lib->ms.UrgLatency,
7452 mode_lib->ms.TripToMemory,
7453 !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ?
7454 get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), mode_lib->ms.state_idx) : 0.0);
7455
7456 myPipe->Dppclk = mode_lib->ms.RequiredDPPCLK[k];
7457 myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK;
7458 myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
7459 myPipe->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep;
7460 myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[k];
7461 myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled;
7462 myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
7463 myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
7464 myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
7465 myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
7466 myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
7467 myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored;
7468 myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
7469 myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
7470 myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
7471 myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
7472 myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
7473 myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors;
7474 myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active;
7475 myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
7476 myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active;
7477 myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
7478 myPipe->ODMMode = mode_lib->ms.ODMMode[k];
7479 myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
7480 myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
7481 myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
7482 myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
7483
7484 #ifdef __DML_VBA_DEBUG__
7485 DML_LOG_VERBOSE("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k);
7486 DML_LOG_VERBOSE("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[k]);
7487 #endif
7488 CalculatePrefetchSchedule_params->display_cfg = display_cfg;
7489 CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch;
7490 CalculatePrefetchSchedule_params->myPipe = myPipe;
7491 CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelay[k];
7492 CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter;
7493 CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl;
7494 CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only;
7495 CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor;
7496 CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal;
7497 CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->ms.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
7498 CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format;
7499 CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters;
7500 CalculatePrefetchSchedule_params->VStartup = s->MaximumVStartup[k];
7501 CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes * 1024;
7502 CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable;
7503 CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled;
7504 CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required;
7505 CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes;
7506 CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->ms.ExtraLatencyPrefetch;
7507 CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc;
7508 CalculatePrefetchSchedule_params->vm_bytes = mode_lib->ms.vm_bytes[k];
7509 CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[k];
7510 CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[k];
7511 CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k];
7512 CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY[k];
7513 CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[k];
7514 CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k];
7515 CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC[k];
7516 CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub[k];
7517 CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub[k];
7518 CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightY[k];
7519 CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightC[k];
7520 CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait[k];
7521 CalculatePrefetchSchedule_params->Ttrip = mode_lib->ms.TripToMemory;
7522 CalculatePrefetchSchedule_params->Turg = mode_lib->ms.UrgLatency;
7523 CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
7524 CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k];
7525 CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k];
7526 CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k];
7527 CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k];
7528 CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0);
7529 CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k];
7530 CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k];
7531 CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
7532 CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present;
7533 CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->ms.meta_row_bytes[k];
7534 CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor[k];
7535 CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k];
7536 CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->ms.vactive_sw_bw_l[k];
7537 CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->ms.vactive_sw_bw_c[k];
7538
7539 // output
7540 CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k];
7541 CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k];
7542 CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->ms.dst_y_prefetch[k];
7543 CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->ms.LinesForVM[k];
7544 CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->ms.LinesForDPTERow[k];
7545 CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[k];
7546 CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[k];
7547 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; // prefetch_sw_bw_l
7548 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; // prefetch_sw_bw_c
7549 CalculatePrefetchSchedule_params->RequiredPrefetchBWMax = &mode_lib->ms.RequiredPrefetchBWMax[k];
7550 CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.NoTimeForDynamicMetadata[k];
7551 CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k];
7552 CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->ms.Tno_bw_flip[k];
7553 CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k];
7554 CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0];
7555 CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1];
7556 CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2];
7557 CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k];
7558 CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k];
7559 CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k];
7560 CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k];
7561 CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k];
7562 CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k];
7563 CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0];
7564 CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1];
7565 CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2];
7566 CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->ms.prefetch_cursor_bw[k];
7567 CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k];
7568 CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k];
7569 CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k];
7570 CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->prefetch_swath_time_us[k];
7571
7572 mode_lib->ms.NoTimeForPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params);
7573
7574 mode_lib->ms.support.PrefetchSupported &= !mode_lib->ms.NoTimeForPrefetch[k];
7575 DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_per_vm_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_vm_vblank);
7576 DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_per_row_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_row_vblank);
7577 } // for k num_planes
7578
7579 CalculateDCFCLKDeepSleepTdlut(
7580 display_cfg,
7581 mode_lib->ms.num_active_planes,
7582 mode_lib->ms.BytePerPixelY,
7583 mode_lib->ms.BytePerPixelC,
7584 mode_lib->ms.SwathWidthY,
7585 mode_lib->ms.SwathWidthC,
7586 mode_lib->ms.NoOfDPP,
7587 mode_lib->ms.PSCL_FACTOR,
7588 mode_lib->ms.PSCL_FACTOR_CHROMA,
7589 mode_lib->ms.RequiredDPPCLK,
7590 mode_lib->ms.vactive_sw_bw_l,
7591 mode_lib->ms.vactive_sw_bw_c,
7592 mode_lib->soc.return_bus_width_bytes,
7593 mode_lib->ms.RequiredDISPCLK,
7594 s->tdlut_bytes_to_deliver,
7595 s->prefetch_swath_time_us,
7596
7597 /* Output */
7598 &mode_lib->ms.dcfclk_deepsleep);
7599
7600 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7601 if (mode_lib->ms.dst_y_prefetch[k] < 2.0
7602 || mode_lib->ms.LinesForVM[k] >= 32.0
7603 || mode_lib->ms.LinesForDPTERow[k] >= 16.0
7604 || mode_lib->ms.NoTimeForPrefetch[k] == true
7605 || s->DSTYAfterScaler[k] > 8) {
7606 mode_lib->ms.support.PrefetchSupported = false;
7607 DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_prefetch=%f (should not be < 2)\n", __func__, k, mode_lib->ms.dst_y_prefetch[k]);
7608 DML_LOG_VERBOSE("DML::%s: k=%d, LinesForVM=%f (should not be >= 32)\n", __func__, k, mode_lib->ms.LinesForVM[k]);
7609 DML_LOG_VERBOSE("DML::%s: k=%d, LinesForDPTERow=%f (should not be >= 16)\n", __func__, k, mode_lib->ms.LinesForDPTERow[k]);
7610 DML_LOG_VERBOSE("DML::%s: k=%d, DSTYAfterScaler=%d (should be <= 8)\n", __func__, k, s->DSTYAfterScaler[k]);
7611 DML_LOG_VERBOSE("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]);
7612 }
7613 }
7614
7615 mode_lib->ms.support.DynamicMetadataSupported = true;
7616 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7617 if (mode_lib->ms.NoTimeForDynamicMetadata[k] == true) {
7618 mode_lib->ms.support.DynamicMetadataSupported = false;
7619 }
7620 }
7621
7622 mode_lib->ms.support.VRatioInPrefetchSupported = true;
7623 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7624 if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ ||
7625 mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) {
7626 mode_lib->ms.support.VRatioInPrefetchSupported = false;
7627 DML_LOG_VERBOSE("DML::%s: k=%d VRatioPreY = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreY[k], __DML2_CALCS_MAX_VRATIO_PRE__);
7628 DML_LOG_VERBOSE("DML::%s: k=%d VRatioPreC = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreC[k], __DML2_CALCS_MAX_VRATIO_PRE__);
7629 DML_LOG_VERBOSE("DML::%s: VRatioInPrefetchSupported = %u\n", __func__, mode_lib->ms.support.VRatioInPrefetchSupported);
7630 }
7631 }
7632
7633 mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.VRatioInPrefetchSupported;
7634
7635 // By default, do not recalc prefetch schedule
7636 s->recalc_prefetch_schedule = 0;
7637
7638 // Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the Prefetch Schedule Check is ok
7639 if (mode_lib->ms.support.PrefetchSupported) {
7640 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7641 // Calculate Urgent burst factor for prefetch
7642 #ifdef __DML_VBA_DEBUG__
7643 DML_LOG_VERBOSE("DML::%s: k=%d, Calling CalculateUrgentBurstFactor (for prefetch)\n", __func__, k);
7644 DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPreY=%f\n", __func__, k, mode_lib->ms.VRatioPreY[k]);
7645 DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPreC=%f\n", __func__, k, mode_lib->ms.VRatioPreC[k]);
7646 #endif
7647 CalculateUrgentBurstFactor(
7648 &display_cfg->plane_descriptors[k],
7649 mode_lib->ms.swath_width_luma_ub[k],
7650 mode_lib->ms.swath_width_chroma_ub[k],
7651 mode_lib->ms.SwathHeightY[k],
7652 mode_lib->ms.SwathHeightC[k],
7653 s->line_times[k],
7654 mode_lib->ms.UrgLatency,
7655 mode_lib->ms.VRatioPreY[k],
7656 mode_lib->ms.VRatioPreC[k],
7657 mode_lib->ms.BytePerPixelInDETY[k],
7658 mode_lib->ms.BytePerPixelInDETC[k],
7659 mode_lib->ms.DETBufferSizeY[k],
7660 mode_lib->ms.DETBufferSizeC[k],
7661 /* Output */
7662 &mode_lib->ms.UrgentBurstFactorLumaPre[k],
7663 &mode_lib->ms.UrgentBurstFactorChromaPre[k],
7664 &mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]);
7665 }
7666
7667 // Calculate urgent bandwidth required, both urg and non urg peak bandwidth
7668 // assume flip bw is 0 at this point
7669 for (k = 0; k < mode_lib->ms.num_active_planes; k++)
7670 mode_lib->ms.final_flip_bw[k] = 0;
7671
7672 calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = mode_lib->ms.support.urg_vactive_bandwidth_required;
7673 calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required;
7674 calculate_peak_bandwidth_params->urg_bandwidth_required_qual = mode_lib->ms.support.urg_bandwidth_required_qual;
7675 calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required;
7676 calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = mode_lib->ms.surface_avg_vactive_required_bw;
7677 calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw;
7678
7679 calculate_peak_bandwidth_params->display_cfg = display_cfg;
7680 calculate_peak_bandwidth_params->inc_flip_bw = 0;
7681 calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes;
7682 calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP;
7683 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0;
7684 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1;
7685 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0;
7686 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1;
7687 calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor;
7688 calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor;
7689
7690 calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l;
7691 calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c;
7692 calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma;
7693 calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma;
7694 calculate_peak_bandwidth_params->prefetch_bandwidth_max = mode_lib->ms.RequiredPrefetchBWMax;
7695 calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l;
7696 calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c;
7697 calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw;
7698 calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
7699 calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw;
7700 calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw;
7701 calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw;
7702 calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw;
7703 calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma;
7704 calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma;
7705 calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor;
7706 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre;
7707 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre;
7708 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre;
7709
7710 calculate_peak_bandwidth_required(
7711 &mode_lib->scratch,
7712 calculate_peak_bandwidth_params);
7713
7714 // Check urg peak bandwidth against available urg bw
7715 // check at SDP and DRAM, for all soc states (SVP prefetch an Sys Active)
7716 check_urgent_bandwidth_support(
7717 &s->dummy_single[0], // double* frac_urg_bandwidth
7718 &s->dummy_single[1], // double* frac_urg_bandwidth_mall
7719 &mode_lib->ms.support.UrgVactiveBandwidthSupport,
7720 &mode_lib->ms.support.PrefetchBandwidthSupported,
7721
7722 mode_lib->soc.mall_allocated_for_dcn_mbytes,
7723 mode_lib->ms.support.non_urg_bandwidth_required,
7724 mode_lib->ms.support.urg_vactive_bandwidth_required,
7725 mode_lib->ms.support.urg_bandwidth_required,
7726 mode_lib->ms.support.urg_bandwidth_available);
7727
7728 mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.PrefetchBandwidthSupported;
7729 DML_LOG_VERBOSE("DML::%s: PrefetchBandwidthSupported=%0d\n", __func__, mode_lib->ms.support.PrefetchBandwidthSupported);
7730
7731 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7732 if (mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]) {
7733 mode_lib->ms.support.PrefetchSupported = false;
7734 DML_LOG_VERBOSE("DML::%s: k=%d, NotEnoughUrgentLatencyHidingPre=%d\n", __func__, k, mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]);
7735 }
7736 }
7737
7738 #ifdef DML_GLOBAL_PREFETCH_CHECK
7739 if (mode_lib->ms.support.PrefetchSupported && mode_lib->ms.num_active_planes > 1 && s->recalc_prefetch_done == 0) {
7740 CheckGlobalPrefetchAdmissibility_params->num_active_planes = mode_lib->ms.num_active_planes;
7741 CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format;
7742 CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
7743 CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
7744 CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l;
7745 CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c;
7746 CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->ms.SwathHeightY;
7747 CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->ms.SwathHeightC;
7748 CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
7749 CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->ms.CompressedBufferSizeInkByte;
7750 CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->ms.DETBufferSizeY;
7751 CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->ms.DETBufferSizeC;
7752 CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l;
7753 CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c;
7754 CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes;
7755 CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = s->Tpre_rounded;
7756 CheckGlobalPrefetchAdmissibility_params->Tpre_oto = s->Tpre_oto;
7757 CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->ms.support.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
7758 CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times;
7759 CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->ms.dst_y_prefetch;
7760 if (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps < 10 * 1024)
7761 CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = 10 * 1024;
7762
7763 CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps / (double) mode_lib->soc.return_bus_width_bytes) /
7764 ((double)mode_lib->soc.qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0);
7765
7766 // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible
7767 CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->recalc_prefetch_schedule;
7768 CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre;
7769 mode_lib->ms.support.PrefetchSupported = CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params);
7770 s->recalc_prefetch_done = 1;
7771 s->recalc_prefetch_schedule = 1;
7772 }
7773 #endif
7774 } // prefetch schedule ok, do urg bw and flip schedule
7775 } while (s->recalc_prefetch_schedule);
7776
7777 // Flip Schedule
7778 // Both prefetch schedule and BW okay
7779 if (mode_lib->ms.support.PrefetchSupported == true) {
7780 mode_lib->ms.BandwidthAvailableForImmediateFlip =
7781 get_bandwidth_available_for_immediate_flip(
7782 dml2_core_internal_soc_state_sys_active,
7783 mode_lib->ms.support.urg_bandwidth_required_qual, // no flip
7784 mode_lib->ms.support.urg_bandwidth_available);
7785
7786 mode_lib->ms.TotImmediateFlipBytes = 0;
7787 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7788 if (display_cfg->plane_descriptors[k].immediate_flip) {
7789 s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes(
7790 s->HostVMInefficiencyFactor,
7791 mode_lib->ms.vm_bytes[k],
7792 mode_lib->ms.DPTEBytesPerRow[k],
7793 mode_lib->ms.meta_row_bytes[k]);
7794 } else {
7795 s->per_pipe_flip_bytes[k] = 0;
7796 }
7797 mode_lib->ms.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->ms.NoOfDPP[k];
7798
7799 }
7800
7801 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7802 CalculateFlipSchedule(
7803 &mode_lib->scratch,
7804 display_cfg->plane_descriptors[k].immediate_flip,
7805 1, // use_lb_flip_bw
7806 s->HostVMInefficiencyFactor,
7807 s->Tvm_trips_flip[k],
7808 s->Tr0_trips_flip[k],
7809 s->Tvm_trips_flip_rounded[k],
7810 s->Tr0_trips_flip_rounded[k],
7811 display_cfg->gpuvm_enable,
7812 mode_lib->ms.vm_bytes[k],
7813 mode_lib->ms.DPTEBytesPerRow[k],
7814 mode_lib->ms.BandwidthAvailableForImmediateFlip,
7815 mode_lib->ms.TotImmediateFlipBytes,
7816 display_cfg->plane_descriptors[k].pixel_format,
7817 (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)),
7818 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
7819 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
7820 mode_lib->ms.Tno_bw_flip[k],
7821 mode_lib->ms.dpte_row_height[k],
7822 mode_lib->ms.dpte_row_height_chroma[k],
7823 mode_lib->ms.use_one_row_for_frame_flip[k],
7824 mode_lib->ip.max_flip_time_us,
7825 mode_lib->ip.max_flip_time_lines,
7826 s->per_pipe_flip_bytes[k],
7827 mode_lib->ms.meta_row_bytes[k],
7828 s->meta_row_height_luma[k],
7829 s->meta_row_height_chroma[k],
7830 mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable,
7831
7832 /* Output */
7833 &mode_lib->ms.dst_y_per_vm_flip[k],
7834 &mode_lib->ms.dst_y_per_row_flip[k],
7835 &mode_lib->ms.final_flip_bw[k],
7836 &mode_lib->ms.ImmediateFlipSupportedForPipe[k]);
7837 }
7838
7839 calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw;
7840 calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required_flip;
7841 calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw;
7842 calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required_flip;
7843 calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw;
7844 calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw;
7845
7846 calculate_peak_bandwidth_params->display_cfg = display_cfg;
7847 calculate_peak_bandwidth_params->inc_flip_bw = 1;
7848 calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes;
7849 calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP;
7850 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0;
7851 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1;
7852 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0;
7853 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1;
7854 calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor;
7855 calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor;
7856
7857 calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l;
7858 calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c;
7859 calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma;
7860 calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma;
7861 calculate_peak_bandwidth_params->prefetch_bandwidth_max = mode_lib->ms.RequiredPrefetchBWMax;
7862 calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l;
7863 calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c;
7864 calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw;
7865 calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
7866 calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw;
7867 calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw;
7868 calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw;
7869 calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw;
7870 calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma;
7871 calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma;
7872 calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor;
7873 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre;
7874 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre;
7875 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre;
7876
7877 calculate_peak_bandwidth_required(
7878 &mode_lib->scratch,
7879 calculate_peak_bandwidth_params);
7880
7881 calculate_immediate_flip_bandwidth_support(
7882 &s->dummy_single[0], // double* frac_urg_bandwidth_flip
7883 &mode_lib->ms.support.ImmediateFlipSupport,
7884
7885 dml2_core_internal_soc_state_sys_active,
7886 mode_lib->ms.support.urg_bandwidth_required_flip,
7887 mode_lib->ms.support.non_urg_bandwidth_required_flip,
7888 mode_lib->ms.support.urg_bandwidth_available);
7889
7890 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7891 if (display_cfg->plane_descriptors[k].immediate_flip == true && mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false)
7892 mode_lib->ms.support.ImmediateFlipSupport = false;
7893 }
7894
7895 } else { // if prefetch not support, assume iflip is not supported too
7896 mode_lib->ms.support.ImmediateFlipSupport = false;
7897 }
7898
7899 s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency;
7900 s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency;
7901 s->mSOCParameters.ExtraLatency_sr = mode_lib->ms.ExtraLatency_sr;
7902 s->mSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us;
7903 s->mSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
7904 s->mSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us;
7905 s->mSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us;
7906 s->mSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us;
7907 s->mSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us;
7908 s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us;
7909 s->mSOCParameters.USRRetrainingLatency = 0;
7910 s->mSOCParameters.SMNLatency = 0;
7911 s->mSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), mode_lib->ms.state_idx);
7912 s->mSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, mode_lib->ms.state_idx);
7913 s->mSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock;
7914 s->mSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type;
7915
7916 CalculateWatermarks_params->display_cfg = display_cfg;
7917 CalculateWatermarks_params->USRRetrainingRequired = false;
7918 CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
7919 CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines;
7920 CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits;
7921 CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes;
7922 CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLK;
7923 CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings;
7924 CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change;
7925 CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
7926 CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters;
7927 CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes;
7928 CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK;
7929 CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep;
7930 CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY;
7931 CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC;
7932 CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightY;
7933 CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightC;
7934 CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthY;
7935 CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthC;
7936 CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPP;
7937 CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY;
7938 CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC;
7939 CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler;
7940 CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler;
7941 CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabled;
7942 CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByte;
7943 CalculateWatermarks_params->meta_row_height_l = s->meta_row_height_luma;
7944 CalculateWatermarks_params->meta_row_height_c = s->meta_row_height_chroma;
7945
7946 // Output
7947 CalculateWatermarks_params->Watermark = &mode_lib->ms.support.watermarks; // Watermarks *Watermark
7948 CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->ms.support.DRAMClockChangeSupport;
7949 CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->ms.support.global_dram_clock_change_supported;
7950 CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // double *MaxActiveDRAMClockChangeLatencySupported[]
7951 CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->ms.SubViewportLinesNeededInMALL; // unsigned int SubViewportLinesNeededInMALL[]
7952 CalculateWatermarks_params->FCLKChangeSupport = mode_lib->ms.support.FCLKChangeSupport;
7953 CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->ms.support.global_fclk_change_supported;
7954 CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // double *MaxActiveFCLKChangeLatencySupported
7955 CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport;
7956 CalculateWatermarks_params->g6_temp_read_support = &mode_lib->ms.support.g6_temp_read_support;
7957 CalculateWatermarks_params->VActiveLatencyHidingMargin = mode_lib->ms.VActiveLatencyHidingMargin;
7958 CalculateWatermarks_params->VActiveLatencyHidingUs = mode_lib->ms.VActiveLatencyHidingUs;
7959
7960 CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params);
7961
7962 calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->ms.support.watermarks, s->dummy_integer_array[0]);
7963 DML_LOG_VERBOSE("DML::%s: Done prefetch calculation\n", __func__);
7964
7965 }
7966
7967
dml_core_mode_support(struct dml2_core_calcs_mode_support_ex * in_out_params)7968 static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out_params)
7969 {
7970 struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib;
7971 const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg;
7972 const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table;
7973
7974 double outstanding_latency_us = 0;
7975
7976 struct dml2_core_calcs_mode_support_locals *s = &mode_lib->scratch.dml_core_mode_support_locals;
7977 struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
7978 struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
7979 struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params;
7980 struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *calculate_bytes_to_fetch_required_to_hide_latency_params = &mode_lib->scratch.calculate_bytes_to_fetch_required_to_hide_latency_params;
7981 unsigned int k, m, n;
7982
7983 memset(&mode_lib->scratch, 0, sizeof(struct dml2_core_internal_scratch));
7984 memset(&mode_lib->ms, 0, sizeof(struct dml2_core_internal_mode_support));
7985
7986 mode_lib->ms.num_active_planes = display_cfg->num_planes;
7987 get_stream_output_bpp(s->OutputBpp, display_cfg);
7988
7989 mode_lib->ms.state_idx = in_out_params->min_clk_index;
7990 mode_lib->ms.SOCCLK = ((double)mode_lib->soc.clk_table.socclk.clk_values_khz[0] / 1000);
7991 mode_lib->ms.DCFCLK = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_dcfclk_khz / 1000);
7992 mode_lib->ms.FabricClock = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz / 1000);
7993 mode_lib->ms.MaxDCFCLK = (double)min_clk_table->max_clocks_khz.dcfclk / 1000;
7994 mode_lib->ms.MaxFabricClock = (double)min_clk_table->max_clocks_khz.fclk / 1000;
7995 mode_lib->ms.max_dispclk_freq_mhz = (double)min_clk_table->max_ss_clocks_khz.dispclk / 1000;
7996 mode_lib->ms.max_dscclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dscclk / 1000;
7997 mode_lib->ms.max_dppclk_freq_mhz = (double)min_clk_table->max_ss_clocks_khz.dppclk / 1000;
7998 mode_lib->ms.uclk_freq_mhz = (double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_uclk_khz / 1000.0;
7999 if (!mode_lib->ms.uclk_freq_mhz)
8000 mode_lib->ms.uclk_freq_mhz = dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config);
8001 mode_lib->ms.dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps / 1000);
8002 mode_lib->ms.max_dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[min_clk_table->dram_bw_table.num_entries - 1].pre_derate_dram_bw_kbps / 1000);
8003 mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params);
8004 mode_lib->ms.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), &mode_lib->soc.clk_table);
8005
8006 #if defined(__DML_VBA_DEBUG__)
8007 DML_LOG_VERBOSE("DML::%s: --- START --- \n", __func__);
8008 DML_LOG_VERBOSE("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes);
8009 DML_LOG_VERBOSE("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index);
8010 DML_LOG_VERBOSE("DML::%s: qos_param_index = %0d\n", __func__, mode_lib->ms.qos_param_index);
8011 DML_LOG_VERBOSE("DML::%s: SOCCLK = %f\n", __func__, mode_lib->ms.SOCCLK);
8012 DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->ms.dram_bw_mbps);
8013 DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz);
8014 DML_LOG_VERBOSE("DML::%s: DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK);
8015 DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
8016 DML_LOG_VERBOSE("DML::%s: MaxDCFCLK = %f\n", __func__, mode_lib->ms.MaxDCFCLK);
8017 DML_LOG_VERBOSE("DML::%s: max_dispclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dispclk_freq_mhz);
8018 DML_LOG_VERBOSE("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz);
8019 DML_LOG_VERBOSE("DML::%s: max_dppclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dppclk_freq_mhz);
8020 DML_LOG_VERBOSE("DML::%s: MaxFabricClock = %f\n", __func__, mode_lib->ms.MaxFabricClock);
8021 DML_LOG_VERBOSE("DML::%s: ip.compressed_buffer_segment_size_in_kbytes = %u\n", __func__, mode_lib->ip.compressed_buffer_segment_size_in_kbytes);
8022 DML_LOG_VERBOSE("DML::%s: ip.dcn_mrq_present = %u\n", __func__, mode_lib->ip.dcn_mrq_present);
8023
8024 for (k = 0; k < mode_lib->ms.num_active_planes; k++)
8025 DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns);
8026 #endif
8027
8028 CalculateMaxDETAndMinCompressedBufferSize(
8029 mode_lib->ip.config_return_buffer_size_in_kbytes,
8030 mode_lib->ip.config_return_buffer_segment_size_in_kbytes,
8031 mode_lib->ip.rob_buffer_size_kbytes,
8032 mode_lib->ip.max_num_dpp,
8033 display_cfg->overrides.hw.force_nom_det_size_kbytes.enable,
8034 display_cfg->overrides.hw.force_nom_det_size_kbytes.value,
8035 mode_lib->ip.dcn_mrq_present,
8036
8037 /* Output */
8038 &mode_lib->ms.MaxTotalDETInKByte,
8039 &mode_lib->ms.NomDETInKByte,
8040 &mode_lib->ms.MinCompressedBufferSizeInKByte);
8041
8042 PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd);
8043
8044 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
8045
8046 /*Scale Ratio, taps Support Check*/
8047 mode_lib->ms.support.ScaleRatioAndTapsSupport = true;
8048 // Many core tests are still setting scaling parameters "incorrectly"
8049 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8050 if (display_cfg->plane_descriptors[k].composition.scaler_info.enabled == false
8051 && (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)
8052 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio != 1.0
8053 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps != 1.0
8054 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio != 1.0
8055 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps != 1.0)) {
8056 mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
8057 } else if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps > 8.0
8058 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 8.0
8059 || (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 1.0 && (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps % 2) == 1)
8060 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > mode_lib->ip.max_hscl_ratio
8061 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > mode_lib->ip.max_vscl_ratio
8062 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps
8063 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps
8064 || (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)
8065 && (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps < 1 || display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps > 8 ||
8066 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps < 1 || display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps > 8 ||
8067 (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps > 1 && display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps % 2 == 1) ||
8068 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio > mode_lib->ip.max_hscl_ratio ||
8069 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio > mode_lib->ip.max_vscl_ratio ||
8070 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps ||
8071 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps))) {
8072 mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
8073 }
8074 }
8075
8076 /*Source Format, Pixel Format and Scan Support Check*/
8077 mode_lib->ms.support.SourceFormatPixelAndScanSupport = true;
8078 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8079 if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear && dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
8080 mode_lib->ms.support.SourceFormatPixelAndScanSupport = false;
8081 }
8082 }
8083
8084 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8085 CalculateBytePerPixelAndBlockSizes(
8086 display_cfg->plane_descriptors[k].pixel_format,
8087 display_cfg->plane_descriptors[k].surface.tiling,
8088 display_cfg->plane_descriptors[k].surface.plane0.pitch,
8089 display_cfg->plane_descriptors[k].surface.plane1.pitch,
8090
8091 /* Output */
8092 &mode_lib->ms.BytePerPixelY[k],
8093 &mode_lib->ms.BytePerPixelC[k],
8094 &mode_lib->ms.BytePerPixelInDETY[k],
8095 &mode_lib->ms.BytePerPixelInDETC[k],
8096 &mode_lib->ms.Read256BlockHeightY[k],
8097 &mode_lib->ms.Read256BlockHeightC[k],
8098 &mode_lib->ms.Read256BlockWidthY[k],
8099 &mode_lib->ms.Read256BlockWidthC[k],
8100 &mode_lib->ms.MacroTileHeightY[k],
8101 &mode_lib->ms.MacroTileHeightC[k],
8102 &mode_lib->ms.MacroTileWidthY[k],
8103 &mode_lib->ms.MacroTileWidthC[k],
8104 &mode_lib->ms.surf_linear128_l[k],
8105 &mode_lib->ms.surf_linear128_c[k]);
8106 }
8107
8108 /*Bandwidth Support Check*/
8109 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8110 if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
8111 mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
8112 mode_lib->ms.SwathWidthCSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
8113 } else {
8114 mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
8115 mode_lib->ms.SwathWidthCSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
8116 }
8117 }
8118
8119 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8120 mode_lib->ms.vactive_sw_bw_l[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
8121 mode_lib->ms.vactive_sw_bw_c[k] = mode_lib->ms.SwathWidthCSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
8122
8123 mode_lib->ms.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width *
8124 display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000));
8125
8126 #ifdef __DML_VBA_DEBUG__
8127 DML_LOG_VERBOSE("DML::%s: k=%u, old_ReadBandwidthLuma = %f\n", __func__, k, mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelInDETY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
8128 DML_LOG_VERBOSE("DML::%s: k=%u, old_ReadBandwidthChroma = %f\n", __func__, k, mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * math_ceil2(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio / 2.0);
8129 DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_l[k]);
8130 DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_c[k]);
8131 #endif
8132 }
8133
8134 // Writeback bandwidth
8135 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
8136 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format == dml2_444_64) {
8137 mode_lib->ms.WriteBandwidth[k][0] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height
8138 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width
8139 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height
8140 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total
8141 / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 8.0;
8142 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
8143 mode_lib->ms.WriteBandwidth[k][0] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height
8144 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width
8145 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height
8146 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total
8147 / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 4.0;
8148 } else {
8149 mode_lib->ms.WriteBandwidth[k][0] = 0.0;
8150 }
8151 }
8152
8153 /*Writeback Latency support check*/
8154 mode_lib->ms.support.WritebackLatencySupport = true;
8155 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8156 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0 &&
8157 (mode_lib->ms.WriteBandwidth[k][0] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024 / ((double)mode_lib->soc.qos_parameters.writeback.base_latency_us))) {
8158 mode_lib->ms.support.WritebackLatencySupport = false;
8159 }
8160 }
8161
8162
8163 /* Writeback Scale Ratio and Taps Support Check */
8164 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = true;
8165 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8166 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
8167 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio > mode_lib->ip.writeback_max_hscl_ratio
8168 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio > mode_lib->ip.writeback_max_vscl_ratio
8169 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio < mode_lib->ip.writeback_min_hscl_ratio
8170 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio < mode_lib->ip.writeback_min_vscl_ratio
8171 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps > (unsigned int) mode_lib->ip.writeback_max_hscl_taps
8172 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps > (unsigned int) mode_lib->ip.writeback_max_vscl_taps
8173 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps
8174 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps
8175 || (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps > 2.0 && ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps % 2) == 1))) {
8176 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
8177 }
8178 if (2.0 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height * (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps - 1) * 57 > mode_lib->ip.writeback_line_buffer_buffer_size) {
8179 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
8180 }
8181 }
8182 }
8183
8184 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8185 CalculateSinglePipeDPPCLKAndSCLThroughput(
8186 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
8187 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
8188 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
8189 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
8190 mode_lib->ip.max_dchub_pscl_bw_pix_per_clk,
8191 mode_lib->ip.max_pscl_lb_bw_pix_per_clk,
8192 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
8193 display_cfg->plane_descriptors[k].pixel_format,
8194 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps,
8195 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps,
8196 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps,
8197 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps,
8198 /* Output */
8199 &mode_lib->ms.PSCL_FACTOR[k],
8200 &mode_lib->ms.PSCL_FACTOR_CHROMA[k],
8201 &mode_lib->ms.MinDPPCLKUsingSingleDPP[k]);
8202 }
8203
8204 // Max Viewport Size support
8205 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
8206 if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) {
8207 s->MaximumSwathWidthSupportLuma = 15360;
8208 } else if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // horz video
8209 s->MaximumSwathWidthSupportLuma = 7680 + 16;
8210 } else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // vert video
8211 s->MaximumSwathWidthSupportLuma = 4320 + 16;
8212 } else if (display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { // rgbe + alpha
8213 s->MaximumSwathWidthSupportLuma = 5120 + 16;
8214 } else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelY[k] == 8 && display_cfg->plane_descriptors[k].surface.dcc.enable == true) { // vert 64bpp
8215 s->MaximumSwathWidthSupportLuma = 3072 + 16;
8216 } else {
8217 s->MaximumSwathWidthSupportLuma = 6144 + 16;
8218 }
8219
8220 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) {
8221 s->MaximumSwathWidthSupportChroma = (unsigned int)(s->MaximumSwathWidthSupportLuma / 2.0);
8222 } else {
8223 s->MaximumSwathWidthSupportChroma = s->MaximumSwathWidthSupportLuma;
8224 }
8225
8226 unsigned lb_buffer_size_bits_luma = mode_lib->ip.line_buffer_size_bits;
8227 unsigned lb_buffer_size_bits_chroma = mode_lib->ip.line_buffer_size_bits;
8228
8229 /*
8230 #if defined(DV_BUILD)
8231 // Assume a memory config setting of 3 in 420 mode or get a new ip parameter that reflects the programming.
8232 if (mode_lib->ms.BytePerPixelC[k] != 0.0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) {
8233 lb_buffer_size_bits_luma = 34620 * 57;
8234 lb_buffer_size_bits_chroma = 13560 * 57;
8235 }
8236 #endif
8237 */
8238 mode_lib->ms.MaximumSwathWidthInLineBufferLuma = lb_buffer_size_bits_luma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 1.0) / 57 /
8239 (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, 1.0) - 2, 0.0));
8240 if (mode_lib->ms.BytePerPixelC[k] == 0.0) {
8241 mode_lib->ms.MaximumSwathWidthInLineBufferChroma = 0;
8242 } else {
8243 mode_lib->ms.MaximumSwathWidthInLineBufferChroma = lb_buffer_size_bits_chroma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 1.0) / 57 /
8244 (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, 1.0) - 2, 0.0));
8245 }
8246
8247 mode_lib->ms.MaximumSwathWidthLuma[k] = math_min2(s->MaximumSwathWidthSupportLuma, mode_lib->ms.MaximumSwathWidthInLineBufferLuma);
8248 mode_lib->ms.MaximumSwathWidthChroma[k] = math_min2(s->MaximumSwathWidthSupportChroma, mode_lib->ms.MaximumSwathWidthInLineBufferChroma);
8249
8250 DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthLuma[k]);
8251 DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthSupportLuma=%u\n", __func__, k, s->MaximumSwathWidthSupportLuma);
8252 DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthInLineBufferLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferLuma);
8253
8254 DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthChroma[k]);
8255 DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthSupportChroma=%u\n", __func__, k, s->MaximumSwathWidthSupportChroma);
8256 DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthInLineBufferChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferChroma);
8257 }
8258
8259 /* Cursor Support Check */
8260 mode_lib->ms.support.CursorSupport = true;
8261 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
8262 if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) {
8263 if (display_cfg->plane_descriptors[k].cursor.cursor_bpp == 64 && mode_lib->ip.cursor_64bpp_support == false)
8264 mode_lib->ms.support.CursorSupport = false;
8265 }
8266 }
8267
8268 /* Valid Pitch Check */
8269 mode_lib->ms.support.PitchSupport = true;
8270 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
8271
8272 // data pitch
8273 unsigned int alignment_l = mode_lib->ms.MacroTileWidthY[k];
8274
8275 if (mode_lib->ms.surf_linear128_l[k])
8276 alignment_l = alignment_l / 2;
8277
8278 mode_lib->ms.support.AlignedYPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane0.pitch, display_cfg->plane_descriptors[k].surface.plane0.width), alignment_l);
8279 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) {
8280 unsigned int alignment_c = mode_lib->ms.MacroTileWidthC[k];
8281
8282 if (mode_lib->ms.surf_linear128_c[k])
8283 alignment_c = alignment_c / 2;
8284 mode_lib->ms.support.AlignedCPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane1.pitch, display_cfg->plane_descriptors[k].surface.plane1.width), alignment_c);
8285 } else {
8286 mode_lib->ms.support.AlignedCPitch[k] = display_cfg->plane_descriptors[k].surface.plane1.pitch;
8287 }
8288
8289 if (mode_lib->ms.support.AlignedYPitch[k] > display_cfg->plane_descriptors[k].surface.plane0.pitch ||
8290 mode_lib->ms.support.AlignedCPitch[k] > display_cfg->plane_descriptors[k].surface.plane1.pitch) {
8291 mode_lib->ms.support.PitchSupport = false;
8292 #if defined(__DML_VBA_DEBUG__)
8293 DML_LOG_VERBOSE("DML::%s: k=%u AlignedYPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedYPitch[k]);
8294 DML_LOG_VERBOSE("DML::%s: k=%u PitchY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.pitch);
8295 DML_LOG_VERBOSE("DML::%s: k=%u AlignedCPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedCPitch[k]);
8296 DML_LOG_VERBOSE("DML::%s: k=%u PitchC = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane1.pitch);
8297 DML_LOG_VERBOSE("DML::%s: k=%u PitchSupport = %d\n", __func__, k, mode_lib->ms.support.PitchSupport);
8298 #endif
8299 }
8300
8301 // meta pitch
8302 if (mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable) {
8303 mode_lib->ms.support.AlignedDCCMetaPitchY[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch,
8304 display_cfg->plane_descriptors[k].surface.plane0.width), 64.0 * mode_lib->ms.Read256BlockWidthY[k]);
8305
8306 if (mode_lib->ms.support.AlignedDCCMetaPitchY[k] > display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch)
8307 mode_lib->ms.support.PitchSupport = false;
8308
8309 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) {
8310 mode_lib->ms.support.AlignedDCCMetaPitchC[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch,
8311 display_cfg->plane_descriptors[k].surface.plane1.width), 64.0 * mode_lib->ms.Read256BlockWidthC[k]);
8312
8313 if (mode_lib->ms.support.AlignedDCCMetaPitchC[k] > display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch)
8314 mode_lib->ms.support.PitchSupport = false;
8315 }
8316 } else {
8317 mode_lib->ms.support.AlignedDCCMetaPitchY[k] = 0;
8318 mode_lib->ms.support.AlignedDCCMetaPitchC[k] = 0;
8319 }
8320 }
8321
8322 mode_lib->ms.support.ViewportExceedsSurface = false;
8323 if (!display_cfg->overrides.hw.surface_viewport_size_check_disable) {
8324 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
8325 if (display_cfg->plane_descriptors[k].composition.viewport.plane0.width > display_cfg->plane_descriptors[k].surface.plane0.width ||
8326 display_cfg->plane_descriptors[k].composition.viewport.plane0.height > display_cfg->plane_descriptors[k].surface.plane0.height) {
8327 mode_lib->ms.support.ViewportExceedsSurface = true;
8328 #if defined(__DML_VBA_DEBUG__)
8329 DML_LOG_VERBOSE("DML::%s: k=%u ViewportWidth = %ld\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width);
8330 DML_LOG_VERBOSE("DML::%s: k=%u SurfaceWidthY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.width);
8331 DML_LOG_VERBOSE("DML::%s: k=%u ViewportHeight = %ld\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height);
8332 DML_LOG_VERBOSE("DML::%s: k=%u SurfaceHeightY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.height);
8333 DML_LOG_VERBOSE("DML::%s: k=%u ViewportExceedsSurface = %d\n", __func__, k, mode_lib->ms.support.ViewportExceedsSurface);
8334 #endif
8335 }
8336 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) {
8337 if (display_cfg->plane_descriptors[k].composition.viewport.plane1.width > display_cfg->plane_descriptors[k].surface.plane1.width ||
8338 display_cfg->plane_descriptors[k].composition.viewport.plane1.height > display_cfg->plane_descriptors[k].surface.plane1.height) {
8339 mode_lib->ms.support.ViewportExceedsSurface = true;
8340 }
8341 }
8342 }
8343 }
8344
8345 CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg;
8346 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes;
8347 CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte;
8348 CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte;
8349 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
8350 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
8351 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
8352 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
8353 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = 1;
8354 CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
8355 CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
8356 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
8357 CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes;
8358 CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.vactive_sw_bw_l;
8359 CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.vactive_sw_bw_c;
8360 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma;
8361 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma;
8362 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY;
8363 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC;
8364 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY;
8365 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC;
8366 CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->ms.surf_linear128_l;
8367 CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->ms.surf_linear128_c;
8368 CalculateSwathAndDETConfiguration_params->ODMMode = s->dummy_odm_mode;
8369 CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY;
8370 CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC;
8371 CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY;
8372 CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC;
8373 CalculateSwathAndDETConfiguration_params->DPPPerSurface = s->dummy_integer_array[2];
8374 CalculateSwathAndDETConfiguration_params->mrq_present = mode_lib->ip.dcn_mrq_present;
8375
8376 // output
8377 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0];
8378 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1];
8379 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_integer_array[3];
8380 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_integer_array[4];
8381 CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_integer_array[5];
8382 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_integer_array[6];
8383 CalculateSwathAndDETConfiguration_params->SwathHeightY = s->dummy_integer_array[7];
8384 CalculateSwathAndDETConfiguration_params->SwathHeightC = s->dummy_integer_array[8];
8385 CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = s->dummy_integer_array[26];
8386 CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = s->dummy_integer_array[27];
8387 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = s->dummy_integer_array[9];
8388 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = s->dummy_integer_array[10];
8389 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = s->dummy_integer_array[11];
8390 CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l;
8391 CalculateSwathAndDETConfiguration_params->full_swath_bytes_c = s->full_swath_bytes_c;
8392 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &s->dummy_boolean[0];
8393 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[1];
8394 CalculateSwathAndDETConfiguration_params->hw_debug5 = &s->dummy_boolean[2];
8395 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &s->dummy_integer[0];
8396 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = mode_lib->ms.SingleDPPViewportSizeSupportPerSurface;
8397 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[1];
8398
8399 // This call is just to find out if there is enough DET space to support full vp in 1 pipe.
8400 CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
8401
8402 mode_lib->ms.TotalNumberOfActiveDPP = 0;
8403 mode_lib->ms.TotalNumberOfActiveOPP = 0;
8404 mode_lib->ms.support.TotalAvailablePipesSupport = true;
8405
8406 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8407 /*Number Of DSC Slices*/
8408 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable ||
8409 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) {
8410
8411 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices != 0)
8412 mode_lib->ms.support.NumberOfDSCSlices[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices;
8413 else {
8414 if (s->PixelClockBackEnd[k] > 4800) {
8415 mode_lib->ms.support.NumberOfDSCSlices[k] = (unsigned int)(math_ceil2(s->PixelClockBackEnd[k] / 600, 4));
8416 } else if (s->PixelClockBackEnd[k] > 2400) {
8417 mode_lib->ms.support.NumberOfDSCSlices[k] = 8;
8418 } else if (s->PixelClockBackEnd[k] > 1200) {
8419 mode_lib->ms.support.NumberOfDSCSlices[k] = 4;
8420 } else if (s->PixelClockBackEnd[k] > 340) {
8421 mode_lib->ms.support.NumberOfDSCSlices[k] = 2;
8422 } else {
8423 mode_lib->ms.support.NumberOfDSCSlices[k] = 1;
8424 }
8425 }
8426 } else {
8427 mode_lib->ms.support.NumberOfDSCSlices[k] = 0;
8428 }
8429
8430 CalculateODMMode(
8431 mode_lib->ip.maximum_pixels_per_line_per_dsc_unit,
8432 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
8433 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
8434 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
8435 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode,
8436 mode_lib->ms.max_dispclk_freq_mhz,
8437 false, // DSCEnable
8438 mode_lib->ms.TotalNumberOfActiveDPP,
8439 mode_lib->ms.TotalNumberOfActiveOPP,
8440 mode_lib->ip.max_num_dpp,
8441 mode_lib->ip.max_num_opp,
8442 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
8443 mode_lib->ms.support.NumberOfDSCSlices[k],
8444
8445 /* Output */
8446 &s->TotalAvailablePipesSupportNoDSC,
8447 &s->NumberOfDPPNoDSC,
8448 &s->ODMModeNoDSC,
8449 &s->RequiredDISPCLKPerSurfaceNoDSC);
8450
8451 CalculateODMMode(
8452 mode_lib->ip.maximum_pixels_per_line_per_dsc_unit,
8453 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
8454 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
8455 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
8456 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode,
8457 mode_lib->ms.max_dispclk_freq_mhz,
8458 true, // DSCEnable
8459 mode_lib->ms.TotalNumberOfActiveDPP,
8460 mode_lib->ms.TotalNumberOfActiveOPP,
8461 mode_lib->ip.max_num_dpp,
8462 mode_lib->ip.max_num_opp,
8463 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
8464 mode_lib->ms.support.NumberOfDSCSlices[k],
8465
8466 /* Output */
8467 &s->TotalAvailablePipesSupportDSC,
8468 &s->NumberOfDPPDSC,
8469 &s->ODMModeDSC,
8470 &s->RequiredDISPCLKPerSurfaceDSC);
8471
8472 CalculateOutputLink(
8473 &mode_lib->scratch,
8474 ((double)mode_lib->soc.clk_table.phyclk.clk_values_khz[0] / 1000),
8475 ((double)mode_lib->soc.clk_table.phyclk_d18.clk_values_khz[0] / 1000),
8476 ((double)mode_lib->soc.clk_table.phyclk_d32.clk_values_khz[0] / 1000),
8477 mode_lib->soc.phy_downspread_percent,
8478 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
8479 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
8480 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
8481 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
8482 s->PixelClockBackEnd[k],
8483 s->OutputBpp[k],
8484 mode_lib->ip.maximum_dsc_bits_per_component,
8485 mode_lib->ms.support.NumberOfDSCSlices[k],
8486 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate,
8487 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout,
8488 s->ODMModeNoDSC,
8489 s->ODMModeDSC,
8490 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable,
8491 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count,
8492 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate,
8493
8494 /* Output */
8495 &mode_lib->ms.RequiresDSC[k],
8496 &mode_lib->ms.RequiresFEC[k],
8497 &mode_lib->ms.OutputBpp[k],
8498 &mode_lib->ms.OutputType[k],
8499 &mode_lib->ms.OutputRate[k],
8500 &mode_lib->ms.RequiredSlots[k]);
8501
8502 if (s->OutputBpp[k] == 0.0) {
8503 s->OutputBpp[k] = mode_lib->ms.OutputBpp[k];
8504 }
8505
8506 if (mode_lib->ms.RequiresDSC[k] == false) {
8507 mode_lib->ms.ODMMode[k] = s->ODMModeNoDSC;
8508 mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceNoDSC;
8509 if (!s->TotalAvailablePipesSupportNoDSC)
8510 mode_lib->ms.support.TotalAvailablePipesSupport = false;
8511 mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPNoDSC;
8512 } else {
8513 mode_lib->ms.ODMMode[k] = s->ODMModeDSC;
8514 mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceDSC;
8515 if (!s->TotalAvailablePipesSupportDSC)
8516 mode_lib->ms.support.TotalAvailablePipesSupport = false;
8517 mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPDSC;
8518 }
8519 #if defined(__DML_VBA_DEBUG__)
8520 DML_LOG_VERBOSE("DML::%s: k=%d RequiresDSC = %d\n", __func__, k, mode_lib->ms.RequiresDSC[k]);
8521 DML_LOG_VERBOSE("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]);
8522 #endif
8523
8524 // ensure the number of dsc slices is an integer multiple based on ODM mode
8525 mode_lib->ms.support.DSCSlicesODMModeSupported = true;
8526 if (mode_lib->ms.RequiresDSC[k]) {
8527 // fail a ms check if the override num_slices doesn't align with odm mode setting
8528 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices != 0) {
8529 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
8530 mode_lib->ms.support.DSCSlicesODMModeSupported = ((mode_lib->ms.support.NumberOfDSCSlices[k] % 2) == 0);
8531 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
8532 mode_lib->ms.support.DSCSlicesODMModeSupported = (mode_lib->ms.support.NumberOfDSCSlices[k] == 12);
8533 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
8534 mode_lib->ms.support.DSCSlicesODMModeSupported = ((mode_lib->ms.support.NumberOfDSCSlices[k] % 4) == 0);
8535 #if defined(__DML_VBA_DEBUG__)
8536 if (!mode_lib->ms.support.DSCSlicesODMModeSupported) {
8537 DML_LOG_VERBOSE("DML::%s: k=%d Invalid dsc num_slices and ODM mode setting\n", __func__, k);
8538 DML_LOG_VERBOSE("DML::%s: k=%d num_slices = %d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices);
8539 DML_LOG_VERBOSE("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]);
8540 }
8541 #endif
8542 } else {
8543 // safeguard to ensure the dml derived dsc slices and odm setting are compatible
8544 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
8545 mode_lib->ms.support.NumberOfDSCSlices[k] = 2 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 2.0, 1.0);
8546 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
8547 mode_lib->ms.support.NumberOfDSCSlices[k] = 12;
8548 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
8549 mode_lib->ms.support.NumberOfDSCSlices[k] = 4 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 4.0, 1.0);
8550 }
8551
8552 } else {
8553 mode_lib->ms.support.NumberOfDSCSlices[k] = 0;
8554 }
8555 }
8556
8557 mode_lib->ms.support.incorrect_imall_usage = 0;
8558 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8559 if (mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall)
8560 mode_lib->ms.support.incorrect_imall_usage = 1;
8561 }
8562
8563 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8564 mode_lib->ms.MPCCombine[k] = false;
8565 mode_lib->ms.NoOfDPP[k] = 1;
8566 mode_lib->ms.NoOfOPP[k] = 1;
8567
8568 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) {
8569 mode_lib->ms.MPCCombine[k] = false;
8570 mode_lib->ms.NoOfDPP[k] = 4;
8571 mode_lib->ms.NoOfOPP[k] = 4;
8572 } else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) {
8573 mode_lib->ms.MPCCombine[k] = false;
8574 mode_lib->ms.NoOfDPP[k] = 3;
8575 mode_lib->ms.NoOfOPP[k] = 3;
8576 } else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) {
8577 mode_lib->ms.MPCCombine[k] = false;
8578 mode_lib->ms.NoOfDPP[k] = 2;
8579 mode_lib->ms.NoOfOPP[k] = 2;
8580 } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 2) {
8581 mode_lib->ms.MPCCombine[k] = true;
8582 mode_lib->ms.NoOfDPP[k] = 2;
8583 } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 1) {
8584 mode_lib->ms.MPCCombine[k] = false;
8585 mode_lib->ms.NoOfDPP[k] = 1;
8586 if (!mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) {
8587 DML_LOG_VERBOSE("WARNING: DML::%s: MPCC is override to disable but viewport is too large to be supported with single pipe!\n", __func__);
8588 }
8589 } else {
8590 if ((mode_lib->ms.MinDPPCLKUsingSingleDPP[k] > mode_lib->ms.max_dppclk_freq_mhz) || !mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) {
8591 mode_lib->ms.MPCCombine[k] = true;
8592 mode_lib->ms.NoOfDPP[k] = 2;
8593 }
8594 }
8595 #if defined(__DML_VBA_DEBUG__)
8596 DML_LOG_VERBOSE("DML::%s: k=%d, NoOfDPP = %d\n", __func__, k, mode_lib->ms.NoOfDPP[k]);
8597 #endif
8598 }
8599
8600 mode_lib->ms.TotalNumberOfActiveDPP = 0;
8601 mode_lib->ms.TotalNumberOfActiveOPP = 0;
8602 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8603 mode_lib->ms.TotalNumberOfActiveDPP += mode_lib->ms.NoOfDPP[k];
8604 mode_lib->ms.TotalNumberOfActiveOPP += mode_lib->ms.NoOfOPP[k];
8605 }
8606 if (mode_lib->ms.TotalNumberOfActiveDPP > (unsigned int)mode_lib->ip.max_num_dpp)
8607 mode_lib->ms.support.TotalAvailablePipesSupport = false;
8608 if (mode_lib->ms.TotalNumberOfActiveOPP > (unsigned int)mode_lib->ip.max_num_opp)
8609 mode_lib->ms.support.TotalAvailablePipesSupport = false;
8610
8611
8612 mode_lib->ms.TotalNumberOfSingleDPPSurfaces = 0;
8613 for (k = 0; k < (unsigned int)mode_lib->ms.num_active_planes; ++k) {
8614 if (mode_lib->ms.NoOfDPP[k] == 1)
8615 mode_lib->ms.TotalNumberOfSingleDPPSurfaces = mode_lib->ms.TotalNumberOfSingleDPPSurfaces + 1;
8616 }
8617
8618 //DISPCLK/DPPCLK
8619 mode_lib->ms.WritebackRequiredDISPCLK = 0;
8620 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8621 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
8622 mode_lib->ms.WritebackRequiredDISPCLK = math_max2(mode_lib->ms.WritebackRequiredDISPCLK,
8623 CalculateWriteBackDISPCLK(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format,
8624 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
8625 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio,
8626 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio,
8627 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps,
8628 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps,
8629 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_width,
8630 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width,
8631 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
8632 mode_lib->ip.writeback_line_buffer_buffer_size));
8633 }
8634 }
8635
8636 mode_lib->ms.RequiredDISPCLK = mode_lib->ms.WritebackRequiredDISPCLK;
8637 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8638 mode_lib->ms.RequiredDISPCLK = math_max2(mode_lib->ms.RequiredDISPCLK, mode_lib->ms.RequiredDISPCLKPerSurface[k]);
8639 }
8640
8641 mode_lib->ms.GlobalDPPCLK = 0;
8642 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8643 mode_lib->ms.RequiredDPPCLK[k] = mode_lib->ms.MinDPPCLKUsingSingleDPP[k] / mode_lib->ms.NoOfDPP[k];
8644 mode_lib->ms.GlobalDPPCLK = math_max2(mode_lib->ms.GlobalDPPCLK, mode_lib->ms.RequiredDPPCLK[k]);
8645 }
8646
8647 mode_lib->ms.support.DISPCLK_DPPCLK_Support = !((mode_lib->ms.RequiredDISPCLK > mode_lib->ms.max_dispclk_freq_mhz) || (mode_lib->ms.GlobalDPPCLK > mode_lib->ms.max_dppclk_freq_mhz));
8648
8649 /* Total Available OTG, Writeback, HDMIFRL, DP Support Check */
8650 s->TotalNumberOfActiveOTG = 0;
8651 s->TotalNumberOfActiveHDMIFRL = 0;
8652 s->TotalNumberOfActiveDP2p0 = 0;
8653 s->TotalNumberOfActiveDP2p0Outputs = 0;
8654 s->TotalNumberOfActiveWriteback = 0;
8655 memset(s->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool));
8656
8657 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8658 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
8659 if (!s->stream_visited[display_cfg->plane_descriptors[k].stream_index]) {
8660 s->stream_visited[display_cfg->plane_descriptors[k].stream_index] = 1;
8661
8662 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0)
8663 s->TotalNumberOfActiveWriteback = s->TotalNumberOfActiveWriteback + 1;
8664
8665 s->TotalNumberOfActiveOTG = s->TotalNumberOfActiveOTG + 1;
8666 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl)
8667 s->TotalNumberOfActiveHDMIFRL = s->TotalNumberOfActiveHDMIFRL + 1;
8668 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0) {
8669 s->TotalNumberOfActiveDP2p0 = s->TotalNumberOfActiveDP2p0 + 1;
8670 // FIXME_STAGE2: SW not using backend related stuff, need mapping for mst setup
8671 //if (display_cfg->output.OutputMultistreamId[k] == k || display_cfg->output.OutputMultistreamEn[k] == false) {
8672 s->TotalNumberOfActiveDP2p0Outputs = s->TotalNumberOfActiveDP2p0Outputs + 1;
8673 //}
8674 }
8675 }
8676 }
8677 }
8678
8679 /* Writeback Mode Support Check */
8680 mode_lib->ms.support.EnoughWritebackUnits = 1;
8681 if (s->TotalNumberOfActiveWriteback > (unsigned int)mode_lib->ip.max_num_wb) {
8682 mode_lib->ms.support.EnoughWritebackUnits = false;
8683 }
8684 mode_lib->ms.support.NumberOfOTGSupport = (s->TotalNumberOfActiveOTG <= (unsigned int)mode_lib->ip.max_num_otg);
8685 mode_lib->ms.support.NumberOfHDMIFRLSupport = (s->TotalNumberOfActiveHDMIFRL <= (unsigned int)mode_lib->ip.max_num_hdmi_frl_outputs);
8686 mode_lib->ms.support.NumberOfDP2p0Support = (s->TotalNumberOfActiveDP2p0 <= (unsigned int)mode_lib->ip.max_num_dp2p0_streams && s->TotalNumberOfActiveDP2p0Outputs <= (unsigned int)mode_lib->ip.max_num_dp2p0_outputs);
8687
8688
8689 mode_lib->ms.support.ExceededMultistreamSlots = false;
8690 mode_lib->ms.support.LinkCapacitySupport = true;
8691 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8692 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_disabled == false &&
8693 (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
8694 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) && mode_lib->ms.OutputBpp[k] == 0) {
8695 mode_lib->ms.support.LinkCapacitySupport = false;
8696 }
8697 }
8698
8699 mode_lib->ms.support.P2IWith420 = false;
8700 mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = false;
8701 mode_lib->ms.support.DSC422NativeNotSupported = false;
8702 mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = false;
8703 mode_lib->ms.support.LinkRateForMultistreamNotIndicated = false;
8704 mode_lib->ms.support.BPPForMultistreamNotIndicated = false;
8705 mode_lib->ms.support.MultistreamWithHDMIOreDP = false;
8706 mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = false;
8707 mode_lib->ms.support.NotEnoughLanesForMSO = false;
8708
8709 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8710 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
8711 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) {
8712 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && mode_lib->ip.ptoi_supported == true)
8713 mode_lib->ms.support.P2IWith420 = true;
8714
8715 if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 && !mode_lib->ip.dsc422_native_support)
8716 mode_lib->ms.support.DSC422NativeNotSupported = true;
8717
8718 if (((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr2 ||
8719 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr3) &&
8720 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_edp) ||
8721 ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr10 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr13p5 ||
8722 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr20) &&
8723 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp2p0))
8724 mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = true;
8725
8726 // FIXME_STAGE2
8727 //if (display_cfg->output.OutputMultistreamEn[k] == 1) {
8728 // if (display_cfg->output.OutputMultistreamId[k] == k && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_na)
8729 // mode_lib->ms.support.LinkRateForMultistreamNotIndicated = true;
8730 // if (display_cfg->output.OutputMultistreamId[k] == k && s->OutputBpp[k] == 0)
8731 // mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
8732 // for (n = 0; n < mode_lib->ms.num_active_planes; ++n) {
8733 // if (display_cfg->output.OutputMultistreamId[k] == n && s->OutputBpp[k] == 0)
8734 // mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
8735 // }
8736 //}
8737
8738 if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
8739 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi ||
8740 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl)) {
8741 // FIXME_STAGE2
8742 //if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == k)
8743 // mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
8744 //for (n = 0; n < mode_lib->ms.num_active_planes; ++n) {
8745 // if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == n)
8746 // mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
8747 //}
8748 }
8749 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_split_1to2 ||
8750 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4))
8751 mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = true;
8752
8753 if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 2) ||
8754 (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 4))
8755 mode_lib->ms.support.NotEnoughLanesForMSO = true;
8756 }
8757 }
8758
8759 mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = false;
8760 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8761 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl &&
8762 !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
8763 mode_lib->ms.RequiredDTBCLK[k] = RequiredDTBCLK(
8764 mode_lib->ms.RequiresDSC[k],
8765 s->PixelClockBackEnd[k],
8766 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
8767 mode_lib->ms.OutputBpp[k],
8768 mode_lib->ms.support.NumberOfDSCSlices[k],
8769 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
8770 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
8771 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate,
8772 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout);
8773
8774 if (mode_lib->ms.RequiredDTBCLK[k] > ((double)min_clk_table->max_ss_clocks_khz.dtbclk / 1000)) {
8775 mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = true;
8776 }
8777 } else {
			/* This branch covers planes whose stream is not HDMI FRL as well as phantom planes.
			 * Phantom DTBCLK can be calculated differently from main because phantom has no DSC and
			 * thus will have a different output BPP. Ignore the phantom DTBCLK requirement and only
			 * consider non-phantom DTBCLK requirements. In map_mode_to_soc_dpm we choose the highest
			 * DTBCLK required - by setting phantom dtbclk to 0 we ignore it.
			 */
8783 mode_lib->ms.RequiredDTBCLK[k] = 0;
8784 }
8785 }
8786
8787 mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = false;
8788 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8789 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp ||
8790 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 ||
8791 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
8792 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) {
8793 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420) {
8794 s->DSCFormatFactor = 2;
8795 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_444) {
8796 s->DSCFormatFactor = 1;
8797 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) {
8798 s->DSCFormatFactor = 2;
8799 } else {
8800 s->DSCFormatFactor = 1;
8801 }
8802 #ifdef __DML_VBA_DEBUG__
8803 DML_LOG_VERBOSE("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]);
8804 #endif
8805 if (mode_lib->ms.RequiresDSC[k] == true) {
8806 s->PixelClockBackEndFactor = 3.0;
8807
8808 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
8809 s->PixelClockBackEndFactor = 12.0;
8810 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
8811 s->PixelClockBackEndFactor = 9.0;
8812 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
8813 s->PixelClockBackEndFactor = 6.0;
8814
8815 mode_lib->ms.required_dscclk_freq_mhz[k] = s->PixelClockBackEnd[k] / s->PixelClockBackEndFactor / (double)s->DSCFormatFactor;
8816 if (mode_lib->ms.required_dscclk_freq_mhz[k] > mode_lib->ms.max_dscclk_freq_mhz) {
8817 mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true;
8818 }
8819
8820 #ifdef __DML_VBA_DEBUG__
8821 DML_LOG_VERBOSE("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, s->PixelClockBackEnd[k]);
8822 DML_LOG_VERBOSE("DML::%s: k=%u, required_dscclk_freq_mhz = %f\n", __func__, k, mode_lib->ms.required_dscclk_freq_mhz[k]);
8823 DML_LOG_VERBOSE("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor);
8824 DML_LOG_VERBOSE("DML::%s: k=%u, DSCCLKRequiredMoreThanSupported = %u\n", __func__, k, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported);
8825 #endif
8826 }
8827 }
8828 }
8829
8830 /* Check DSC Unit and Slices Support */
8831 mode_lib->ms.support.NotEnoughDSCSlices = false;
8832 s->TotalDSCUnitsRequired = 0;
8833 mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = true;
8834 memset(s->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool));
8835
8836 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8837 if (mode_lib->ms.RequiresDSC[k] == true && !s->stream_visited[display_cfg->plane_descriptors[k].stream_index]) {
8838 s->NumDSCUnitRequired = 1;
8839
8840 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
8841 s->NumDSCUnitRequired = 4;
8842 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
8843 s->NumDSCUnitRequired = 3;
8844 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
8845 s->NumDSCUnitRequired = 2;
8846
8847 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active > s->NumDSCUnitRequired * (unsigned int)mode_lib->ip.maximum_pixels_per_line_per_dsc_unit)
8848 mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false;
8849 s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + s->NumDSCUnitRequired;
8850
8851 if (mode_lib->ms.support.NumberOfDSCSlices[k] > 4 * s->NumDSCUnitRequired)
8852 mode_lib->ms.support.NotEnoughDSCSlices = true;
8853 }
8854 s->stream_visited[display_cfg->plane_descriptors[k].stream_index] = 1;
8855 }
8856
8857 mode_lib->ms.support.NotEnoughDSCUnits = false;
8858 if (s->TotalDSCUnitsRequired > (unsigned int)mode_lib->ip.num_dsc) {
8859 mode_lib->ms.support.NotEnoughDSCUnits = true;
8860 }
8861
8862 /*DSC Delay per state*/
8863 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8864 mode_lib->ms.DSCDelay[k] = DSCDelayRequirement(mode_lib->ms.RequiresDSC[k],
8865 mode_lib->ms.ODMMode[k],
8866 mode_lib->ip.maximum_dsc_bits_per_component,
8867 s->OutputBpp[k],
8868 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
8869 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
8870 mode_lib->ms.support.NumberOfDSCSlices[k],
8871 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
8872 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
8873 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
8874 s->PixelClockBackEnd[k]);
8875 }
8876
8877 // Figure out the swath and DET configuration after the num dpp per plane is figured out
8878 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
8879 CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.ODMMode;
8880 CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.NoOfDPP;
8881
8882 // output
8883 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0];
8884 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1];
8885 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub;
8886 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub;
8887 CalculateSwathAndDETConfiguration_params->SwathWidth = mode_lib->ms.SwathWidthY;
8888 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = mode_lib->ms.SwathWidthC;
8889 CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->ms.SwathHeightY;
8890 CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->ms.SwathHeightC;
8891 CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->ms.support.request_size_bytes_luma;
8892 CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->ms.support.request_size_bytes_chroma;
8893 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->ms.DETBufferSizeInKByte; // FIXME: This is per pipe but the pipes in plane will use that
8894 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY;
8895 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC;
8896 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->ms.UnboundedRequestEnabled;
8897 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = s->dummy_integer_array[3];
8898 CalculateSwathAndDETConfiguration_params->hw_debug5 = s->dummy_boolean_array[1];
8899 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->ms.CompressedBufferSizeInkByte;
8900 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = s->dummy_boolean_array[0];
8901 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &mode_lib->ms.support.ViewportSizeSupport;
8902
8903 CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
8904
8905 if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) {
8906 for (k = 0; k < mode_lib->ms.num_active_planes; k++)
8907 mode_lib->ms.SurfaceSizeInMALL[k] = 0;
8908 mode_lib->ms.support.ExceededMALLSize = 0;
8909 } else {
8910 CalculateSurfaceSizeInMall(
8911 display_cfg,
8912 mode_lib->ms.num_active_planes,
8913 mode_lib->soc.mall_allocated_for_dcn_mbytes,
8914
8915 mode_lib->ms.BytePerPixelY,
8916 mode_lib->ms.BytePerPixelC,
8917 mode_lib->ms.Read256BlockWidthY,
8918 mode_lib->ms.Read256BlockWidthC,
8919 mode_lib->ms.Read256BlockHeightY,
8920 mode_lib->ms.Read256BlockHeightC,
8921 mode_lib->ms.MacroTileWidthY,
8922 mode_lib->ms.MacroTileWidthC,
8923 mode_lib->ms.MacroTileHeightY,
8924 mode_lib->ms.MacroTileHeightC,
8925
8926 /* Output */
8927 mode_lib->ms.SurfaceSizeInMALL,
8928 &mode_lib->ms.support.ExceededMALLSize);
8929 }
8930
8931 mode_lib->ms.TotalNumberOfDCCActiveDPP = 0;
8932 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8933 if (display_cfg->plane_descriptors[k].surface.dcc.enable == true) {
8934 mode_lib->ms.TotalNumberOfDCCActiveDPP = mode_lib->ms.TotalNumberOfDCCActiveDPP + mode_lib->ms.NoOfDPP[k];
8935 }
8936 }
8937
8938 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8939 s->SurfParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
8940 s->SurfParameters[k].DPPPerSurface = mode_lib->ms.NoOfDPP[k];
8941 s->SurfParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
8942 s->SurfParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
8943 s->SurfParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
8944 s->SurfParameters[k].BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
8945 s->SurfParameters[k].BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
8946 s->SurfParameters[k].BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
8947 s->SurfParameters[k].BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
8948 s->SurfParameters[k].BlockWidthY = mode_lib->ms.MacroTileWidthY[k];
8949 s->SurfParameters[k].BlockHeightY = mode_lib->ms.MacroTileHeightY[k];
8950 s->SurfParameters[k].BlockWidthC = mode_lib->ms.MacroTileWidthC[k];
8951 s->SurfParameters[k].BlockHeightC = mode_lib->ms.MacroTileHeightC[k];
8952 s->SurfParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
8953 s->SurfParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
8954 s->SurfParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
8955 s->SurfParameters[k].SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
8956 s->SurfParameters[k].SurfaceTiling = display_cfg->plane_descriptors[k].surface.tiling;
8957 s->SurfParameters[k].BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
8958 s->SurfParameters[k].BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
8959 s->SurfParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
8960 s->SurfParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
8961 s->SurfParameters[k].VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
8962 s->SurfParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
8963 s->SurfParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
8964 s->SurfParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch;
8965 s->SurfParameters[k].PitchC = display_cfg->plane_descriptors[k].surface.plane1.pitch;
8966 s->SurfParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
8967 s->SurfParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
8968 s->SurfParameters[k].ViewportYStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
8969 s->SurfParameters[k].ViewportXStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
8970 s->SurfParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
8971 s->SurfParameters[k].FORCE_ONE_ROW_FOR_FRAME = display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame;
8972 s->SurfParameters[k].SwathHeightY = mode_lib->ms.SwathHeightY[k];
8973 s->SurfParameters[k].SwathHeightC = mode_lib->ms.SwathHeightC[k];
8974
8975 s->SurfParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch;
8976 s->SurfParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch;
8977 }
8978
8979 CalculateVMRowAndSwath_params->display_cfg = display_cfg;
8980 CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
8981 CalculateVMRowAndSwath_params->myPipe = s->SurfParameters;
8982 CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->ms.SurfaceSizeInMALL;
8983 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
8984 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma;
8985 CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes;
8986 CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->ms.SwathWidthY;
8987 CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->ms.SwathWidthC;
8988 CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes * 1024;
8989 CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes;
8990 CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present;
8991
8992 // output
8993 CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceeded;
8994 CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = s->dummy_integer_array[12];
8995 CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = s->dummy_integer_array[13];
8996 CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->ms.dpte_row_height;
8997 CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->ms.dpte_row_height_chroma;
8998 CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = s->dummy_integer_array[14]; // VBA_DELTA
8999 CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = s->dummy_integer_array[15]; // VBA_DELTA
9000 CalculateVMRowAndSwath_params->vm_group_bytes = s->dummy_integer_array[16];
9001 CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
9002 CalculateVMRowAndSwath_params->PixelPTEReqWidthY = s->dummy_integer_array[17];
9003 CalculateVMRowAndSwath_params->PixelPTEReqHeightY = s->dummy_integer_array[18];
9004 CalculateVMRowAndSwath_params->PTERequestSizeY = s->dummy_integer_array[19];
9005 CalculateVMRowAndSwath_params->PixelPTEReqWidthC = s->dummy_integer_array[20];
9006 CalculateVMRowAndSwath_params->PixelPTEReqHeightC = s->dummy_integer_array[21];
9007 CalculateVMRowAndSwath_params->PTERequestSizeC = s->dummy_integer_array[22];
9008 CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y;
9009 CalculateVMRowAndSwath_params->vmpg_height_y = s->vmpg_height_y;
9010 CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c;
9011 CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c;
9012 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = s->dummy_integer_array[23];
9013 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = s->dummy_integer_array[24];
9014 CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY;
9015 CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC;
9016 CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->ms.PrefillY;
9017 CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->ms.PrefillC;
9018 CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY;
9019 CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC;
9020 CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
9021 CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow;
9022 CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
9023 CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
9024 CalculateVMRowAndSwath_params->vm_bytes = mode_lib->ms.vm_bytes;
9025 CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->ms.use_one_row_for_frame;
9026 CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->ms.use_one_row_for_frame_flip;
9027 CalculateVMRowAndSwath_params->is_using_mall_for_ss = s->dummy_boolean_array[0];
9028 CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = s->dummy_boolean_array[1];
9029 CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = s->dummy_integer_array[25];
9030 CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = mode_lib->ms.DCCMetaBufferSizeNotExceeded;
9031 CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->ms.meta_row_bw;
9032 CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->ms.meta_row_bytes;
9033 CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
9034 CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
9035 CalculateVMRowAndSwath_params->meta_req_width_luma = s->dummy_integer_array[26];
9036 CalculateVMRowAndSwath_params->meta_req_height_luma = s->dummy_integer_array[27];
9037 CalculateVMRowAndSwath_params->meta_row_width_luma = s->dummy_integer_array[28];
9038 CalculateVMRowAndSwath_params->meta_row_height_luma = s->meta_row_height_luma;
9039 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = s->dummy_integer_array[29];
9040 CalculateVMRowAndSwath_params->meta_req_width_chroma = s->dummy_integer_array[30];
9041 CalculateVMRowAndSwath_params->meta_req_height_chroma = s->dummy_integer_array[31];
9042 CalculateVMRowAndSwath_params->meta_row_width_chroma = s->dummy_integer_array[32];
9043 CalculateVMRowAndSwath_params->meta_row_height_chroma = s->meta_row_height_chroma;
9044 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = s->dummy_integer_array[33];
9045
9046 CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params);
9047
9048 mode_lib->ms.support.PTEBufferSizeNotExceeded = true;
9049 mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = true;
9050
9051 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9052 if (mode_lib->ms.PTEBufferSizeNotExceeded[k] == false)
9053 mode_lib->ms.support.PTEBufferSizeNotExceeded = false;
9054
9055 if (mode_lib->ms.DCCMetaBufferSizeNotExceeded[k] == false)
9056 mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = false;
9057
9058 #ifdef __DML_VBA_DEBUG__
9059 DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.PTEBufferSizeNotExceeded[k]);
9060 DML_LOG_VERBOSE("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.DCCMetaBufferSizeNotExceeded[k]);
9061 #endif
9062 }
9063 #ifdef __DML_VBA_DEBUG__
9064 DML_LOG_VERBOSE("DML::%s: PTEBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.PTEBufferSizeNotExceeded);
9065 DML_LOG_VERBOSE("DML::%s: DCCMetaBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.DCCMetaBufferSizeNotExceeded);
9066 #endif
9067
9068 /* VActive bytes to fetch for UCLK P-State */
9069 calculate_bytes_to_fetch_required_to_hide_latency_params->display_cfg = display_cfg;
9070 calculate_bytes_to_fetch_required_to_hide_latency_params->mrq_present = mode_lib->ip.dcn_mrq_present;
9071
9072 calculate_bytes_to_fetch_required_to_hide_latency_params->num_active_planes = mode_lib->ms.num_active_planes;
9073 calculate_bytes_to_fetch_required_to_hide_latency_params->num_of_dpp = mode_lib->ms.NoOfDPP;
9074 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_l = s->meta_row_height_luma;
9075 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_c = s->meta_row_height_chroma;
9076 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
9077 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
9078 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_l = mode_lib->ms.dpte_row_height;
9079 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_c = mode_lib->ms.dpte_row_height_chroma;
9080 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
9081 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
9082 calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_l = mode_lib->ms.BytePerPixelY;
9083 calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_c = mode_lib->ms.BytePerPixelC;
9084 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_l = mode_lib->ms.SwathWidthY;
9085 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_c = mode_lib->ms.SwathWidthC;
9086 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_l = mode_lib->ms.SwathHeightY;
9087 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_c = mode_lib->ms.SwathHeightC;
9088 calculate_bytes_to_fetch_required_to_hide_latency_params->latency_to_hide_us[0] = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
9089
9090 /* outputs */
9091 calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_l = s->pstate_bytes_required_l[dml2_pstate_type_uclk];
9092 calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_c = s->pstate_bytes_required_c[dml2_pstate_type_uclk];
9093
9094 calculate_bytes_to_fetch_required_to_hide_latency(calculate_bytes_to_fetch_required_to_hide_latency_params);
9095
9096 /* Excess VActive bandwidth required to fill DET */
9097 calculate_excess_vactive_bandwidth_required(
9098 display_cfg,
9099 mode_lib->ms.num_active_planes,
9100 s->pstate_bytes_required_l[dml2_pstate_type_uclk],
9101 s->pstate_bytes_required_c[dml2_pstate_type_uclk],
9102 /* outputs */
9103 mode_lib->ms.excess_vactive_fill_bw_l,
9104 mode_lib->ms.excess_vactive_fill_bw_c);
9105
9106 mode_lib->ms.UrgLatency = CalculateUrgentLatency(
9107 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us,
9108 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us,
9109 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us,
9110 mode_lib->soc.do_urgent_latency_adjustment,
9111 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us,
9112 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz,
9113 mode_lib->ms.FabricClock,
9114 mode_lib->ms.uclk_freq_mhz,
9115 mode_lib->soc.qos_parameters.qos_type,
9116 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles,
9117 mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles,
9118 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
9119 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
9120 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin,
9121 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
9122
9123 mode_lib->ms.TripToMemory = CalculateTripToMemory(
9124 mode_lib->ms.UrgLatency,
9125 mode_lib->ms.FabricClock,
9126 mode_lib->ms.uclk_freq_mhz,
9127 mode_lib->soc.qos_parameters.qos_type,
9128 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles,
9129 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
9130 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
9131 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
9132 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
9133
9134 mode_lib->ms.TripToMemory = math_max2(mode_lib->ms.UrgLatency, mode_lib->ms.TripToMemory);
9135
9136 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9137 double line_time_us = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
9138 bool cursor_not_enough_urgent_latency_hiding = false;
9139
9140 if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) {
9141 calculate_cursor_req_attributes(
9142 display_cfg->plane_descriptors[k].cursor.cursor_width,
9143 display_cfg->plane_descriptors[k].cursor.cursor_bpp,
9144
9145 // output
9146 &s->cursor_lines_per_chunk[k],
9147 &s->cursor_bytes_per_line[k],
9148 &s->cursor_bytes_per_chunk[k],
9149 &s->cursor_bytes[k]);
9150
9151 calculate_cursor_urgent_burst_factor(
9152 mode_lib->ip.cursor_buffer_size,
9153 display_cfg->plane_descriptors[k].cursor.cursor_width,
9154 s->cursor_bytes_per_chunk[k],
9155 s->cursor_lines_per_chunk[k],
9156 line_time_us,
9157 mode_lib->ms.UrgLatency,
9158
9159 // output
9160 &mode_lib->ms.UrgentBurstFactorCursor[k],
9161 &cursor_not_enough_urgent_latency_hiding);
9162 }
9163
9164 mode_lib->ms.UrgentBurstFactorCursorPre[k] = mode_lib->ms.UrgentBurstFactorCursor[k];
9165
9166 #ifdef __DML_VBA_DEBUG__
9167 DML_LOG_VERBOSE("DML::%s: k=%d, Calling CalculateUrgentBurstFactor\n", __func__, k);
9168 DML_LOG_VERBOSE("DML::%s: k=%d, VRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
9169 DML_LOG_VERBOSE("DML::%s: k=%d, VRatioChroma=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio);
9170 #endif
9171
9172 CalculateUrgentBurstFactor(
9173 &display_cfg->plane_descriptors[k],
9174 mode_lib->ms.swath_width_luma_ub[k],
9175 mode_lib->ms.swath_width_chroma_ub[k],
9176 mode_lib->ms.SwathHeightY[k],
9177 mode_lib->ms.SwathHeightC[k],
9178 line_time_us,
9179 mode_lib->ms.UrgLatency,
9180 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
9181 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
9182 mode_lib->ms.BytePerPixelInDETY[k],
9183 mode_lib->ms.BytePerPixelInDETC[k],
9184 mode_lib->ms.DETBufferSizeY[k],
9185 mode_lib->ms.DETBufferSizeC[k],
9186
9187 // Output
9188 &mode_lib->ms.UrgentBurstFactorLuma[k],
9189 &mode_lib->ms.UrgentBurstFactorChroma[k],
9190 &mode_lib->ms.NotEnoughUrgentLatencyHiding[k]);
9191
9192 mode_lib->ms.NotEnoughUrgentLatencyHiding[k] = mode_lib->ms.NotEnoughUrgentLatencyHiding[k] || cursor_not_enough_urgent_latency_hiding;
9193 }
9194
9195 CalculateDCFCLKDeepSleep(
9196 display_cfg,
9197 mode_lib->ms.num_active_planes,
9198 mode_lib->ms.BytePerPixelY,
9199 mode_lib->ms.BytePerPixelC,
9200 mode_lib->ms.SwathWidthY,
9201 mode_lib->ms.SwathWidthC,
9202 mode_lib->ms.NoOfDPP,
9203 mode_lib->ms.PSCL_FACTOR,
9204 mode_lib->ms.PSCL_FACTOR_CHROMA,
9205 mode_lib->ms.RequiredDPPCLK,
9206 mode_lib->ms.vactive_sw_bw_l,
9207 mode_lib->ms.vactive_sw_bw_c,
9208 mode_lib->soc.return_bus_width_bytes,
9209
9210 /* Output */
9211 &mode_lib->ms.dcfclk_deepsleep);
9212
9213 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
9214 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
9215 mode_lib->ms.WritebackDelayTime[k] = mode_lib->soc.qos_parameters.writeback.base_latency_us + CalculateWriteBackDelay(
9216 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format,
9217 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio,
9218 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio,
9219 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps,
9220 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width,
9221 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height,
9222 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height,
9223 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->ms.RequiredDISPCLK;
9224 } else {
9225 mode_lib->ms.WritebackDelayTime[k] = 0.0;
9226 }
9227 }
9228
9229 // MaximumVStartup is actually Tvstartup_min in DCN4 programming guide
9230 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
9231 bool isInterlaceTiming = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !mode_lib->ip.ptoi_supported);
9232 s->MaximumVStartup[k] = CalculateMaxVStartup(
9233 mode_lib->ip.ptoi_supported,
9234 mode_lib->ip.vblank_nom_default_us,
9235 &display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing,
9236 mode_lib->ms.WritebackDelayTime[k]);
9237 mode_lib->ms.MaxVStartupLines[k] = (isInterlaceTiming ? (2 * s->MaximumVStartup[k]) : s->MaximumVStartup[k]);
9238 }
9239
9240 #ifdef __DML_VBA_DEBUG__
9241 DML_LOG_VERBOSE("DML::%s: k=%u, MaximumVStartup = %u\n", __func__, k, s->MaximumVStartup[k]);
9242 #endif
9243
9244 /* Immediate Flip and MALL parameters */
9245 s->ImmediateFlipRequired = false;
9246 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9247 s->ImmediateFlipRequired = s->ImmediateFlipRequired || display_cfg->plane_descriptors[k].immediate_flip;
9248 }
9249
9250 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = false;
9251 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9252 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe =
9253 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe ||
9254 ((display_cfg->hostvm_enable == true || display_cfg->plane_descriptors[k].immediate_flip == true) &&
9255 (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame || dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])));
9256 }
9257
9258 mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = false;
9259 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9260 mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen ||
9261 ((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable || display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))) ||
9262 ((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_disable || display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame));
9263 }
9264
9265 s->FullFrameMALLPStateMethod = false;
9266 s->SubViewportMALLPStateMethod = false;
9267 s->PhantomPipeMALLPStateMethod = false;
9268 s->SubViewportMALLRefreshGreaterThan120Hz = false;
9269 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9270 if (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame)
9271 s->FullFrameMALLPStateMethod = true;
9272 if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) {
9273 s->SubViewportMALLPStateMethod = true;
9274 if (!display_cfg->overrides.enable_subvp_implicit_pmo) {
9275 // For dv, small frame tests will have very high refresh rate
9276 unsigned long long refresh_rate = (unsigned long long) ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz * 1000 /
9277 (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
9278 (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total);
9279 if (refresh_rate > 120)
9280 s->SubViewportMALLRefreshGreaterThan120Hz = true;
9281 }
9282 }
9283 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))
9284 s->PhantomPipeMALLPStateMethod = true;
9285 }
9286 mode_lib->ms.support.InvalidCombinationOfMALLUseForPState = (s->SubViewportMALLPStateMethod != s->PhantomPipeMALLPStateMethod) ||
9287 (s->SubViewportMALLPStateMethod && s->FullFrameMALLPStateMethod) || s->SubViewportMALLRefreshGreaterThan120Hz;
9288
9289 #ifdef __DML_VBA_DEBUG__
9290 DML_LOG_VERBOSE("DML::%s: SubViewportMALLPStateMethod = %u\n", __func__, s->SubViewportMALLPStateMethod);
9291 DML_LOG_VERBOSE("DML::%s: PhantomPipeMALLPStateMethod = %u\n", __func__, s->PhantomPipeMALLPStateMethod);
9292 DML_LOG_VERBOSE("DML::%s: FullFrameMALLPStateMethod = %u\n", __func__, s->FullFrameMALLPStateMethod);
9293 DML_LOG_VERBOSE("DML::%s: SubViewportMALLRefreshGreaterThan120Hz = %u\n", __func__, s->SubViewportMALLRefreshGreaterThan120Hz);
9294 DML_LOG_VERBOSE("DML::%s: InvalidCombinationOfMALLUseForPState = %u\n", __func__, mode_lib->ms.support.InvalidCombinationOfMALLUseForPState);
9295 DML_LOG_VERBOSE("DML::%s: in_out_params->min_clk_index = %u\n", __func__, in_out_params->min_clk_index);
9296 DML_LOG_VERBOSE("DML::%s: mode_lib->ms.DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK);
9297 DML_LOG_VERBOSE("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
9298 DML_LOG_VERBOSE("DML::%s: mode_lib->ms.uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz);
9299 #endif
9300
9301 mode_lib->ms.support.OutstandingRequestsSupport = true;
9302 mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = true;
9303
9304 mode_lib->ms.support.avg_urgent_latency_us
9305 = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
9306 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0)
9307 + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock)
9308 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0);
9309
9310 mode_lib->ms.support.avg_non_urgent_latency_us
9311 = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
9312 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0)
9313 + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock)
9314 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0);
9315
9316 mode_lib->ms.support.max_non_urgent_latency_us
9317 = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles
9318 / mode_lib->ms.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0)
9319 + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock
9320 + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock
9321 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0);
9322
9323 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
9324
9325 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
9326 outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_luma[k]
9327 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes));
9328
9329 if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) {
9330 mode_lib->ms.support.OutstandingRequestsSupport = false;
9331 }
9332
9333 if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) {
9334 mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false;
9335 }
9336
9337 #ifdef __DML_VBA_DEBUG__
9338 DML_LOG_VERBOSE("DML::%s: avg_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_urgent_latency_us);
9339 DML_LOG_VERBOSE("DML::%s: avg_non_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_non_urgent_latency_us);
9340 DML_LOG_VERBOSE("DML::%s: k=%d, request_size_bytes_luma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_luma[k]);
9341 DML_LOG_VERBOSE("DML::%s: k=%d, outstanding_latency_us = %f (luma)\n", __func__, k, outstanding_latency_us);
9342 #endif
9343 }
9344
9345 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x && mode_lib->ms.BytePerPixelC[k] > 0) {
9346 outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_chroma[k]
9347 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes));
9348
9349 if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) {
9350 mode_lib->ms.support.OutstandingRequestsSupport = false;
9351 }
9352
9353 if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) {
9354 mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false;
9355 }
9356 #ifdef __DML_VBA_DEBUG__
9357 DML_LOG_VERBOSE("DML::%s: k=%d, request_size_bytes_chroma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_chroma[k]);
9358 DML_LOG_VERBOSE("DML::%s: k=%d, outstanding_latency_us = %f (chroma)\n", __func__, k, outstanding_latency_us);
9359 #endif
9360 }
9361 }
9362
9363 memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params));
9364 if (mode_lib->soc.mcache_size_bytes == 0 || mode_lib->ip.dcn_mrq_present) {
9365 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
9366 mode_lib->ms.mall_prefetch_sdp_overhead_factor[k] = 1.0;
9367 mode_lib->ms.mall_prefetch_dram_overhead_factor[k] = 1.0;
9368 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0;
9369 mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0;
9370 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0;
9371 mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0;
9372 }
9373 } else {
9374 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
9375 calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
9376 calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count;
9377 calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes;
9378 calculate_mcache_setting_params->mcache_size_bytes = mode_lib->soc.mcache_size_bytes;
9379 calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes;
9380 calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
9381 calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
9382
9383 calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format;
9384 calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle);
9385 calculate_mcache_setting_params->vp_stationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
9386 calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling;
9387 calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall;
9388
9389 calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
9390 calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
9391 calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
9392 calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
9393 calculate_mcache_setting_params->blk_width_l = mode_lib->ms.MacroTileWidthY[k];
9394 calculate_mcache_setting_params->blk_height_l = mode_lib->ms.MacroTileHeightY[k];
9395 calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k];
9396 calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k];
9397 calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k];
9398 calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->ms.BytePerPixelY[k];
9399
9400 calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start;
9401 calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
9402 calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
9403 calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
9404 calculate_mcache_setting_params->blk_width_c = mode_lib->ms.MacroTileWidthC[k];
9405 calculate_mcache_setting_params->blk_height_c = mode_lib->ms.MacroTileHeightC[k];
9406 calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k];
9407 calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k];
9408 calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k];
9409 calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->ms.BytePerPixelC[k];
9410
9411 // output
9412 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k];
9413 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k];
9414 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k];
9415 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k];
9416
9417 calculate_mcache_setting_params->num_mcaches_l = &mode_lib->ms.num_mcaches_l[k];
9418 calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->ms.mcache_row_bytes_l[k];
9419 calculate_mcache_setting_params->mcache_row_bytes_per_channel_l = &mode_lib->ms.mcache_row_bytes_per_channel_l[k];
9420 calculate_mcache_setting_params->mcache_offsets_l = mode_lib->ms.mcache_offsets_l[k];
9421 calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->ms.mcache_shift_granularity_l[k];
9422
9423 calculate_mcache_setting_params->num_mcaches_c = &mode_lib->ms.num_mcaches_c[k];
9424 calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->ms.mcache_row_bytes_c[k];
9425 calculate_mcache_setting_params->mcache_row_bytes_per_channel_c = &mode_lib->ms.mcache_row_bytes_per_channel_c[k];
9426 calculate_mcache_setting_params->mcache_offsets_c = mode_lib->ms.mcache_offsets_c[k];
9427 calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->ms.mcache_shift_granularity_c[k];
9428
9429 calculate_mcache_setting_params->mall_comb_mcache_l = &mode_lib->ms.mall_comb_mcache_l[k];
9430 calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->ms.mall_comb_mcache_c[k];
9431 calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->ms.lc_comb_mcache[k];
9432
9433 calculate_mcache_setting(&mode_lib->scratch, calculate_mcache_setting_params);
9434 }
9435
9436 calculate_mall_bw_overhead_factor(
9437 mode_lib->ms.mall_prefetch_sdp_overhead_factor,
9438 mode_lib->ms.mall_prefetch_dram_overhead_factor,
9439
9440 // input
9441 display_cfg,
9442 mode_lib->ms.num_active_planes);
9443 }
9444
9445 // Calculate all the bandwidth available
	// Need another bw for latency evaluation
9447 calculate_bandwidth_available(
9448 mode_lib->ms.support.avg_bandwidth_available_min, // not used
9449 mode_lib->ms.support.avg_bandwidth_available, // not used
9450 mode_lib->ms.support.urg_bandwidth_available_min_latency,
9451 mode_lib->ms.support.urg_bandwidth_available, // not used
9452 mode_lib->ms.support.urg_bandwidth_available_vm_only, // not used
9453 mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm, // not used
9454
9455 &mode_lib->soc,
9456 display_cfg->hostvm_enable,
9457 mode_lib->ms.DCFCLK,
9458 mode_lib->ms.FabricClock,
9459 mode_lib->ms.dram_bw_mbps);
9460
9461 calculate_bandwidth_available(
9462 mode_lib->ms.support.avg_bandwidth_available_min,
9463 mode_lib->ms.support.avg_bandwidth_available,
9464 mode_lib->ms.support.urg_bandwidth_available_min,
9465 mode_lib->ms.support.urg_bandwidth_available,
9466 mode_lib->ms.support.urg_bandwidth_available_vm_only,
9467 mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm,
9468
9469 &mode_lib->soc,
9470 display_cfg->hostvm_enable,
9471 mode_lib->ms.MaxDCFCLK,
9472 mode_lib->ms.MaxFabricClock,
9473 #ifdef DML_MODE_SUPPORT_USE_DPM_DRAM_BW
9474 mode_lib->ms.dram_bw_mbps);
9475 #else
9476 mode_lib->ms.max_dram_bw_mbps);
9477 #endif
9478
9479 // Average BW support check
9480 calculate_avg_bandwidth_required(
9481 mode_lib->ms.support.avg_bandwidth_required,
9482 // input
9483 display_cfg,
9484 mode_lib->ms.num_active_planes,
9485 mode_lib->ms.vactive_sw_bw_l,
9486 mode_lib->ms.vactive_sw_bw_c,
9487 mode_lib->ms.cursor_bw,
9488 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0,
9489 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1,
9490 mode_lib->ms.mall_prefetch_dram_overhead_factor,
9491 mode_lib->ms.mall_prefetch_sdp_overhead_factor);
9492
9493 for (m = 0; m < dml2_core_internal_bw_max; m++) { // check sdp and dram
9494 mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_idle][m] = 1;
9495 mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_active][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][m]);
9496 mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_svp_prefetch][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][m]);
9497 }
9498
9499 mode_lib->ms.support.AvgBandwidthSupport = true;
9500 mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = true;
9501 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9502 if (mode_lib->ms.NotEnoughUrgentLatencyHiding[k]) {
9503 mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = false;
9504 DML_LOG_VERBOSE("DML::%s: k=%u NotEnoughUrgentLatencyHiding set\n", __func__, k);
9505
9506 }
9507 }
9508 for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
9509 for (n = 0; n < dml2_core_internal_bw_max; n++) { // check sdp and dram
9510 if (!mode_lib->ms.support.avg_bandwidth_support_ok[m][n] && (m == dml2_core_internal_soc_state_sys_active || mode_lib->soc.mall_allocated_for_dcn_mbytes > 0)) {
9511 mode_lib->ms.support.AvgBandwidthSupport = false;
9512 #ifdef __DML_VBA_DEBUG__
9513 DML_LOG_VERBOSE("DML::%s: avg_bandwidth_support_ok[%s][%s] not ok\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n));
9514 #endif
9515 }
9516 }
9517 }
9518
9519 dml_core_ms_prefetch_check(mode_lib, display_cfg);
9520
9521 mode_lib->ms.support.max_urgent_latency_us = s->mSOCParameters.max_urgent_latency_us;
9522
9523 //Re-ordering Buffer Support Check
9524 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
9525 if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024
9526 / mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= s->mSOCParameters.max_urgent_latency_us) {
9527 mode_lib->ms.support.ROBSupport = true;
9528 } else {
9529 mode_lib->ms.support.ROBSupport = false;
9530 }
9531 } else {
9532 if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) {
9533 mode_lib->ms.support.ROBSupport = true;
9534 } else {
9535 mode_lib->ms.support.ROBSupport = false;
9536 }
9537 }
9538
9539 /* VActive fill time calculations (informative) */
9540 calculate_vactive_det_fill_latency(
9541 display_cfg,
9542 mode_lib->ms.num_active_planes,
9543 s->pstate_bytes_required_l[dml2_pstate_type_uclk],
9544 s->pstate_bytes_required_c[dml2_pstate_type_uclk],
9545 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0,
9546 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1,
9547 mode_lib->ms.vactive_sw_bw_l,
9548 mode_lib->ms.vactive_sw_bw_c,
9549 mode_lib->ms.surface_avg_vactive_required_bw,
9550 mode_lib->ms.surface_peak_required_bw,
9551 /* outputs */
9552 mode_lib->ms.pstate_vactive_det_fill_delay_us[dml2_pstate_type_uclk]);
9553
9554 #ifdef __DML_VBA_DEBUG__
9555 DML_LOG_VERBOSE("DML::%s: max_urgent_latency_us = %f\n", __func__, s->mSOCParameters.max_urgent_latency_us);
9556 DML_LOG_VERBOSE("DML::%s: ROBSupport = %u\n", __func__, mode_lib->ms.support.ROBSupport);
9557 #endif
9558
9559 /*Mode Support, Voltage State and SOC Configuration*/
9560 {
9561 if (mode_lib->ms.support.ScaleRatioAndTapsSupport
9562 && mode_lib->ms.support.SourceFormatPixelAndScanSupport
9563 && mode_lib->ms.support.ViewportSizeSupport
9564 && !mode_lib->ms.support.LinkRateDoesNotMatchDPVersion
9565 && !mode_lib->ms.support.LinkRateForMultistreamNotIndicated
9566 && !mode_lib->ms.support.BPPForMultistreamNotIndicated
9567 && !mode_lib->ms.support.MultistreamWithHDMIOreDP
9568 && !mode_lib->ms.support.ExceededMultistreamSlots
9569 && !mode_lib->ms.support.MSOOrODMSplitWithNonDPLink
9570 && !mode_lib->ms.support.NotEnoughLanesForMSO
9571 && !mode_lib->ms.support.P2IWith420
9572 && !mode_lib->ms.support.DSC422NativeNotSupported
9573 && mode_lib->ms.support.DSCSlicesODMModeSupported
9574 && !mode_lib->ms.support.NotEnoughDSCUnits
9575 && !mode_lib->ms.support.NotEnoughDSCSlices
9576 && !mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe
9577 && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen
9578 && !mode_lib->ms.support.DSCCLKRequiredMoreThanSupported
9579 && mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport
9580 && !mode_lib->ms.support.DTBCLKRequiredMoreThanSupported
9581 && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPState
9582 && mode_lib->ms.support.ROBSupport
9583 && mode_lib->ms.support.OutstandingRequestsSupport
9584 && mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance
9585 && mode_lib->ms.support.DISPCLK_DPPCLK_Support
9586 && mode_lib->ms.support.TotalAvailablePipesSupport
9587 && mode_lib->ms.support.NumberOfOTGSupport
9588 && mode_lib->ms.support.NumberOfHDMIFRLSupport
9589 && mode_lib->ms.support.NumberOfDP2p0Support
9590 && mode_lib->ms.support.EnoughWritebackUnits
9591 && mode_lib->ms.support.WritebackLatencySupport
9592 && mode_lib->ms.support.WritebackScaleRatioAndTapsSupport
9593 && mode_lib->ms.support.CursorSupport
9594 && mode_lib->ms.support.PitchSupport
9595 && !mode_lib->ms.support.ViewportExceedsSurface
9596 && mode_lib->ms.support.PrefetchSupported
9597 && mode_lib->ms.support.EnoughUrgentLatencyHidingSupport
9598 && mode_lib->ms.support.AvgBandwidthSupport
9599 && mode_lib->ms.support.DynamicMetadataSupported
9600 && mode_lib->ms.support.VRatioInPrefetchSupported
9601 && mode_lib->ms.support.PTEBufferSizeNotExceeded
9602 && mode_lib->ms.support.DCCMetaBufferSizeNotExceeded
9603 && !mode_lib->ms.support.ExceededMALLSize
9604 && mode_lib->ms.support.g6_temp_read_support
9605 && ((!display_cfg->hostvm_enable && !s->ImmediateFlipRequired) || mode_lib->ms.support.ImmediateFlipSupport)) {
9606 DML_LOG_VERBOSE("DML::%s: mode is supported\n", __func__);
9607 mode_lib->ms.support.ModeSupport = true;
9608 } else {
9609 DML_LOG_VERBOSE("DML::%s: mode is NOT supported\n", __func__);
9610 mode_lib->ms.support.ModeSupport = false;
9611 }
9612 }
9613
9614 // Since now the mode_support work on 1 particular power state, so there is only 1 state idx (index 0).
9615 DML_LOG_VERBOSE("DML::%s: ModeSupport = %u\n", __func__, mode_lib->ms.support.ModeSupport);
9616 DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
9617
9618 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
9619 mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[k];
9620 mode_lib->ms.support.DPPPerSurface[k] = mode_lib->ms.NoOfDPP[k];
9621 }
9622
9623 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
9624 mode_lib->ms.support.ODMMode[k] = mode_lib->ms.ODMMode[k];
9625 mode_lib->ms.support.DSCEnabled[k] = mode_lib->ms.RequiresDSC[k];
9626 mode_lib->ms.support.FECEnabled[k] = mode_lib->ms.RequiresFEC[k];
9627 mode_lib->ms.support.OutputBpp[k] = mode_lib->ms.OutputBpp[k];
9628 mode_lib->ms.support.OutputType[k] = mode_lib->ms.OutputType[k];
9629 mode_lib->ms.support.OutputRate[k] = mode_lib->ms.OutputRate[k];
9630
9631 #if defined(__DML_VBA_DEBUG__)
9632 DML_LOG_VERBOSE("DML::%s: k=%d, ODMMode = %u\n", __func__, k, mode_lib->ms.support.ODMMode[k]);
9633 DML_LOG_VERBOSE("DML::%s: k=%d, DSCEnabled = %u\n", __func__, k, mode_lib->ms.support.DSCEnabled[k]);
9634 #endif
9635 }
9636
9637 #if defined(__DML_VBA_DEBUG__)
9638 if (!mode_lib->ms.support.ModeSupport)
9639 dml2_print_mode_support_info(&mode_lib->ms.support, true);
9640
9641 DML_LOG_VERBOSE("DML::%s: --- DONE --- \n", __func__);
9642 #endif
9643
9644 return mode_lib->ms.support.ModeSupport;
9645 }
9646
dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support_ex * in_out_params)9647 unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support_ex *in_out_params)
9648 {
9649 unsigned int result;
9650
9651 DML_LOG_VERBOSE("DML::%s: ------------- START ----------\n", __func__);
9652 result = dml_core_mode_support(in_out_params);
9653
9654 if (result)
9655 *in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support;
9656
9657 DML_LOG_VERBOSE("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, result, in_out_params->min_clk_index);
9658
9659 for (unsigned int k = 0; k < in_out_params->in_display_cfg->num_planes; k++)
9660 DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, in_out_params->in_display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns);
9661
9662 DML_LOG_VERBOSE("DML::%s: ------------- DONE ----------\n", __func__);
9663
9664 return result;
9665 }
9666
CalculatePixelDeliveryTimes(const struct dml2_display_cfg * display_cfg,const struct core_display_cfg_support_info * cfg_support_info,unsigned int NumberOfActiveSurfaces,double VRatioPrefetchY[],double VRatioPrefetchC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double Dppclk[],unsigned int BytePerPixelC[],unsigned int req_per_swath_ub_l[],unsigned int req_per_swath_ub_c[],double DisplayPipeLineDeliveryTimeLuma[],double DisplayPipeLineDeliveryTimeChroma[],double DisplayPipeLineDeliveryTimeLumaPrefetch[],double DisplayPipeLineDeliveryTimeChromaPrefetch[],double DisplayPipeRequestDeliveryTimeLuma[],double DisplayPipeRequestDeliveryTimeChroma[],double DisplayPipeRequestDeliveryTimeLumaPrefetch[],double DisplayPipeRequestDeliveryTimeChromaPrefetch[])9667 static void CalculatePixelDeliveryTimes(
9668 const struct dml2_display_cfg *display_cfg,
9669 const struct core_display_cfg_support_info *cfg_support_info,
9670 unsigned int NumberOfActiveSurfaces,
9671 double VRatioPrefetchY[],
9672 double VRatioPrefetchC[],
9673 unsigned int swath_width_luma_ub[],
9674 unsigned int swath_width_chroma_ub[],
9675 double PSCL_THROUGHPUT[],
9676 double PSCL_THROUGHPUT_CHROMA[],
9677 double Dppclk[],
9678 unsigned int BytePerPixelC[],
9679 unsigned int req_per_swath_ub_l[],
9680 unsigned int req_per_swath_ub_c[],
9681
9682 // Output
9683 double DisplayPipeLineDeliveryTimeLuma[],
9684 double DisplayPipeLineDeliveryTimeChroma[],
9685 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
9686 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
9687 double DisplayPipeRequestDeliveryTimeLuma[],
9688 double DisplayPipeRequestDeliveryTimeChroma[],
9689 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
9690 double DisplayPipeRequestDeliveryTimeChromaPrefetch[])
9691 {
9692 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
9693 double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
9694
9695 #ifdef __DML_VBA_DEBUG__
9696 DML_LOG_VERBOSE("DML::%s: k=%u : HRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
9697 DML_LOG_VERBOSE("DML::%s: k=%u : VRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
9698 DML_LOG_VERBOSE("DML::%s: k=%u : HRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio);
9699 DML_LOG_VERBOSE("DML::%s: k=%u : VRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio);
9700 DML_LOG_VERBOSE("DML::%s: k=%u : VRatioPrefetchY = %f\n", __func__, k, VRatioPrefetchY[k]);
9701 DML_LOG_VERBOSE("DML::%s: k=%u : VRatioPrefetchC = %f\n", __func__, k, VRatioPrefetchC[k]);
9702 DML_LOG_VERBOSE("DML::%s: k=%u : swath_width_luma_ub = %u\n", __func__, k, swath_width_luma_ub[k]);
9703 DML_LOG_VERBOSE("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, k, swath_width_chroma_ub[k]);
9704 DML_LOG_VERBOSE("DML::%s: k=%u : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
9705 DML_LOG_VERBOSE("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
9706 DML_LOG_VERBOSE("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, k, cfg_support_info->plane_support_info[k].dpps_used);
9707 DML_LOG_VERBOSE("DML::%s: k=%u : pixel_clock_mhz = %f\n", __func__, k, pixel_clock_mhz);
9708 DML_LOG_VERBOSE("DML::%s: k=%u : Dppclk = %f\n", __func__, k, Dppclk[k]);
9709 #endif
9710 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) {
9711 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz;
9712 } else {
9713 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
9714 }
9715
9716 if (BytePerPixelC[k] == 0) {
9717 DisplayPipeLineDeliveryTimeChroma[k] = 0;
9718 } else {
9719 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio <= 1) {
9720 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz;
9721 } else {
9722 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
9723 }
9724 }
9725
9726 if (VRatioPrefetchY[k] <= 1) {
9727 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz;
9728 } else {
9729 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
9730 }
9731
9732 if (BytePerPixelC[k] == 0) {
9733 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
9734 } else {
9735 if (VRatioPrefetchC[k] <= 1) {
9736 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz;
9737 } else {
9738 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
9739 }
9740 }
9741 #ifdef __DML_VBA_DEBUG__
9742 DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
9743 DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
9744 DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
9745 DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
9746 #endif
9747 }
9748
9749 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
9750
9751 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub_l[k];
9752 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub_l[k];
9753 if (BytePerPixelC[k] == 0) {
9754 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
9755 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
9756 } else {
9757 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub_c[k];
9758 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub_c[k];
9759 }
9760 #ifdef __DML_VBA_DEBUG__
9761 DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
9762 DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
9763 DML_LOG_VERBOSE("DML::%s: k=%u : req_per_swath_ub_l = %d\n", __func__, k, req_per_swath_ub_l[k]);
9764 DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
9765 DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
9766 DML_LOG_VERBOSE("DML::%s: k=%u : req_per_swath_ub_c = %d\n", __func__, k, req_per_swath_ub_c[k]);
9767 #endif
9768 }
9769 }
9770
CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTETimes_params * p)9771 static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTETimes_params *p)
9772 {
9773 unsigned int meta_chunk_width;
9774 unsigned int min_meta_chunk_width;
9775 unsigned int meta_chunk_per_row_int;
9776 unsigned int meta_row_remainder;
9777 unsigned int meta_chunk_threshold;
9778 unsigned int meta_chunks_per_row_ub;
9779 unsigned int meta_chunk_width_chroma;
9780 unsigned int min_meta_chunk_width_chroma;
9781 unsigned int meta_chunk_per_row_int_chroma;
9782 unsigned int meta_row_remainder_chroma;
9783 unsigned int meta_chunk_threshold_chroma;
9784 unsigned int meta_chunks_per_row_ub_chroma;
9785 unsigned int dpte_group_width_luma;
9786 unsigned int dpte_groups_per_row_luma_ub;
9787 unsigned int dpte_group_width_chroma;
9788 unsigned int dpte_groups_per_row_chroma_ub;
9789 double pixel_clock_mhz;
9790
9791 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9792 p->DST_Y_PER_PTE_ROW_NOM_L[k] = p->dpte_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
9793 if (p->BytePerPixelC[k] == 0) {
9794 p->DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
9795 } else {
9796 p->DST_Y_PER_PTE_ROW_NOM_C[k] = p->dpte_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
9797 }
9798 p->DST_Y_PER_META_ROW_NOM_L[k] = p->meta_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
9799 if (p->BytePerPixelC[k] == 0) {
9800 p->DST_Y_PER_META_ROW_NOM_C[k] = 0;
9801 } else {
9802 p->DST_Y_PER_META_ROW_NOM_C[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
9803 }
9804 }
9805
9806 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9807 if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true && p->mrq_present) {
9808 meta_chunk_width = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelY[k] / p->meta_row_height[k];
9809 min_meta_chunk_width = p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelY[k] / p->meta_row_height[k];
9810 meta_chunk_per_row_int = p->meta_row_width[k] / meta_chunk_width;
9811 meta_row_remainder = p->meta_row_width[k] % meta_chunk_width;
9812 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
9813 meta_chunk_threshold = 2 * min_meta_chunk_width - p->meta_req_width[k];
9814 } else {
9815 meta_chunk_threshold = 2 * min_meta_chunk_width - p->meta_req_height[k];
9816 }
9817 if (meta_row_remainder <= meta_chunk_threshold) {
9818 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
9819 } else {
9820 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
9821 }
9822 p->TimePerMetaChunkNominal[k] = p->meta_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio *
9823 p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total /
9824 (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub;
9825 p->TimePerMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total /
9826 (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub;
9827 p->TimePerMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total /
9828 (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub;
9829 if (p->BytePerPixelC[k] == 0) {
9830 p->TimePerChromaMetaChunkNominal[k] = 0;
9831 p->TimePerChromaMetaChunkVBlank[k] = 0;
9832 p->TimePerChromaMetaChunkFlip[k] = 0;
9833 } else {
9834 meta_chunk_width_chroma = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k];
9835 min_meta_chunk_width_chroma = p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k];
9836 meta_chunk_per_row_int_chroma = (unsigned int)((double)p->meta_row_width_chroma[k] / meta_chunk_width_chroma);
9837 meta_row_remainder_chroma = p->meta_row_width_chroma[k] % meta_chunk_width_chroma;
9838 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
9839 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_width_chroma[k];
9840 } else {
9841 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_height_chroma[k];
9842 }
9843 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
9844 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
9845 } else {
9846 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
9847 }
9848 p->TimePerChromaMetaChunkNominal[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma;
9849 p->TimePerChromaMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma;
9850 p->TimePerChromaMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma;
9851 }
9852 } else {
9853 p->TimePerMetaChunkNominal[k] = 0;
9854 p->TimePerMetaChunkVBlank[k] = 0;
9855 p->TimePerMetaChunkFlip[k] = 0;
9856 p->TimePerChromaMetaChunkNominal[k] = 0;
9857 p->TimePerChromaMetaChunkVBlank[k] = 0;
9858 p->TimePerChromaMetaChunkFlip[k] = 0;
9859 }
9860
9861 #ifdef __DML_VBA_DEBUG__
9862 DML_LOG_VERBOSE("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_L[k]);
9863 DML_LOG_VERBOSE("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_C[k]);
9864 DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkNominal = %f\n", __func__, k, p->TimePerMetaChunkNominal[k]);
9865 DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkVBlank = %f\n", __func__, k, p->TimePerMetaChunkVBlank[k]);
9866 DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkFlip = %f\n", __func__, k, p->TimePerMetaChunkFlip[k]);
9867 DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkNominal = %f\n", __func__, k, p->TimePerChromaMetaChunkNominal[k]);
9868 DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, p->TimePerChromaMetaChunkVBlank[k]);
9869 DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkFlip = %f\n", __func__, k, p->TimePerChromaMetaChunkFlip[k]);
9870 #endif
9871 }
9872
9873 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9874 p->DST_Y_PER_PTE_ROW_NOM_L[k] = p->dpte_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
9875 if (p->BytePerPixelC[k] == 0) {
9876 p->DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
9877 } else {
9878 p->DST_Y_PER_PTE_ROW_NOM_C[k] = p->dpte_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
9879 }
9880 }
9881
9882 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9883 pixel_clock_mhz = ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
9884
9885 if (p->display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut)
9886 p->time_per_tdlut_group[k] = 2 * p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / p->tdlut_groups_per_2row_ub[k];
9887 else
9888 p->time_per_tdlut_group[k] = 0;
9889
9890 DML_LOG_VERBOSE("DML::%s: k=%u, time_per_tdlut_group = %f\n", __func__, k, p->time_per_tdlut_group[k]);
9891
9892 if (p->display_cfg->gpuvm_enable == true) {
9893 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
9894 dpte_group_width_luma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqWidthY[k]);
9895 } else {
9896 dpte_group_width_luma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqHeightY[k]);
9897 }
9898 if (p->use_one_row_for_frame[k]) {
9899 dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma / 2.0, 1.0));
9900 } else {
9901 dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma, 1.0));
9902 }
9903 if (dpte_groups_per_row_luma_ub <= 2) {
9904 dpte_groups_per_row_luma_ub = dpte_groups_per_row_luma_ub + 1;
9905 }
9906 DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]);
9907 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]);
9908 DML_LOG_VERBOSE("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]);
9909 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, p->PixelPTEReqWidthY[k]);
9910 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", __func__, k, p->PixelPTEReqHeightY[k]);
9911 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]);
9912 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma);
9913 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub);
9914
9915 p->time_per_pte_group_nom_luma[k] = p->DST_Y_PER_PTE_ROW_NOM_L[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub;
9916 p->time_per_pte_group_vblank_luma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub;
9917 p->time_per_pte_group_flip_luma[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub;
9918 if (p->BytePerPixelC[k] == 0) {
9919 p->time_per_pte_group_nom_chroma[k] = 0;
9920 p->time_per_pte_group_vblank_chroma[k] = 0;
9921 p->time_per_pte_group_flip_chroma[k] = 0;
9922 } else {
9923 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
9924 dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqWidthC[k]);
9925 } else {
9926 dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqHeightC[k]);
9927 }
9928
9929 if (p->use_one_row_for_frame[k]) {
9930 dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma / 2.0, 1.0));
9931 } else {
9932 dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma, 1.0));
9933 }
9934 if (dpte_groups_per_row_chroma_ub <= 2) {
9935 dpte_groups_per_row_chroma_ub = dpte_groups_per_row_chroma_ub + 1;
9936 }
9937 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]);
9938 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma);
9939 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub);
9940
9941 p->time_per_pte_group_nom_chroma[k] = p->DST_Y_PER_PTE_ROW_NOM_C[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub;
9942 p->time_per_pte_group_vblank_chroma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub;
9943 p->time_per_pte_group_flip_chroma[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub;
9944 }
9945 } else {
9946 p->time_per_pte_group_nom_luma[k] = 0;
9947 p->time_per_pte_group_vblank_luma[k] = 0;
9948 p->time_per_pte_group_flip_luma[k] = 0;
9949 p->time_per_pte_group_nom_chroma[k] = 0;
9950 p->time_per_pte_group_vblank_chroma[k] = 0;
9951 p->time_per_pte_group_flip_chroma[k] = 0;
9952 }
9953 #ifdef __DML_VBA_DEBUG__
9954 DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_row_vblank = %f\n", __func__, k, p->dst_y_per_row_vblank[k]);
9955 DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_row_flip = %f\n", __func__, k, p->dst_y_per_row_flip[k]);
9956
9957 DML_LOG_VERBOSE("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_L[k]);
9958 DML_LOG_VERBOSE("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_C[k]);
9959 DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_nom_luma = %f\n", __func__, k, p->time_per_pte_group_nom_luma[k]);
9960 DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, p->time_per_pte_group_vblank_luma[k]);
9961 DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, p->time_per_pte_group_flip_luma[k]);
9962 DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, p->time_per_pte_group_nom_chroma[k]);
9963 DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, p->time_per_pte_group_vblank_chroma[k]);
9964 DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, p->time_per_pte_group_flip_chroma[k]);
9965 #endif
9966 }
9967 } // CalculateMetaAndPTETimes
9968
CalculateVMGroupAndRequestTimes(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int BytePerPixelC[],double dst_y_per_vm_vblank[],double dst_y_per_vm_flip[],unsigned int dpte_row_width_luma_ub[],unsigned int dpte_row_width_chroma_ub[],unsigned int vm_group_bytes[],unsigned int dpde0_bytes_per_frame_ub_l[],unsigned int dpde0_bytes_per_frame_ub_c[],unsigned int tdlut_pte_bytes_per_frame[],unsigned int meta_pte_bytes_per_frame_ub_l[],unsigned int meta_pte_bytes_per_frame_ub_c[],bool mrq_present,double TimePerVMGroupVBlank[],double TimePerVMGroupFlip[],double TimePerVMRequestVBlank[],double TimePerVMRequestFlip[])9969 static void CalculateVMGroupAndRequestTimes(
9970 const struct dml2_display_cfg *display_cfg,
9971 unsigned int NumberOfActiveSurfaces,
9972 unsigned int BytePerPixelC[],
9973 double dst_y_per_vm_vblank[],
9974 double dst_y_per_vm_flip[],
9975 unsigned int dpte_row_width_luma_ub[],
9976 unsigned int dpte_row_width_chroma_ub[],
9977 unsigned int vm_group_bytes[],
9978 unsigned int dpde0_bytes_per_frame_ub_l[],
9979 unsigned int dpde0_bytes_per_frame_ub_c[],
9980 unsigned int tdlut_pte_bytes_per_frame[],
9981 unsigned int meta_pte_bytes_per_frame_ub_l[],
9982 unsigned int meta_pte_bytes_per_frame_ub_c[],
9983 bool mrq_present,
9984
9985 // Output
9986 double TimePerVMGroupVBlank[],
9987 double TimePerVMGroupFlip[],
9988 double TimePerVMRequestVBlank[],
9989 double TimePerVMRequestFlip[])
9990 {
9991 (void)dpte_row_width_luma_ub;
9992 (void)dpte_row_width_chroma_ub;
9993 unsigned int num_group_per_lower_vm_stage = 0;
9994 unsigned int num_req_per_lower_vm_stage = 0;
9995 unsigned int num_group_per_lower_vm_stage_flip;
9996 unsigned int num_group_per_lower_vm_stage_pref;
9997 unsigned int num_req_per_lower_vm_stage_flip;
9998 unsigned int num_req_per_lower_vm_stage_pref;
9999 double line_time;
10000
10001 #ifdef __DML_VBA_DEBUG__
10002 DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
10003 #endif
10004 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
10005 double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
10006 bool dcc_mrq_enable = display_cfg->plane_descriptors[k].surface.dcc.enable && mrq_present;
10007 #ifdef __DML_VBA_DEBUG__
10008 DML_LOG_VERBOSE("DML::%s: k=%u, dcc_mrq_enable = %u\n", __func__, k, dcc_mrq_enable);
10009 DML_LOG_VERBOSE("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]);
10010 DML_LOG_VERBOSE("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]);
10011 DML_LOG_VERBOSE("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", __func__, k, dpde0_bytes_per_frame_ub_c[k]);
10012 DML_LOG_VERBOSE("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_l = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]);
10013 DML_LOG_VERBOSE("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_c = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]);
10014 #endif
10015
10016 if (display_cfg->gpuvm_enable) {
10017 if (display_cfg->gpuvm_max_page_table_levels >= 2) {
10018 num_group_per_lower_vm_stage += (unsigned int) math_ceil2((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
10019
10020 if (BytePerPixelC[k] > 0)
10021 num_group_per_lower_vm_stage += (unsigned int) math_ceil2((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
10022 }
10023
10024 if (dcc_mrq_enable) {
10025 if (BytePerPixelC[k] > 0) {
10026 num_group_per_lower_vm_stage += (unsigned int)(2.0 /*for each mpde0 group*/ + math_ceil2((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) +
10027 math_ceil2((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1));
10028 } else {
10029 num_group_per_lower_vm_stage += (unsigned int)(1.0 + math_ceil2((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1));
10030 }
10031 }
10032
10033 num_group_per_lower_vm_stage_flip = num_group_per_lower_vm_stage;
10034 num_group_per_lower_vm_stage_pref = num_group_per_lower_vm_stage;
10035
10036 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && display_cfg->gpuvm_enable) {
10037 num_group_per_lower_vm_stage_pref += (unsigned int) math_ceil2(tdlut_pte_bytes_per_frame[k] / vm_group_bytes[k], 1);
10038 if (display_cfg->gpuvm_max_page_table_levels >= 2)
10039 num_group_per_lower_vm_stage_pref += 1; // tdpe0 group
10040 }
10041
10042 if (display_cfg->gpuvm_max_page_table_levels >= 2) {
10043 num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_l[k] / 64;
10044 if (BytePerPixelC[k] > 0)
10045 num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_c[k];
10046 }
10047
10048 if (dcc_mrq_enable) {
10049 num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_l[k] / 64;
10050 if (BytePerPixelC[k] > 0)
10051 num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_c[k] / 64;
10052 }
10053
10054 num_req_per_lower_vm_stage_flip = num_req_per_lower_vm_stage;
10055 num_req_per_lower_vm_stage_pref = num_req_per_lower_vm_stage;
10056
10057 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && display_cfg->gpuvm_enable) {
10058 num_req_per_lower_vm_stage_pref += tdlut_pte_bytes_per_frame[k] / 64;
10059 }
10060
10061 line_time = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz;
10062
10063 if (num_group_per_lower_vm_stage_pref > 0)
10064 TimePerVMGroupVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_group_per_lower_vm_stage_pref;
10065 else
10066 TimePerVMGroupVBlank[k] = 0;
10067
10068 if (num_group_per_lower_vm_stage_flip > 0)
10069 TimePerVMGroupFlip[k] = dst_y_per_vm_flip[k] * line_time / num_group_per_lower_vm_stage_flip;
10070 else
10071 TimePerVMGroupFlip[k] = 0;
10072
10073 if (num_req_per_lower_vm_stage_pref > 0)
10074 TimePerVMRequestVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_req_per_lower_vm_stage_pref;
10075 else
10076 TimePerVMRequestVBlank[k] = 0.0;
10077 if (num_req_per_lower_vm_stage_flip > 0)
10078 TimePerVMRequestFlip[k] = dst_y_per_vm_flip[k] * line_time / num_req_per_lower_vm_stage_flip;
10079 else
10080 TimePerVMRequestFlip[k] = 0.0;
10081
10082 DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_vm_vblank = %f\n", __func__, k, dst_y_per_vm_vblank[k]);
10083 DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_vm_flip = %f\n", __func__, k, dst_y_per_vm_flip[k]);
10084 DML_LOG_VERBOSE("DML::%s: k=%u, line_time = %f\n", __func__, k, line_time);
10085 DML_LOG_VERBOSE("DML::%s: k=%u, num_group_per_lower_vm_stage_pref = %d\n", __func__, k, num_group_per_lower_vm_stage_pref);
10086 DML_LOG_VERBOSE("DML::%s: k=%u, num_group_per_lower_vm_stage_flip = %d\n", __func__, k, num_group_per_lower_vm_stage_flip);
10087 DML_LOG_VERBOSE("DML::%s: k=%u, num_req_per_lower_vm_stage_pref = %d\n", __func__, k, num_req_per_lower_vm_stage_pref);
10088 DML_LOG_VERBOSE("DML::%s: k=%u, num_req_per_lower_vm_stage_flip = %d\n", __func__, k, num_req_per_lower_vm_stage_flip);
10089
10090 if (display_cfg->gpuvm_max_page_table_levels > 2) {
10091 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
10092 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
10093 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
10094 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
10095 }
10096
10097 } else {
10098 TimePerVMGroupVBlank[k] = 0;
10099 TimePerVMGroupFlip[k] = 0;
10100 TimePerVMRequestVBlank[k] = 0;
10101 TimePerVMRequestFlip[k] = 0;
10102 }
10103
10104 #ifdef __DML_VBA_DEBUG__
10105 DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
10106 DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
10107 DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
10108 DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
10109 #endif
10110 }
10111 }
10112
CalculateStutterEfficiency(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_CalculateStutterEfficiency_params * p)10113 static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratch,
10114 struct dml2_core_calcs_CalculateStutterEfficiency_params *p)
10115 {
10116 struct dml2_core_calcs_CalculateStutterEfficiency_locals *l = &scratch->CalculateStutterEfficiency_locals;
10117
10118 unsigned int TotalNumberOfActiveOTG = 0;
10119 double SinglePixelClock = 0;
10120 unsigned int SingleHTotal = 0;
10121 unsigned int SingleVTotal = 0;
10122 bool SameTiming = true;
10123 bool FoundCriticalSurface = false;
10124
10125 memset(l, 0, sizeof(struct dml2_core_calcs_CalculateStutterEfficiency_locals));
10126
10127 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
10128 if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
10129 if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true) {
10130 if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockWidth256BytesY[k] > p->SwathHeightY[k]) || (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesY[k] > p->SwathHeightY[k]) || p->DCCYMaxUncompressedBlock[k] < 256) {
10131 l->MaximumEffectiveCompressionLuma = 2;
10132 } else {
10133 l->MaximumEffectiveCompressionLuma = 4;
10134 }
10135 l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0, l->MaximumEffectiveCompressionLuma);
10136 #ifdef __DML_VBA_DEBUG__
10137 DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
10138 DML_LOG_VERBOSE("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0);
10139 DML_LOG_VERBOSE("DML::%s: k=%u, MaximumEffectiveCompressionLuma = %f\n", __func__, k, l->MaximumEffectiveCompressionLuma);
10140 #endif
10141 l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0;
10142 l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0 / l->MaximumEffectiveCompressionLuma;
10143
10144 if (p->ReadBandwidthSurfaceChroma[k] > 0) {
10145 if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockWidth256BytesC[k] > p->SwathHeightC[k]) || (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesC[k] > p->SwathHeightC[k]) || p->DCCCMaxUncompressedBlock[k] < 256) {
10146 l->MaximumEffectiveCompressionChroma = 2;
10147 } else {
10148 l->MaximumEffectiveCompressionChroma = 4;
10149 }
10150 l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1, l->MaximumEffectiveCompressionChroma);
10151 #ifdef __DML_VBA_DEBUG__
10152 DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]);
10153 DML_LOG_VERBOSE("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1);
10154 DML_LOG_VERBOSE("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, l->MaximumEffectiveCompressionChroma);
10155 #endif
10156 l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1;
10157 l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1 / l->MaximumEffectiveCompressionChroma;
10158 }
10159 } else {
10160 l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] + p->ReadBandwidthSurfaceChroma[k];
10161 }
10162 l->TotalRowReadBandwidth = l->TotalRowReadBandwidth + p->DPPPerSurface[k] * (p->meta_row_bw[k] + p->dpte_row_bw[k]);
10163 }
10164 }
10165
10166 l->AverageDCCCompressionRate = p->TotalDataReadBandwidth / l->TotalCompressedReadBandwidth;
10167 l->AverageDCCZeroSizeFraction = l->TotalZeroSizeRequestReadBandwidth / p->TotalDataReadBandwidth;
10168
10169 #ifdef __DML_VBA_DEBUG__
10170 DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled);
10171 DML_LOG_VERBOSE("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, l->TotalCompressedReadBandwidth);
10172 DML_LOG_VERBOSE("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, l->TotalZeroSizeRequestReadBandwidth);
10173 DML_LOG_VERBOSE("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, l->TotalZeroSizeCompressedReadBandwidth);
10174 DML_LOG_VERBOSE("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, l->MaximumEffectiveCompressionLuma);
10175 DML_LOG_VERBOSE("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, l->MaximumEffectiveCompressionChroma);
10176 DML_LOG_VERBOSE("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate);
10177 DML_LOG_VERBOSE("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, l->AverageDCCZeroSizeFraction);
10178
10179 DML_LOG_VERBOSE("DML::%s: CompbufReservedSpace64B = %u (%f kbytes)\n", __func__, p->CompbufReservedSpace64B, p->CompbufReservedSpace64B * 64 / 1024.0);
10180 DML_LOG_VERBOSE("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs);
10181 DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u kbytes\n", __func__, p->CompressedBufferSizeInkByte);
10182 DML_LOG_VERBOSE("DML::%s: ROBBufferSizeInKByte = %u kbytes\n", __func__, p->ROBBufferSizeInKByte);
10183 #endif
10184 if (l->AverageDCCZeroSizeFraction == 1) {
10185 l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth;
10186 l->EffectiveCompressedBufferSize = (double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageZeroSizeCompressionRate + ((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 * l->AverageZeroSizeCompressionRate;
10187
10188
10189 } else if (l->AverageDCCZeroSizeFraction > 0) {
10190 l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth;
10191 l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate,
10192 (double)p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate)) +
10193 (p->rob_alloc_compressed ? math_min2(((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * l->AverageDCCCompressionRate,
10194 ((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate))
10195 : ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64));
10196
10197
10198 #ifdef __DML_VBA_DEBUG__
10199 DML_LOG_VERBOSE("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate);
10200 DML_LOG_VERBOSE("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate));
10201 DML_LOG_VERBOSE("DML::%s: min 3 = %d\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64));
10202 DML_LOG_VERBOSE("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate));
10203 #endif
10204 } else {
10205 l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate,
10206 (double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate) +
10207 ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * (p->rob_alloc_compressed ? l->AverageDCCCompressionRate : 1.0);
10208
10209 #ifdef __DML_VBA_DEBUG__
10210 DML_LOG_VERBOSE("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate);
10211 DML_LOG_VERBOSE("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate);
10212 #endif
10213 }
10214
10215 #ifdef __DML_VBA_DEBUG__
10216 DML_LOG_VERBOSE("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries);
10217 DML_LOG_VERBOSE("DML::%s: ZeroSizeBufferEntries = %u\n", __func__, p->ZeroSizeBufferEntries);
10218 DML_LOG_VERBOSE("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, l->AverageZeroSizeCompressionRate);
10219 DML_LOG_VERBOSE("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0);
10220 #endif
10221
10222 *p->StutterPeriod = 0;
10223
10224 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
10225 if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
10226 l->LinesInDETY = ((double)p->DETBufferSizeY[k] + (p->UnboundedRequestEnabled == true ? l->EffectiveCompressedBufferSize : 0) * p->ReadBandwidthSurfaceLuma[k] / p->TotalDataReadBandwidth) / p->BytePerPixelDETY[k] / p->SwathWidthY[k];
10227 l->LinesInDETYRoundedDownToSwath = math_floor2(l->LinesInDETY, p->SwathHeightY[k]);
10228 l->DETBufferingTimeY = l->LinesInDETYRoundedDownToSwath * ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
10229 #ifdef __DML_VBA_DEBUG__
10230 DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferSizeY = %u (%u kbytes)\n", __func__, k, p->DETBufferSizeY[k], p->DETBufferSizeY[k] / 1024);
10231 DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
10232 DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
10233 DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
10234 DML_LOG_VERBOSE("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth);
10235 DML_LOG_VERBOSE("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, l->LinesInDETY);
10236 DML_LOG_VERBOSE("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, l->LinesInDETYRoundedDownToSwath);
10237 DML_LOG_VERBOSE("DML::%s: k=%u, VRatio = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
10238 DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, l->DETBufferingTimeY);
10239 #endif
10240
10241 if (!FoundCriticalSurface || l->DETBufferingTimeY < *p->StutterPeriod) {
10242 bool isInterlaceTiming = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !p->ProgressiveToInterlaceUnitInOPP;
10243
10244 FoundCriticalSurface = true;
10245 *p->StutterPeriod = l->DETBufferingTimeY;
10246 l->FrameTimeCriticalSurface = (isInterlaceTiming ? math_floor2((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total / 2.0, 1.0) : p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total) * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
10247 l->VActiveTimeCriticalSurface = (isInterlaceTiming ? math_floor2((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_active / 2.0, 1.0) : p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_active) * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
10248 l->BytePerPixelYCriticalSurface = p->BytePerPixelY[k];
10249 l->SwathWidthYCriticalSurface = p->SwathWidthY[k];
10250 l->SwathHeightYCriticalSurface = p->SwathHeightY[k];
10251 l->BlockWidth256BytesYCriticalSurface = p->BlockWidth256BytesY[k];
10252 l->DETBufferSizeYCriticalSurface = p->DETBufferSizeY[k];
10253 l->MinTTUVBlankCriticalSurface = p->MinTTUVBlank[k];
10254 l->SinglePlaneCriticalSurface = (p->ReadBandwidthSurfaceChroma[k] == 0);
10255 l->SinglePipeCriticalSurface = (p->DPPPerSurface[k] == 1);
10256
10257 #ifdef __DML_VBA_DEBUG__
10258 DML_LOG_VERBOSE("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface);
10259 DML_LOG_VERBOSE("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod);
10260 DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, l->MinTTUVBlankCriticalSurface);
10261 DML_LOG_VERBOSE("DML::%s: k=%u, FrameTimeCriticalSurface= %f\n", __func__, k, l->FrameTimeCriticalSurface);
10262 DML_LOG_VERBOSE("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, l->VActiveTimeCriticalSurface);
10263 DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, l->BytePerPixelYCriticalSurface);
10264 DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, l->SwathWidthYCriticalSurface);
10265 DML_LOG_VERBOSE("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, l->SwathHeightYCriticalSurface);
10266 DML_LOG_VERBOSE("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, l->BlockWidth256BytesYCriticalSurface);
10267 DML_LOG_VERBOSE("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, l->SinglePlaneCriticalSurface);
10268 DML_LOG_VERBOSE("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, l->SinglePipeCriticalSurface);
10269 #endif
10270 }
10271 }
10272 }
10273
10274 // for bounded req, the stutter period is calculated only based on DET size, but during burst there can be some return inside ROB/compressed buffer
10275 // stutter period is calculated only on the det sizing
10276 // if (cdb + rob >= det) the stutter burst will be absorbed by the cdb + rob which is before decompress
10277 // else
10278 // the cdb + rob part will be in compressed rate with urg bw (idea bw)
10279 // the det part will be return at uncompressed rate with 64B/dcfclk
10280 //
10281 // for unbounded req, the stutter period should be calculated as total of CDB+ROB+DET, so the term "PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer"
10282 // should be == EffectiveCompressedBufferSize which will returned a compressed rate, the rest of stutter period is from the DET will be returned at uncompressed rate with 64B/dcfclk
10283
10284 l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = math_min2(*p->StutterPeriod * p->TotalDataReadBandwidth, l->EffectiveCompressedBufferSize);
10285 #ifdef __DML_VBA_DEBUG__
10286 DML_LOG_VERBOSE("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate);
10287 DML_LOG_VERBOSE("DML::%s: StutterPeriod*TotalDataReadBandwidth = %f (%f kbytes)\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth, (*p->StutterPeriod * p->TotalDataReadBandwidth) / 1024.0);
10288 DML_LOG_VERBOSE("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0);
10289 DML_LOG_VERBOSE("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f (%f kbytes)\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 1024);
10290 DML_LOG_VERBOSE("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW);
10291 DML_LOG_VERBOSE("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth);
10292 DML_LOG_VERBOSE("DML::%s: TotalRowReadBandwidth = %f\n", __func__, l->TotalRowReadBandwidth);
10293 DML_LOG_VERBOSE("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK);
10294 #endif
10295
10296 l->StutterBurstTime = l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer
10297 / (p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) +
10298 (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer)
10299 / math_min2(p->DCFCLK * 64, p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) +
10300 *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW;
10301 #ifdef __DML_VBA_DEBUG__
10302 DML_LOG_VERBOSE("DML::%s: Part 1 = %f\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / p->ReturnBW / (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate));
10303 DML_LOG_VERBOSE("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64));
10304 DML_LOG_VERBOSE("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW);
10305 DML_LOG_VERBOSE("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime);
10306 #endif
10307 l->TotalActiveWriteback = 0;
10308 memset(l->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool));
10309
10310 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
10311 if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
10312 if (!l->stream_visited[p->display_cfg->plane_descriptors[k].stream_index]) {
10313
10314 if (p->display_cfg->stream_descriptors[k].writeback.active_writebacks_per_stream > 0)
10315 l->TotalActiveWriteback = l->TotalActiveWriteback + 1;
10316
10317 if (TotalNumberOfActiveOTG == 0) { // first otg
10318 SinglePixelClock = ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
10319 SingleHTotal = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total;
10320 SingleVTotal = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total;
10321 } else if (SinglePixelClock != ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) ||
10322 SingleHTotal != p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total ||
10323 SingleVTotal != p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total) {
10324 SameTiming = false;
10325 }
10326 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
10327 l->stream_visited[p->display_cfg->plane_descriptors[k].stream_index] = 1;
10328 }
10329 }
10330 }
10331
10332 if (l->TotalActiveWriteback == 0) {
10333 #ifdef __DML_VBA_DEBUG__
10334 DML_LOG_VERBOSE("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime);
10335 DML_LOG_VERBOSE("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time);
10336 DML_LOG_VERBOSE("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
10337 #endif
10338 *p->StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitTime + l->StutterBurstTime) / *p->StutterPeriod) * 100;
10339 *p->Z8StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitZ8Time + l->StutterBurstTime) / *p->StutterPeriod) * 100;
10340 *p->NumberOfStutterBurstsPerFrame = (*p->StutterEfficiencyNotIncludingVBlank > 0 ? (unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0);
10341 *p->Z8NumberOfStutterBurstsPerFrame = (*p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? (unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0);
10342 } else {
10343 *p->StutterEfficiencyNotIncludingVBlank = 0.;
10344 *p->Z8StutterEfficiencyNotIncludingVBlank = 0.;
10345 *p->NumberOfStutterBurstsPerFrame = 0;
10346 *p->Z8NumberOfStutterBurstsPerFrame = 0;
10347 }
10348 #ifdef __DML_VBA_DEBUG__
10349 DML_LOG_VERBOSE("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, l->VActiveTimeCriticalSurface);
10350 DML_LOG_VERBOSE("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
10351 DML_LOG_VERBOSE("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank);
10352 DML_LOG_VERBOSE("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame);
10353 DML_LOG_VERBOSE("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
10354 #endif
10355
10356 if (*p->StutterEfficiencyNotIncludingVBlank > 0) {
10357 if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) {
10358 *p->StutterEfficiency = *p->StutterEfficiencyNotIncludingVBlank;
10359 } else {
10360 *p->StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitTime + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100;
10361 }
10362 } else {
10363 *p->StutterEfficiency = 0;
10364 *p->NumberOfStutterBurstsPerFrame = 0;
10365 }
10366
10367 if (*p->Z8StutterEfficiencyNotIncludingVBlank > 0) {
10368 //LastZ8StutterPeriod = l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod;
10369 if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) {
10370 *p->Z8StutterEfficiency = *p->Z8StutterEfficiencyNotIncludingVBlank;
10371 } else {
10372 *p->Z8StutterEfficiency = (1 - (*p->Z8NumberOfStutterBurstsPerFrame * p->SRExitZ8Time + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100;
10373 }
10374 } else {
10375 *p->Z8StutterEfficiency = 0.;
10376 *p->Z8NumberOfStutterBurstsPerFrame = 0;
10377 }
10378
10379 #ifdef __DML_VBA_DEBUG__
10380 DML_LOG_VERBOSE("DML::%s: TotalNumberOfActiveOTG = %u\n", __func__, TotalNumberOfActiveOTG);
10381 DML_LOG_VERBOSE("DML::%s: SameTiming = %u\n", __func__, SameTiming);
10382 DML_LOG_VERBOSE("DML::%s: SynchronizeTimings = %u\n", __func__, p->SynchronizeTimings);
10383 DML_LOG_VERBOSE("DML::%s: LastZ8StutterPeriod = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod : 0);
10384 DML_LOG_VERBOSE("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Z8StutterEnterPlusExitWatermark);
10385 DML_LOG_VERBOSE("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime);
10386 DML_LOG_VERBOSE("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
10387 DML_LOG_VERBOSE("DML::%s: StutterEfficiency = %f\n", __func__, *p->StutterEfficiency);
10388 DML_LOG_VERBOSE("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency);
10389 DML_LOG_VERBOSE("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
10390 DML_LOG_VERBOSE("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
10391 #endif
10392
10393 *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && l->SinglePlaneCriticalSurface && l->SinglePipeCriticalSurface);
10394
10395 #ifdef __DML_VBA_DEBUG__
10396 DML_LOG_VERBOSE("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, l->DETBufferSizeYCriticalSurface);
10397 DML_LOG_VERBOSE("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte);
10398 DML_LOG_VERBOSE("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
10399 #endif
10400 }
10401
dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex * in_out_params)10402 static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex *in_out_params)
10403 {
10404 const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg;
10405 const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table;
10406 const struct core_display_cfg_support_info *cfg_support_info = in_out_params->cfg_support_info;
10407 struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib;
10408 struct dml2_display_cfg_programming *programming = in_out_params->programming;
10409
10410 struct dml2_core_calcs_mode_programming_locals *s = &mode_lib->scratch.dml_core_mode_programming_locals;
10411 struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
10412 struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
10413 struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
10414 struct dml2_core_calcs_CalculateStutterEfficiency_params *CalculateStutterEfficiency_params = &mode_lib->scratch.CalculateStutterEfficiency_params;
10415 struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
10416 struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params;
10417 struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params;
10418 struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params;
10419 struct dml2_core_shared_CalculateMetaAndPTETimes_params *CalculateMetaAndPTETimes_params = &mode_lib->scratch.CalculateMetaAndPTETimes_params;
10420 struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params;
10421 struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *calculate_bytes_to_fetch_required_to_hide_latency_params = &mode_lib->scratch.calculate_bytes_to_fetch_required_to_hide_latency_params;
10422
10423 unsigned int k;
10424 bool must_support_iflip;
10425 const long min_return_uclk_cycles = 83;
10426 const long min_return_fclk_cycles = 75;
10427 const double max_fclk_mhz = min_clk_table->max_clocks_khz.fclk / 1000.0;
10428 double hard_minimum_dcfclk_mhz = (double)min_clk_table->dram_bw_table.entries[0].min_dcfclk_khz / 1000.0;
10429 double max_uclk_mhz = 0;
10430 double min_return_latency_in_DCFCLK_cycles = 0;
10431
10432 DML_LOG_VERBOSE("DML::%s: --- START --- \n", __func__);
10433
10434 memset(&mode_lib->scratch, 0, sizeof(struct dml2_core_internal_scratch));
10435 memset(&mode_lib->mp, 0, sizeof(struct dml2_core_internal_mode_program));
10436
10437 s->num_active_planes = display_cfg->num_planes;
10438 get_stream_output_bpp(s->OutputBpp, display_cfg);
10439
10440 mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(display_cfg->num_planes, cfg_support_info);
10441 dml_calc_pipe_plane_mapping(cfg_support_info, mode_lib->mp.pipe_plane);
10442
10443 mode_lib->mp.Dcfclk = programming->min_clocks.dcn4x.active.dcfclk_khz / 1000.0;
10444 mode_lib->mp.FabricClock = programming->min_clocks.dcn4x.active.fclk_khz / 1000.0;
10445 mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table.dram_config, &min_clk_table->dram_bw_table);
10446 mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4x.active.uclk_khz / 1000.0;
10447 mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4x.dpprefclk_khz / 1000.0;
10448 s->SOCCLK = (double)programming->min_clocks.dcn4x.socclk_khz / 1000;
10449 mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4x.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params);
10450 mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table);
10451
10452 for (k = 0; k < s->num_active_planes; ++k) {
10453 unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index;
10454 DML_ASSERT(cfg_support_info->stream_support_info[stream_index].odms_used <= 4);
10455 DML_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4 ||
10456 cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2 ||
10457 cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1);
10458
10459 if (cfg_support_info->stream_support_info[stream_index].odms_used > 1)
10460 DML_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1);
10461
10462 switch (cfg_support_info->stream_support_info[stream_index].odms_used) {
10463 case (4):
10464 mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_4to1;
10465 break;
10466 case (3):
10467 mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_3to1;
10468 break;
10469 case (2):
10470 mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_2to1;
10471 break;
10472 default:
10473 if (cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4)
10474 mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to4;
10475 else if (cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2)
10476 mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to2;
10477 else
10478 mode_lib->mp.ODMMode[k] = dml2_odm_mode_bypass;
10479 break;
10480 }
10481 }
10482
10483 for (k = 0; k < s->num_active_planes; ++k) {
10484 mode_lib->mp.NoOfDPP[k] = cfg_support_info->plane_support_info[k].dpps_used;
10485 mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4x.dppclk_khz / 1000.0;
10486 DML_ASSERT(mode_lib->mp.Dppclk[k] > 0);
10487 }
10488
10489 for (k = 0; k < s->num_active_planes; ++k) {
10490 unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index;
10491 mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4x.dscclk_khz / 1000.0;
10492 DML_LOG_VERBOSE("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]);
10493 }
10494
10495 mode_lib->mp.Dispclk = programming->min_clocks.dcn4x.dispclk_khz / 1000.0;
10496 mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4x.deepsleep_dcfclk_khz / 1000.0;
10497
10498 DML_ASSERT(mode_lib->mp.Dcfclk > 0);
10499 DML_ASSERT(mode_lib->mp.FabricClock > 0);
10500 DML_ASSERT(mode_lib->mp.dram_bw_mbps > 0);
10501 DML_ASSERT(mode_lib->mp.uclk_freq_mhz > 0);
10502 DML_ASSERT(mode_lib->mp.GlobalDPPCLK > 0);
10503 DML_ASSERT(mode_lib->mp.Dispclk > 0);
10504 DML_ASSERT(mode_lib->mp.DCFCLKDeepSleep > 0);
10505 DML_ASSERT(s->SOCCLK > 0);
10506
10507 #ifdef __DML_VBA_DEBUG__
10508 DML_LOG_VERBOSE("DML::%s: num_active_planes = %u\n", __func__, s->num_active_planes);
10509 DML_LOG_VERBOSE("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes);
10510 DML_LOG_VERBOSE("DML::%s: Dcfclk = %f\n", __func__, mode_lib->mp.Dcfclk);
10511 DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, mode_lib->mp.FabricClock);
10512 DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->mp.dram_bw_mbps);
10513 DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->mp.uclk_freq_mhz);
10514 DML_LOG_VERBOSE("DML::%s: Dispclk = %f\n", __func__, mode_lib->mp.Dispclk);
10515 for (k = 0; k < s->num_active_planes; ++k) {
10516 DML_LOG_VERBOSE("DML::%s: Dppclk[%0d] = %f\n", __func__, k, mode_lib->mp.Dppclk[k]);
10517 }
10518 DML_LOG_VERBOSE("DML::%s: GlobalDPPCLK = %f\n", __func__, mode_lib->mp.GlobalDPPCLK);
10519 DML_LOG_VERBOSE("DML::%s: DCFCLKDeepSleep = %f\n", __func__, mode_lib->mp.DCFCLKDeepSleep);
10520 DML_LOG_VERBOSE("DML::%s: SOCCLK = %f\n", __func__, s->SOCCLK);
10521 DML_LOG_VERBOSE("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index);
10522 DML_LOG_VERBOSE("DML::%s: min_clk_table min_fclk_khz = %ld\n", __func__, min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz);
10523 if (min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_uclk_khz)
10524 DML_LOG_VERBOSE("DML::%s: min_clk_table uclk_mhz = %f\n", __func__, min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_uclk_khz / 1000.0);
10525 else
10526 DML_LOG_VERBOSE("DML::%s: min_clk_table uclk_mhz = %f\n", __func__, dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config));
10527 for (k = 0; k < mode_lib->mp.num_active_pipes; ++k) {
10528 DML_LOG_VERBOSE("DML::%s: pipe=%d is in plane=%d\n", __func__, k, mode_lib->mp.pipe_plane[k]);
10529 DML_LOG_VERBOSE("DML::%s: Per-plane DPPPerSurface[%0d] = %d\n", __func__, k, mode_lib->mp.NoOfDPP[k]);
10530 }
10531
10532 for (k = 0; k < s->num_active_planes; k++)
10533 DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns);
10534 #endif
10535
10536 CalculateMaxDETAndMinCompressedBufferSize(
10537 mode_lib->ip.config_return_buffer_size_in_kbytes,
10538 mode_lib->ip.config_return_buffer_segment_size_in_kbytes,
10539 mode_lib->ip.rob_buffer_size_kbytes,
10540 mode_lib->ip.max_num_dpp,
10541 display_cfg->overrides.hw.force_nom_det_size_kbytes.enable,
10542 display_cfg->overrides.hw.force_nom_det_size_kbytes.value,
10543 mode_lib->ip.dcn_mrq_present,
10544
10545 /* Output */
10546 &s->MaxTotalDETInKByte,
10547 &s->NomDETInKByte,
10548 &s->MinCompressedBufferSizeInKByte);
10549
10550
10551 PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd);
10552
10553 for (k = 0; k < s->num_active_planes; ++k) {
10554 CalculateSinglePipeDPPCLKAndSCLThroughput(
10555 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
10556 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
10557 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
10558 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
10559 mode_lib->ip.max_dchub_pscl_bw_pix_per_clk,
10560 mode_lib->ip.max_pscl_lb_bw_pix_per_clk,
10561 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
10562 display_cfg->plane_descriptors[k].pixel_format,
10563 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps,
10564 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps,
10565 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps,
10566 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps,
10567
10568 /* Output */
10569 &mode_lib->mp.PSCL_THROUGHPUT[k],
10570 &mode_lib->mp.PSCL_THROUGHPUT_CHROMA[k],
10571 &mode_lib->mp.DPPCLKUsingSingleDPP[k]);
10572 }
10573
10574 for (k = 0; k < s->num_active_planes; ++k) {
10575 CalculateBytePerPixelAndBlockSizes(
10576 display_cfg->plane_descriptors[k].pixel_format,
10577 display_cfg->plane_descriptors[k].surface.tiling,
10578 display_cfg->plane_descriptors[k].surface.plane0.pitch,
10579 display_cfg->plane_descriptors[k].surface.plane1.pitch,
10580
10581 // Output
10582 &mode_lib->mp.BytePerPixelY[k],
10583 &mode_lib->mp.BytePerPixelC[k],
10584 &mode_lib->mp.BytePerPixelInDETY[k],
10585 &mode_lib->mp.BytePerPixelInDETC[k],
10586 &mode_lib->mp.Read256BlockHeightY[k],
10587 &mode_lib->mp.Read256BlockHeightC[k],
10588 &mode_lib->mp.Read256BlockWidthY[k],
10589 &mode_lib->mp.Read256BlockWidthC[k],
10590 &mode_lib->mp.MacroTileHeightY[k],
10591 &mode_lib->mp.MacroTileHeightC[k],
10592 &mode_lib->mp.MacroTileWidthY[k],
10593 &mode_lib->mp.MacroTileWidthC[k],
10594 &mode_lib->mp.surf_linear128_l[k],
10595 &mode_lib->mp.surf_linear128_c[k]);
10596 }
10597
10598 CalculateSwathWidth(
10599 display_cfg,
10600 false, // ForceSingleDPP
10601 s->num_active_planes,
10602 mode_lib->mp.ODMMode,
10603 mode_lib->mp.BytePerPixelY,
10604 mode_lib->mp.BytePerPixelC,
10605 mode_lib->mp.Read256BlockHeightY,
10606 mode_lib->mp.Read256BlockHeightC,
10607 mode_lib->mp.Read256BlockWidthY,
10608 mode_lib->mp.Read256BlockWidthC,
10609 mode_lib->mp.surf_linear128_l,
10610 mode_lib->mp.surf_linear128_c,
10611 mode_lib->mp.NoOfDPP,
10612
10613 /* Output */
10614 mode_lib->mp.req_per_swath_ub_l,
10615 mode_lib->mp.req_per_swath_ub_c,
10616 mode_lib->mp.SwathWidthSingleDPPY,
10617 mode_lib->mp.SwathWidthSingleDPPC,
10618 mode_lib->mp.SwathWidthY,
10619 mode_lib->mp.SwathWidthC,
10620 s->dummy_integer_array[0], // unsigned int MaximumSwathHeightY[]
10621 s->dummy_integer_array[1], // unsigned int MaximumSwathHeightC[]
10622 mode_lib->mp.swath_width_luma_ub,
10623 mode_lib->mp.swath_width_chroma_ub);
10624
10625 for (k = 0; k < s->num_active_planes; ++k) {
10626 mode_lib->mp.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width * display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 /
10627 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000));
10628 mode_lib->mp.vactive_sw_bw_l[k] = mode_lib->mp.SwathWidthSingleDPPY[k] * mode_lib->mp.BytePerPixelY[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
10629 mode_lib->mp.vactive_sw_bw_c[k] = mode_lib->mp.SwathWidthSingleDPPC[k] * mode_lib->mp.BytePerPixelC[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
10630 DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_l[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]);
10631 DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_c[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]);
10632 }
10633
10634 CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg;
10635 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes;
10636 CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = s->MaxTotalDETInKByte;
10637 CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = s->MinCompressedBufferSizeInKByte;
10638 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
10639 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
10640 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
10641 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
10642 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
10643 CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = s->num_active_planes;
10644 CalculateSwathAndDETConfiguration_params->nomDETInKByte = s->NomDETInKByte;
10645 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
10646 CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes;
10647 CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->mp.vactive_sw_bw_l;
10648 CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->mp.vactive_sw_bw_c;
10649 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = s->dummy_single_array[0];
10650 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = s->dummy_single_array[1];
10651 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->mp.Read256BlockHeightY;
10652 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->mp.Read256BlockHeightC;
10653 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->mp.Read256BlockWidthY;
10654 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->mp.Read256BlockWidthC;
10655 CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->mp.surf_linear128_l;
10656 CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->mp.surf_linear128_c;
10657 CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->mp.ODMMode;
10658 CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->mp.NoOfDPP;
10659 CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->mp.BytePerPixelY;
10660 CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->mp.BytePerPixelC;
10661 CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->mp.BytePerPixelInDETY;
10662 CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->mp.BytePerPixelInDETC;
10663 CalculateSwathAndDETConfiguration_params->mrq_present = mode_lib->ip.dcn_mrq_present;
10664
10665 // output
10666 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = mode_lib->mp.req_per_swath_ub_l;
10667 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = mode_lib->mp.req_per_swath_ub_c;
10668 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_long_array[0];
10669 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_long_array[1];
10670 CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_long_array[2];
10671 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_long_array[3];
10672 CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->mp.SwathHeightY;
10673 CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->mp.SwathHeightC;
10674 CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->mp.request_size_bytes_luma;
10675 CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->mp.request_size_bytes_chroma;
10676 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->mp.DETBufferSizeInKByte;
10677 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY;
10678 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC;
10679 CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l;
10680 CalculateSwathAndDETConfiguration_params->full_swath_bytes_c = s->full_swath_bytes_c;
10681 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->mp.UnboundedRequestEnabled;
10682 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &mode_lib->mp.compbuf_reserved_space_64b;
10683 CalculateSwathAndDETConfiguration_params->hw_debug5 = &mode_lib->mp.hw_debug5;
10684 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->mp.CompressedBufferSizeInkByte;
10685 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = &s->dummy_boolean_array[0][0];
10686 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[0];
10687
10688 // Calculate DET size, swath height here.
10689 CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
10690
10691 // DSC Delay
10692 for (k = 0; k < s->num_active_planes; ++k) {
10693 mode_lib->mp.DSCDelay[k] = DSCDelayRequirement(cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].dsc_enable,
10694 mode_lib->mp.ODMMode[k],
10695 mode_lib->ip.maximum_dsc_bits_per_component,
10696 s->OutputBpp[k],
10697 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
10698 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
10699 cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].num_dsc_slices,
10700 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
10701 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
10702 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
10703 s->PixelClockBackEnd[k]);
10704 }
10705
10706 // Prefetch
10707 if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) {
10708 for (k = 0; k < s->num_active_planes; ++k)
10709 mode_lib->mp.SurfaceSizeInTheMALL[k] = 0;
10710 } else {
10711 CalculateSurfaceSizeInMall(
10712 display_cfg,
10713 s->num_active_planes,
10714 mode_lib->soc.mall_allocated_for_dcn_mbytes,
10715 mode_lib->mp.BytePerPixelY,
10716 mode_lib->mp.BytePerPixelC,
10717 mode_lib->mp.Read256BlockWidthY,
10718 mode_lib->mp.Read256BlockWidthC,
10719 mode_lib->mp.Read256BlockHeightY,
10720 mode_lib->mp.Read256BlockHeightC,
10721 mode_lib->mp.MacroTileWidthY,
10722 mode_lib->mp.MacroTileWidthC,
10723 mode_lib->mp.MacroTileHeightY,
10724 mode_lib->mp.MacroTileHeightC,
10725
10726 /* Output */
10727 mode_lib->mp.SurfaceSizeInTheMALL,
10728 &s->dummy_boolean[0]); /* bool *ExceededMALLSize */
10729 }
10730
10731 for (k = 0; k < s->num_active_planes; ++k) {
10732 s->SurfaceParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
10733 s->SurfaceParameters[k].DPPPerSurface = mode_lib->mp.NoOfDPP[k];
10734 s->SurfaceParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
10735 s->SurfaceParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
10736 s->SurfaceParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
10737 s->SurfaceParameters[k].BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY[k];
10738 s->SurfaceParameters[k].BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY[k];
10739 s->SurfaceParameters[k].BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC[k];
10740 s->SurfaceParameters[k].BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC[k];
10741 s->SurfaceParameters[k].BlockWidthY = mode_lib->mp.MacroTileWidthY[k];
10742 s->SurfaceParameters[k].BlockHeightY = mode_lib->mp.MacroTileHeightY[k];
10743 s->SurfaceParameters[k].BlockWidthC = mode_lib->mp.MacroTileWidthC[k];
10744 s->SurfaceParameters[k].BlockHeightC = mode_lib->mp.MacroTileHeightC[k];
10745 s->SurfaceParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
10746 s->SurfaceParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
10747 s->SurfaceParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
10748 s->SurfaceParameters[k].SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
10749 s->SurfaceParameters[k].SurfaceTiling = display_cfg->plane_descriptors[k].surface.tiling;
10750 s->SurfaceParameters[k].BytePerPixelY = mode_lib->mp.BytePerPixelY[k];
10751 s->SurfaceParameters[k].BytePerPixelC = mode_lib->mp.BytePerPixelC[k];
10752 s->SurfaceParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
10753 s->SurfaceParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
10754 s->SurfaceParameters[k].VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
10755 s->SurfaceParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
10756 s->SurfaceParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
10757 s->SurfaceParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch;
10758 s->SurfaceParameters[k].PitchC = display_cfg->plane_descriptors[k].surface.plane1.pitch;
10759 s->SurfaceParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
10760 s->SurfaceParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
10761 s->SurfaceParameters[k].ViewportYStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
10762 s->SurfaceParameters[k].ViewportXStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
10763 s->SurfaceParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
10764 s->SurfaceParameters[k].FORCE_ONE_ROW_FOR_FRAME = display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame;
10765 s->SurfaceParameters[k].SwathHeightY = mode_lib->mp.SwathHeightY[k];
10766 s->SurfaceParameters[k].SwathHeightC = mode_lib->mp.SwathHeightC[k];
10767 s->SurfaceParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch;
10768 s->SurfaceParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch;
10769 }
10770
10771 CalculateVMRowAndSwath_params->display_cfg = display_cfg;
10772 CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = s->num_active_planes;
10773 CalculateVMRowAndSwath_params->myPipe = s->SurfaceParameters;
10774 CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->mp.SurfaceSizeInTheMALL;
10775 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
10776 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma;
10777 CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes;
10778 CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->mp.SwathWidthY;
10779 CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->mp.SwathWidthC;
10780 CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes * 1024;
10781 CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes;
10782 CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present;
10783
10784 // output
10785 CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0];
10786 CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = mode_lib->mp.dpte_row_width_luma_ub;
10787 CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = mode_lib->mp.dpte_row_width_chroma_ub;
10788 CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->mp.dpte_row_height;
10789 CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->mp.dpte_row_height_chroma;
10790 CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = mode_lib->mp.dpte_row_height_linear;
10791 CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = mode_lib->mp.dpte_row_height_linear_chroma;
10792 CalculateVMRowAndSwath_params->vm_group_bytes = mode_lib->mp.vm_group_bytes;
10793 CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes;
10794 CalculateVMRowAndSwath_params->PixelPTEReqWidthY = mode_lib->mp.PixelPTEReqWidthY;
10795 CalculateVMRowAndSwath_params->PixelPTEReqHeightY = mode_lib->mp.PixelPTEReqHeightY;
10796 CalculateVMRowAndSwath_params->PTERequestSizeY = mode_lib->mp.PTERequestSizeY;
10797 CalculateVMRowAndSwath_params->PixelPTEReqWidthC = mode_lib->mp.PixelPTEReqWidthC;
10798 CalculateVMRowAndSwath_params->PixelPTEReqHeightC = mode_lib->mp.PixelPTEReqHeightC;
10799 CalculateVMRowAndSwath_params->PTERequestSizeC = mode_lib->mp.PTERequestSizeC;
10800 CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y;
10801 CalculateVMRowAndSwath_params->vmpg_height_y = s->vmpg_height_y;
10802 CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c;
10803 CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c;
10804 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = mode_lib->mp.dpde0_bytes_per_frame_ub_l;
10805 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = mode_lib->mp.dpde0_bytes_per_frame_ub_c;
10806 CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->mp.PrefetchSourceLinesY;
10807 CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->mp.PrefetchSourceLinesC;
10808 CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->mp.VInitPreFillY;
10809 CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->mp.VInitPreFillC;
10810 CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->mp.MaxNumSwathY;
10811 CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->mp.MaxNumSwathC;
10812 CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
10813 CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->mp.PixelPTEBytesPerRow;
10814 CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
10815 CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
10816 CalculateVMRowAndSwath_params->vm_bytes = mode_lib->mp.vm_bytes;
10817 CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->mp.use_one_row_for_frame;
10818 CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->mp.use_one_row_for_frame_flip;
10819 CalculateVMRowAndSwath_params->is_using_mall_for_ss = mode_lib->mp.is_using_mall_for_ss;
10820 CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = mode_lib->mp.PTE_BUFFER_MODE;
10821 CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = mode_lib->mp.BIGK_FRAGMENT_SIZE;
10822 CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = s->dummy_boolean_array[1];
10823 CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->mp.meta_row_bw;
10824 CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->mp.meta_row_bytes;
10825 CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
10826 CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
10827 CalculateVMRowAndSwath_params->meta_req_width_luma = mode_lib->mp.meta_req_width;
10828 CalculateVMRowAndSwath_params->meta_req_height_luma = mode_lib->mp.meta_req_height;
10829 CalculateVMRowAndSwath_params->meta_row_width_luma = mode_lib->mp.meta_row_width;
10830 CalculateVMRowAndSwath_params->meta_row_height_luma = mode_lib->mp.meta_row_height;
10831 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = mode_lib->mp.meta_pte_bytes_per_frame_ub_l;
10832 CalculateVMRowAndSwath_params->meta_req_width_chroma = mode_lib->mp.meta_req_width_chroma;
10833 CalculateVMRowAndSwath_params->meta_row_height_chroma = mode_lib->mp.meta_row_height_chroma;
10834 CalculateVMRowAndSwath_params->meta_row_width_chroma = mode_lib->mp.meta_row_width_chroma;
10835 CalculateVMRowAndSwath_params->meta_req_height_chroma = mode_lib->mp.meta_req_height_chroma;
10836 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = mode_lib->mp.meta_pte_bytes_per_frame_ub_c;
10837
10838 CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params);
10839
10840 memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params));
10841 if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0 || mode_lib->ip.dcn_mrq_present) {
10842 for (k = 0; k < s->num_active_planes; k++) {
10843 mode_lib->mp.mall_prefetch_sdp_overhead_factor[k] = 1.0;
10844 mode_lib->mp.mall_prefetch_dram_overhead_factor[k] = 1.0;
10845 mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0;
10846 mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0;
10847 mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0;
10848 mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0;
10849 }
10850 } else {
10851 for (k = 0; k < s->num_active_planes; k++) {
10852 calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
10853 calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count;
10854 calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes;
10855 calculate_mcache_setting_params->mcache_size_bytes = mode_lib->soc.mcache_size_bytes;
10856 calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes;
10857 calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
10858 calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
10859
10860 calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format;
10861 calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle);
10862 calculate_mcache_setting_params->vp_stationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
10863 calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling;
10864 calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall;
10865
10866 calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
10867 calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
10868 calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
10869 calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
10870 calculate_mcache_setting_params->blk_width_l = mode_lib->mp.MacroTileWidthY[k];
10871 calculate_mcache_setting_params->blk_height_l = mode_lib->mp.MacroTileHeightY[k];
10872 calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k];
10873 calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k];
10874 calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k];
10875 calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->mp.BytePerPixelY[k];
10876
10877 calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
10878 calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
10879 calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
10880 calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
10881 calculate_mcache_setting_params->blk_width_c = mode_lib->mp.MacroTileWidthC[k];
10882 calculate_mcache_setting_params->blk_height_c = mode_lib->mp.MacroTileHeightC[k];
10883 calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k];
10884 calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k];
10885 calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k];
10886 calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->mp.BytePerPixelC[k];
10887
10888 // output
10889 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k];
10890 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k];
10891 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k];
10892 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k];
10893
10894 calculate_mcache_setting_params->num_mcaches_l = &mode_lib->mp.num_mcaches_l[k];
10895 calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->mp.mcache_row_bytes_l[k];
10896 calculate_mcache_setting_params->mcache_row_bytes_per_channel_l = &mode_lib->mp.mcache_row_bytes_per_channel_l[k];
10897 calculate_mcache_setting_params->mcache_offsets_l = mode_lib->mp.mcache_offsets_l[k];
10898 calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->mp.mcache_shift_granularity_l[k];
10899
10900 calculate_mcache_setting_params->num_mcaches_c = &mode_lib->mp.num_mcaches_c[k];
10901 calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->mp.mcache_row_bytes_c[k];
10902 calculate_mcache_setting_params->mcache_row_bytes_per_channel_c = &mode_lib->mp.mcache_row_bytes_per_channel_c[k];
10903 calculate_mcache_setting_params->mcache_offsets_c = mode_lib->mp.mcache_offsets_c[k];
10904 calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->mp.mcache_shift_granularity_c[k];
10905
10906 calculate_mcache_setting_params->mall_comb_mcache_l = &mode_lib->mp.mall_comb_mcache_l[k];
10907 calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->mp.mall_comb_mcache_c[k];
10908 calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->mp.lc_comb_mcache[k];
10909 calculate_mcache_setting(&mode_lib->scratch, calculate_mcache_setting_params);
10910 }
10911
10912 calculate_mall_bw_overhead_factor(
10913 mode_lib->mp.mall_prefetch_sdp_overhead_factor,
10914 mode_lib->mp.mall_prefetch_dram_overhead_factor,
10915
10916 // input
10917 display_cfg,
10918 s->num_active_planes);
10919 }
10920
10921 // Calculate all the bandwidth available
10922 calculate_bandwidth_available(
10923 mode_lib->mp.avg_bandwidth_available_min,
10924 mode_lib->mp.avg_bandwidth_available,
10925 mode_lib->mp.urg_bandwidth_available_min,
10926 mode_lib->mp.urg_bandwidth_available,
10927 mode_lib->mp.urg_bandwidth_available_vm_only,
10928 mode_lib->mp.urg_bandwidth_available_pixel_and_vm,
10929
10930 &mode_lib->soc,
10931 display_cfg->hostvm_enable,
10932 mode_lib->mp.Dcfclk,
10933 mode_lib->mp.FabricClock,
10934 mode_lib->mp.dram_bw_mbps);
10935
10936
10937 calculate_hostvm_inefficiency_factor(
10938 &s->HostVMInefficiencyFactor,
10939 &s->HostVMInefficiencyFactorPrefetch,
10940
10941 display_cfg->gpuvm_enable,
10942 display_cfg->hostvm_enable,
10943 mode_lib->ip.remote_iommu_outstanding_translations,
10944 mode_lib->soc.max_outstanding_reqs,
10945 mode_lib->mp.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active],
10946 mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);
10947
10948 s->TotalDCCActiveDPP = 0;
10949 s->TotalActiveDPP = 0;
10950 for (k = 0; k < s->num_active_planes; ++k) {
10951 s->TotalActiveDPP = s->TotalActiveDPP + mode_lib->mp.NoOfDPP[k];
10952 if (display_cfg->plane_descriptors[k].surface.dcc.enable)
10953 s->TotalDCCActiveDPP = s->TotalDCCActiveDPP + mode_lib->mp.NoOfDPP[k];
10954 }
10955 // Calculate tdlut schedule related terms
10956 for (k = 0; k <= s->num_active_planes - 1; k++) {
10957 calculate_tdlut_setting_params->dispclk_mhz = mode_lib->mp.Dispclk;
10958 calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
10959 calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode;
10960 calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode;
10961 calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size;
10962 calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
10963 calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
10964
10965 // output
10966 calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k];
10967 calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k];
10968 calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k];
10969 calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k];
10970 calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k];
10971 calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k];
10972 calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k];
10973 calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params);
10974 }
10975
10976 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3)
10977 s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes,
10978 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
10979 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes));
10980
10981 CalculateExtraLatency(
10982 display_cfg,
10983 mode_lib->ip.rob_buffer_size_kbytes,
10984 mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles,
10985 s->ReorderingBytes,
10986 mode_lib->mp.Dcfclk,
10987 mode_lib->mp.FabricClock,
10988 mode_lib->ip.pixel_chunk_size_kbytes,
10989 mode_lib->mp.urg_bandwidth_available_min[dml2_core_internal_soc_state_sys_active],
10990 s->num_active_planes,
10991 mode_lib->mp.NoOfDPP,
10992 mode_lib->mp.dpte_group_bytes,
10993 s->tdlut_bytes_per_group,
10994 s->HostVMInefficiencyFactor,
10995 s->HostVMInefficiencyFactorPrefetch,
10996 mode_lib->soc.hostvm_min_page_size_kbytes * 1024,
10997 mode_lib->soc.qos_parameters.qos_type,
10998 !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable),
10999 mode_lib->soc.max_outstanding_reqs,
11000 mode_lib->mp.request_size_bytes_luma,
11001 mode_lib->mp.request_size_bytes_chroma,
11002 mode_lib->ip.meta_chunk_size_kbytes,
11003 mode_lib->ip.dchub_arb_to_ret_delay,
11004 mode_lib->mp.TripToMemory,
11005 mode_lib->ip.hostvm_mode,
11006
11007 // output
11008 &mode_lib->mp.ExtraLatency,
11009 &mode_lib->mp.ExtraLatency_sr,
11010 &mode_lib->mp.ExtraLatencyPrefetch);
11011
11012 mode_lib->mp.TCalc = 24.0 / mode_lib->mp.DCFCLKDeepSleep;
11013
11014 for (k = 0; k < s->num_active_planes; ++k) {
11015 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
11016 mode_lib->mp.WritebackDelay[k] =
11017 mode_lib->soc.qos_parameters.writeback.base_latency_us
11018 + CalculateWriteBackDelay(
11019 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format,
11020 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio,
11021 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio,
11022 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps,
11023 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width,
11024 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height,
11025 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height,
11026 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->mp.Dispclk;
11027 } else
11028 mode_lib->mp.WritebackDelay[k] = 0;
11029 }
11030
11031 /* VActive bytes to fetch for UCLK P-State */
11032 calculate_bytes_to_fetch_required_to_hide_latency_params->display_cfg = display_cfg;
11033 calculate_bytes_to_fetch_required_to_hide_latency_params->mrq_present = mode_lib->ip.dcn_mrq_present;
11034
11035 calculate_bytes_to_fetch_required_to_hide_latency_params->num_active_planes = s->num_active_planes;
11036 calculate_bytes_to_fetch_required_to_hide_latency_params->num_of_dpp = mode_lib->mp.NoOfDPP;
11037 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_l = mode_lib->mp.meta_row_height;
11038 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_c = mode_lib->mp.meta_row_height_chroma;
11039 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
11040 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
11041 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_l = mode_lib->mp.dpte_row_height;
11042 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_c = mode_lib->mp.dpte_row_height_chroma;
11043 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
11044 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
11045 calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_l = mode_lib->mp.BytePerPixelY;
11046 calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_c = mode_lib->mp.BytePerPixelC;
11047 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_l = mode_lib->mp.SwathWidthY;
11048 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_c = mode_lib->mp.SwathWidthC;
11049 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_l = mode_lib->mp.SwathHeightY;
11050 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_c = mode_lib->mp.SwathHeightC;
11051 calculate_bytes_to_fetch_required_to_hide_latency_params->latency_to_hide_us[0] = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
11052
11053 /* outputs */
11054 calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_l = s->pstate_bytes_required_l[dml2_pstate_type_uclk];
11055 calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_c = s->pstate_bytes_required_c[dml2_pstate_type_uclk];
11056
11057 calculate_bytes_to_fetch_required_to_hide_latency(calculate_bytes_to_fetch_required_to_hide_latency_params);
11058
11059 /* Excess VActive bandwidth required to fill DET */
11060 calculate_excess_vactive_bandwidth_required(
11061 display_cfg,
11062 s->num_active_planes,
11063 s->pstate_bytes_required_l[dml2_pstate_type_uclk],
11064 s->pstate_bytes_required_c[dml2_pstate_type_uclk],
11065 /* outputs */
11066 mode_lib->mp.excess_vactive_fill_bw_l,
11067 mode_lib->mp.excess_vactive_fill_bw_c);
11068
11069 mode_lib->mp.UrgentLatency = CalculateUrgentLatency(
11070 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us,
11071 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us,
11072 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us,
11073 mode_lib->soc.do_urgent_latency_adjustment,
11074 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us,
11075 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz,
11076 mode_lib->mp.FabricClock,
11077 mode_lib->mp.uclk_freq_mhz,
11078 mode_lib->soc.qos_parameters.qos_type,
11079 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles,
11080 mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles,
11081 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
11082 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
11083 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin,
11084 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
11085
11086 mode_lib->mp.TripToMemory = CalculateTripToMemory(
11087 mode_lib->mp.UrgentLatency,
11088 mode_lib->mp.FabricClock,
11089 mode_lib->mp.uclk_freq_mhz,
11090 mode_lib->soc.qos_parameters.qos_type,
11091 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles,
11092 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
11093 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
11094 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
11095 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
11096
11097 mode_lib->mp.TripToMemory = math_max2(mode_lib->mp.UrgentLatency, mode_lib->mp.TripToMemory);
11098
11099 mode_lib->mp.MetaTripToMemory = CalculateMetaTripToMemory(
11100 mode_lib->mp.UrgentLatency,
11101 mode_lib->mp.FabricClock,
11102 mode_lib->mp.uclk_freq_mhz,
11103 mode_lib->soc.qos_parameters.qos_type,
11104 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles,
11105 mode_lib->soc.qos_parameters.qos_params.dcn4x.meta_trip_adder_fclk_cycles,
11106 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
11107 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
11108
11109 for (k = 0; k < s->num_active_planes; ++k) {
11110 bool cursor_not_enough_urgent_latency_hiding = false;
11111 s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
11112 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
11113
11114 s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format;
11115
11116 s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
11117 mode_lib->mp.NoOfDPP[k],
11118 display_cfg->plane_descriptors[k].composition.viewport.plane0.width,
11119 display_cfg->plane_descriptors[k].composition.viewport.plane0.height,
11120 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
11121 display_cfg->plane_descriptors[k].composition.rotation_angle);
11122
11123 s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
11124 mode_lib->mp.NoOfDPP[k],
11125 display_cfg->plane_descriptors[k].composition.viewport.plane1.width,
11126 display_cfg->plane_descriptors[k].composition.viewport.plane1.height,
11127 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
11128 display_cfg->plane_descriptors[k].composition.rotation_angle);
11129
11130 if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) {
11131 calculate_cursor_req_attributes(
11132 display_cfg->plane_descriptors[k].cursor.cursor_width,
11133 display_cfg->plane_descriptors[k].cursor.cursor_bpp,
11134
11135 // output
11136 &s->cursor_lines_per_chunk[k],
11137 &s->cursor_bytes_per_line[k],
11138 &s->cursor_bytes_per_chunk[k],
11139 &s->cursor_bytes[k]);
11140
11141 calculate_cursor_urgent_burst_factor(
11142 mode_lib->ip.cursor_buffer_size,
11143 display_cfg->plane_descriptors[k].cursor.cursor_width,
11144 s->cursor_bytes_per_chunk[k],
11145 s->cursor_lines_per_chunk[k],
11146 s->line_times[k],
11147 mode_lib->mp.UrgentLatency,
11148
11149 // output
11150 &mode_lib->mp.UrgentBurstFactorCursor[k],
11151 &cursor_not_enough_urgent_latency_hiding);
11152 }
11153 mode_lib->mp.UrgentBurstFactorCursorPre[k] = mode_lib->mp.UrgentBurstFactorCursor[k];
11154
11155 CalculateUrgentBurstFactor(
11156 &display_cfg->plane_descriptors[k],
11157 mode_lib->mp.swath_width_luma_ub[k],
11158 mode_lib->mp.swath_width_chroma_ub[k],
11159 mode_lib->mp.SwathHeightY[k],
11160 mode_lib->mp.SwathHeightC[k],
11161 s->line_times[k],
11162 mode_lib->mp.UrgentLatency,
11163 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
11164 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
11165 mode_lib->mp.BytePerPixelInDETY[k],
11166 mode_lib->mp.BytePerPixelInDETC[k],
11167 mode_lib->mp.DETBufferSizeY[k],
11168 mode_lib->mp.DETBufferSizeC[k],
11169
11170 /* output */
11171 &mode_lib->mp.UrgentBurstFactorLuma[k],
11172 &mode_lib->mp.UrgentBurstFactorChroma[k],
11173 &mode_lib->mp.NotEnoughUrgentLatencyHiding[k]);
11174
11175 mode_lib->mp.NotEnoughUrgentLatencyHiding[k] = mode_lib->mp.NotEnoughUrgentLatencyHiding[k] || cursor_not_enough_urgent_latency_hiding;
11176 }
11177
11178 for (k = 0; k < s->num_active_planes; ++k) {
11179 s->MaxVStartupLines[k] = CalculateMaxVStartup(
11180 mode_lib->ip.ptoi_supported,
11181 mode_lib->ip.vblank_nom_default_us,
11182 &display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing,
11183 mode_lib->mp.WritebackDelay[k]);
11184
11185 #ifdef __DML_VBA_DEBUG__
11186 DML_LOG_VERBOSE("DML::%s: k=%u MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
11187 DML_LOG_VERBOSE("DML::%s: k=%u WritebackDelay = %f\n", __func__, k, mode_lib->mp.WritebackDelay[k]);
11188 #endif
11189 }
11190
11191 s->immediate_flip_required = false;
11192 for (k = 0; k < s->num_active_planes; ++k) {
11193 s->immediate_flip_required = s->immediate_flip_required || display_cfg->plane_descriptors[k].immediate_flip;
11194 }
11195 #ifdef __DML_VBA_DEBUG__
11196 DML_LOG_VERBOSE("DML::%s: immediate_flip_required = %u\n", __func__, s->immediate_flip_required);
11197 #endif
11198
11199 if (s->num_active_planes > 1) {
11200 CheckGlobalPrefetchAdmissibility_params->num_active_planes = s->num_active_planes;
11201 CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format;
11202 CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
11203 CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
11204 CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l;
11205 CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c;
11206 CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->mp.SwathHeightY;
11207 CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->mp.SwathHeightC;
11208 CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
11209 CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->mp.CompressedBufferSizeInkByte;
11210 CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->mp.DETBufferSizeY;
11211 CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->mp.DETBufferSizeC;
11212 CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l;
11213 CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c;
11214 CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes;
11215 CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = 0; // don't care
11216 CheckGlobalPrefetchAdmissibility_params->Tpre_oto = 0; // don't care
11217 CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
11218 CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = mode_lib->mp.Dcfclk;
11219 CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times;
11220 CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->mp.dst_y_prefetch;
11221
11222 // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible
11223 CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->dummy_boolean[0];
11224 CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre;
11225 CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params); // dont care about the check output for mode programming
11226 }
11227
11228 {
11229 s->DestinationLineTimesForPrefetchLessThan2 = false;
11230 s->VRatioPrefetchMoreThanMax = false;
11231
11232 DML_LOG_VERBOSE("DML::%s: Start one iteration of prefetch schedule evaluation\n", __func__);
11233
11234 for (k = 0; k < s->num_active_planes; ++k) {
11235 struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe;
11236
11237 DML_LOG_VERBOSE("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
11238 mode_lib->mp.TWait[k] = CalculateTWait(
11239 display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns,
11240 mode_lib->mp.UrgentLatency,
11241 mode_lib->mp.TripToMemory,
11242 !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ?
11243 get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0);
11244
11245 myPipe->Dppclk = mode_lib->mp.Dppclk[k];
11246 myPipe->Dispclk = mode_lib->mp.Dispclk;
11247 myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
11248 myPipe->DCFClkDeepSleep = mode_lib->mp.DCFCLKDeepSleep;
11249 myPipe->DPPPerSurface = mode_lib->mp.NoOfDPP[k];
11250 myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled;
11251 myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
11252 myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
11253 myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
11254 myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
11255 myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
11256 myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored;
11257 myPipe->BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY[k];
11258 myPipe->BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY[k];
11259 myPipe->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC[k];
11260 myPipe->BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC[k];
11261 myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
11262 myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors;
11263 myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active;
11264 myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
11265 myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active;
11266 myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
11267 myPipe->ODMMode = mode_lib->mp.ODMMode[k];
11268 myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
11269 myPipe->BytePerPixelY = mode_lib->mp.BytePerPixelY[k];
11270 myPipe->BytePerPixelC = mode_lib->mp.BytePerPixelC[k];
11271 myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
11272
11273 #ifdef __DML_VBA_DEBUG__
11274 DML_LOG_VERBOSE("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k);
11275 #endif
11276 CalculatePrefetchSchedule_params->display_cfg = display_cfg;
11277 CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch;
11278 CalculatePrefetchSchedule_params->myPipe = myPipe;
11279 CalculatePrefetchSchedule_params->DSCDelay = mode_lib->mp.DSCDelay[k];
11280 CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter;
11281 CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl;
11282 CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only;
11283 CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor;
11284 CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal;
11285 CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->mp.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
11286 CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format;
11287 CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters;
11288 CalculatePrefetchSchedule_params->VStartup = s->MaxVStartupLines[k];
11289 CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes * 1024;
11290 CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable;
11291 CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled;
11292 CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required;
11293 CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes;
11294 CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->mp.ExtraLatencyPrefetch;
11295 CalculatePrefetchSchedule_params->TCalc = mode_lib->mp.TCalc;
11296 CalculatePrefetchSchedule_params->vm_bytes = mode_lib->mp.vm_bytes[k];
11297 CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->mp.PixelPTEBytesPerRow[k];
11298 CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->mp.PrefetchSourceLinesY[k];
11299 CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->mp.VInitPreFillY[k];
11300 CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->mp.MaxNumSwathY[k];
11301 CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->mp.PrefetchSourceLinesC[k];
11302 CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->mp.VInitPreFillC[k];
11303 CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->mp.MaxNumSwathC[k];
11304 CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->mp.swath_width_luma_ub[k];
11305 CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->mp.swath_width_chroma_ub[k];
11306 CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->mp.SwathHeightY[k];
11307 CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->mp.SwathHeightC[k];
11308 CalculatePrefetchSchedule_params->TWait = mode_lib->mp.TWait[k];
11309 CalculatePrefetchSchedule_params->Ttrip = mode_lib->mp.TripToMemory;
11310 CalculatePrefetchSchedule_params->Turg = mode_lib->mp.UrgentLatency;
11311 CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
11312 CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k];
11313 CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k];
11314 CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k];
11315 CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k];
11316 CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0);
11317 CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k];
11318 CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k];
11319 CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
11320 CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present;
11321 CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->mp.meta_row_bytes[k];
11322 CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor[k];
11323 CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k];
11324 CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->mp.vactive_sw_bw_l[k];
11325 CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->mp.vactive_sw_bw_c[k];
11326
11327 // output
11328 CalculatePrefetchSchedule_params->DSTXAfterScaler = &mode_lib->mp.DSTXAfterScaler[k];
11329 CalculatePrefetchSchedule_params->DSTYAfterScaler = &mode_lib->mp.DSTYAfterScaler[k];
11330 CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->mp.dst_y_prefetch[k];
11331 CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->mp.dst_y_per_vm_vblank[k];
11332 CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->mp.dst_y_per_row_vblank[k];
11333 CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->mp.VRatioPrefetchY[k];
11334 CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->mp.VRatioPrefetchC[k];
11335 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k];
11336 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k];
11337 CalculatePrefetchSchedule_params->RequiredPrefetchBWMax = &s->dummy_single_array[0][k];
11338 CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->mp.NotEnoughTimeForDynamicMetadata[k];
11339 CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->mp.Tno_bw[k];
11340 CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->mp.Tno_bw_flip[k];
11341 CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->mp.prefetch_vmrow_bw[k];
11342 CalculatePrefetchSchedule_params->Tdmdl_vm = &mode_lib->mp.Tdmdl_vm[k];
11343 CalculatePrefetchSchedule_params->Tdmdl = &mode_lib->mp.Tdmdl[k];
11344 CalculatePrefetchSchedule_params->TSetup = &mode_lib->mp.TSetup[k];
11345 CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k];
11346 CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k];
11347 CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k];
11348 CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k];
11349 CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k];
11350 CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k];
11351 CalculatePrefetchSchedule_params->VUpdateOffsetPix = &mode_lib->mp.VUpdateOffsetPix[k];
11352 CalculatePrefetchSchedule_params->VUpdateWidthPix = &mode_lib->mp.VUpdateWidthPix[k];
11353 CalculatePrefetchSchedule_params->VReadyOffsetPix = &mode_lib->mp.VReadyOffsetPix[k];
11354 CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->mp.prefetch_cursor_bw[k];
11355 CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k];
11356 CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k];
11357 CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k];
11358 CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->dummy_single[0];
11359
11360 mode_lib->mp.NoTimeToPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params);
11361
11362 if (s->impacted_dst_y_pre[k] > 0)
11363 mode_lib->mp.impacted_prefetch_margin_us[k] = (mode_lib->mp.dst_y_prefetch[k] - s->impacted_dst_y_pre[k]) * s->line_times[k];
11364 else
11365 mode_lib->mp.impacted_prefetch_margin_us[k] = 0;
11366
11367 #ifdef __DML_VBA_DEBUG__
11368 DML_LOG_VERBOSE("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]);
11369 #endif
11370 mode_lib->mp.VStartupMin[k] = s->MaxVStartupLines[k];
11371 } // for k
11372
11373 mode_lib->mp.PrefetchModeSupported = true;
11374 for (k = 0; k < s->num_active_planes; ++k) {
11375 if (mode_lib->mp.NoTimeToPrefetch[k] == true ||
11376 mode_lib->mp.NotEnoughTimeForDynamicMetadata[k] ||
11377 mode_lib->mp.DSTYAfterScaler[k] > 8) {
11378 DML_LOG_VERBOSE("DML::%s: k=%u, NoTimeToPrefetch = %0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]);
11379 DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughTimeForDynamicMetadata=%u\n", __func__, k, mode_lib->mp.NotEnoughTimeForDynamicMetadata[k]);
11380 DML_LOG_VERBOSE("DML::%s: k=%u, DSTYAfterScaler=%u (should be <= 0)\n", __func__, k, mode_lib->mp.DSTYAfterScaler[k]);
11381 mode_lib->mp.PrefetchModeSupported = false;
11382 }
11383 if (mode_lib->mp.dst_y_prefetch[k] < 2)
11384 s->DestinationLineTimesForPrefetchLessThan2 = true;
11385
11386 if (mode_lib->mp.VRatioPrefetchY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ ||
11387 mode_lib->mp.VRatioPrefetchC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) {
11388 s->VRatioPrefetchMoreThanMax = true;
11389 DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPrefetchY=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k], __DML2_CALCS_MAX_VRATIO_PRE__);
11390 DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPrefetchC=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchC[k], __DML2_CALCS_MAX_VRATIO_PRE__);
11391 DML_LOG_VERBOSE("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax);
11392 }
11393
11394 if (mode_lib->mp.NotEnoughUrgentLatencyHiding[k]) {
11395 DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughUrgentLatencyHiding = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHiding[k]);
11396 mode_lib->mp.PrefetchModeSupported = false;
11397 }
11398 }
11399
11400 if (s->VRatioPrefetchMoreThanMax == true || s->DestinationLineTimesForPrefetchLessThan2 == true) {
11401 DML_LOG_VERBOSE("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax);
11402 DML_LOG_VERBOSE("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2);
11403 mode_lib->mp.PrefetchModeSupported = false;
11404 }
11405
11406 DML_LOG_VERBOSE("DML::%s: Prefetch schedule is %sOK at vstartup = %u\n", __func__,
11407 mode_lib->mp.PrefetchModeSupported ? "" : "NOT ", CalculatePrefetchSchedule_params->VStartup);
11408
11409 // Prefetch schedule OK, now check prefetch bw
11410 if (mode_lib->mp.PrefetchModeSupported == true) {
11411 for (k = 0; k < s->num_active_planes; ++k) {
11412 double line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
11413 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
11414 CalculateUrgentBurstFactor(
11415 &display_cfg->plane_descriptors[k],
11416 mode_lib->mp.swath_width_luma_ub[k],
11417 mode_lib->mp.swath_width_chroma_ub[k],
11418 mode_lib->mp.SwathHeightY[k],
11419 mode_lib->mp.SwathHeightC[k],
11420 line_time_us,
11421 mode_lib->mp.UrgentLatency,
11422 mode_lib->mp.VRatioPrefetchY[k],
11423 mode_lib->mp.VRatioPrefetchC[k],
11424 mode_lib->mp.BytePerPixelInDETY[k],
11425 mode_lib->mp.BytePerPixelInDETC[k],
11426 mode_lib->mp.DETBufferSizeY[k],
11427 mode_lib->mp.DETBufferSizeC[k],
11428 /* Output */
11429 &mode_lib->mp.UrgentBurstFactorLumaPre[k],
11430 &mode_lib->mp.UrgentBurstFactorChromaPre[k],
11431 &mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]);
11432
11433 #ifdef __DML_VBA_DEBUG__
11434 DML_LOG_VERBOSE("DML::%s: k=%0u DPPPerSurface=%u\n", __func__, k, mode_lib->mp.NoOfDPP[k]);
11435 DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorLuma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLuma[k]);
11436 DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorChroma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChroma[k]);
11437 DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorLumaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLumaPre[k]);
11438 DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorChromaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChromaPre[k]);
11439
11440 DML_LOG_VERBOSE("DML::%s: k=%0u VRatioPrefetchY=%f\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k]);
11441 DML_LOG_VERBOSE("DML::%s: k=%0u VRatioY=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
11442
11443 DML_LOG_VERBOSE("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, mode_lib->mp.prefetch_vmrow_bw[k]);
11444 DML_LOG_VERBOSE("DML::%s: k=%0u vactive_sw_bw_l=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]);
11445 DML_LOG_VERBOSE("DML::%s: k=%0u vactive_sw_bw_c=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]);
11446 DML_LOG_VERBOSE("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, mode_lib->mp.cursor_bw[k]);
11447 DML_LOG_VERBOSE("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, mode_lib->mp.dpte_row_bw[k]);
11448 DML_LOG_VERBOSE("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, mode_lib->mp.meta_row_bw[k]);
11449 DML_LOG_VERBOSE("DML::%s: k=%0u RequiredPrefetchPixelDataBWLuma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k]);
11450 DML_LOG_VERBOSE("DML::%s: k=%0u RequiredPrefetchPixelDataBWChroma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k]);
11451 DML_LOG_VERBOSE("DML::%s: k=%0u prefetch_cursor_bw=%f\n", __func__, k, mode_lib->mp.prefetch_cursor_bw[k]);
11452 #endif
11453 }
11454
11455 for (k = 0; k <= s->num_active_planes - 1; k++)
11456 mode_lib->mp.final_flip_bw[k] = 0;
11457
11458 calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = mode_lib->mp.urg_vactive_bandwidth_required;
11459 calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->mp.urg_bandwidth_required;
11460 calculate_peak_bandwidth_params->urg_bandwidth_required_qual = mode_lib->mp.urg_bandwidth_required_qual;
11461 calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->mp.non_urg_bandwidth_required;
11462 calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw;
11463 calculate_peak_bandwidth_params->surface_peak_required_bw = s->surface_dummy_bw0;
11464
11465 calculate_peak_bandwidth_params->display_cfg = display_cfg;
11466 calculate_peak_bandwidth_params->inc_flip_bw = 0;
11467 calculate_peak_bandwidth_params->num_active_planes = s->num_active_planes;
11468 calculate_peak_bandwidth_params->num_of_dpp = mode_lib->mp.NoOfDPP;
11469 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0;
11470 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1;
11471 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0;
11472 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1;
11473 calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor;
11474 calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->mp.mall_prefetch_dram_overhead_factor;
11475
11476 calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.vactive_sw_bw_l;
11477 calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c;
11478 calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma;
11479 calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma;
11480 calculate_peak_bandwidth_params->prefetch_bandwidth_max = s->dummy_single_array[0];
11481 calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l;
11482 calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c;
11483 calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw;
11484 calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
11485 calculate_peak_bandwidth_params->meta_row_bw = mode_lib->mp.meta_row_bw;
11486 calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->mp.prefetch_cursor_bw;
11487 calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->mp.prefetch_vmrow_bw;
11488 calculate_peak_bandwidth_params->flip_bw = mode_lib->mp.final_flip_bw;
11489 calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->mp.UrgentBurstFactorLuma;
11490 calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->mp.UrgentBurstFactorChroma;
11491 calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->mp.UrgentBurstFactorCursor;
11492 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->mp.UrgentBurstFactorLumaPre;
11493 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->mp.UrgentBurstFactorChromaPre;
11494 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->mp.UrgentBurstFactorCursorPre;
11495
11496 calculate_peak_bandwidth_required(
11497 &mode_lib->scratch,
11498 calculate_peak_bandwidth_params);
11499
11500 // Check urg peak bandwidth against available urg bw
11501 // check at SDP and DRAM, for all soc states (SVP prefetch and Sys Active)
11502 check_urgent_bandwidth_support(
11503 &mode_lib->mp.FractionOfUrgentBandwidth, // double* frac_urg_bandwidth
11504 &mode_lib->mp.FractionOfUrgentBandwidthMALL, // double* frac_urg_bandwidth_mall
11505 &s->dummy_boolean[1], // vactive bw ok
11506 &mode_lib->mp.PrefetchModeSupported, // prefetch bw ok
11507
11508 mode_lib->soc.mall_allocated_for_dcn_mbytes,
11509 mode_lib->mp.non_urg_bandwidth_required,
11510 mode_lib->mp.urg_vactive_bandwidth_required,
11511 mode_lib->mp.urg_bandwidth_required,
11512 mode_lib->mp.urg_bandwidth_available);
11513
11514 if (!mode_lib->mp.PrefetchModeSupported)
11515 DML_LOG_VERBOSE("DML::%s: Bandwidth not sufficient for prefetch!\n", __func__);
11516
11517 for (k = 0; k < s->num_active_planes; ++k) {
11518 if (mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]) {
11519 DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughUrgentLatencyHidingPre = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]);
11520 mode_lib->mp.PrefetchModeSupported = false;
11521 }
11522 }
11523 } // prefetch schedule ok
11524
11525 // Prefetch schedule and prefetch bw ok, now check flip bw
11526 if (mode_lib->mp.PrefetchModeSupported == true) { // prefetch schedule and prefetch bw ok, now check flip bw
11527
11528 mode_lib->mp.BandwidthAvailableForImmediateFlip =
11529 get_bandwidth_available_for_immediate_flip(
11530 dml2_core_internal_soc_state_sys_active,
11531 mode_lib->mp.urg_bandwidth_required_qual, // no flip
11532 mode_lib->mp.urg_bandwidth_available);
11533 mode_lib->mp.TotImmediateFlipBytes = 0;
11534 for (k = 0; k < s->num_active_planes; ++k) {
11535 if (display_cfg->plane_descriptors[k].immediate_flip) {
11536 s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes(s->HostVMInefficiencyFactor,
11537 mode_lib->mp.vm_bytes[k],
11538 mode_lib->mp.PixelPTEBytesPerRow[k],
11539 mode_lib->mp.meta_row_bytes[k]);
11540 } else {
11541 s->per_pipe_flip_bytes[k] = 0;
11542 }
11543 mode_lib->mp.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->mp.NoOfDPP[k];
11544 #ifdef __DML_VBA_DEBUG__
11545 DML_LOG_VERBOSE("DML::%s: k = %u\n", __func__, k);
11546 DML_LOG_VERBOSE("DML::%s: DPPPerSurface = %u\n", __func__, mode_lib->mp.NoOfDPP[k]);
11547 DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, mode_lib->mp.vm_bytes[k]);
11548 DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, mode_lib->mp.PixelPTEBytesPerRow[k]);
11549 DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %u\n", __func__, mode_lib->mp.meta_row_bytes[k]);
11550 DML_LOG_VERBOSE("DML::%s: TotImmediateFlipBytes = %u\n", __func__, mode_lib->mp.TotImmediateFlipBytes);
11551 #endif
11552 }
11553 for (k = 0; k < s->num_active_planes; ++k) {
11554 CalculateFlipSchedule(
11555 &mode_lib->scratch,
11556 display_cfg->plane_descriptors[k].immediate_flip,
11557 0, // use_lb_flip_bw
11558 s->HostVMInefficiencyFactor,
11559 s->Tvm_trips_flip[k],
11560 s->Tr0_trips_flip[k],
11561 s->Tvm_trips_flip_rounded[k],
11562 s->Tr0_trips_flip_rounded[k],
11563 display_cfg->gpuvm_enable,
11564 mode_lib->mp.vm_bytes[k],
11565 mode_lib->mp.PixelPTEBytesPerRow[k],
11566 mode_lib->mp.BandwidthAvailableForImmediateFlip,
11567 mode_lib->mp.TotImmediateFlipBytes,
11568 display_cfg->plane_descriptors[k].pixel_format,
11569 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
11570 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
11571 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
11572 mode_lib->mp.Tno_bw[k],
11573 mode_lib->mp.dpte_row_height[k],
11574 mode_lib->mp.dpte_row_height_chroma[k],
11575 mode_lib->mp.use_one_row_for_frame_flip[k],
11576 mode_lib->ip.max_flip_time_us,
11577 mode_lib->ip.max_flip_time_lines,
11578 s->per_pipe_flip_bytes[k],
11579 mode_lib->mp.meta_row_bytes[k],
11580 mode_lib->mp.meta_row_height[k],
11581 mode_lib->mp.meta_row_height_chroma[k],
11582 mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable,
11583
11584 // Output
11585 &mode_lib->mp.dst_y_per_vm_flip[k],
11586 &mode_lib->mp.dst_y_per_row_flip[k],
11587 &mode_lib->mp.final_flip_bw[k],
11588 &mode_lib->mp.ImmediateFlipSupportedForPipe[k]);
11589 }
11590
11591 calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw;
11592 calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->mp.urg_bandwidth_required_flip;
11593 calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw;
11594 calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->mp.non_urg_bandwidth_required_flip;
11595 calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw;
11596 calculate_peak_bandwidth_params->surface_peak_required_bw = s->surface_dummy_bw0;
11597
11598 calculate_peak_bandwidth_params->display_cfg = display_cfg;
11599 calculate_peak_bandwidth_params->inc_flip_bw = 1;
11600 calculate_peak_bandwidth_params->num_active_planes = s->num_active_planes;
11601 calculate_peak_bandwidth_params->num_of_dpp = mode_lib->mp.NoOfDPP;
11602 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0;
11603 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1;
11604 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0;
11605 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1;
11606 calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor;
11607 calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->mp.mall_prefetch_dram_overhead_factor;
11608
11609 calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.vactive_sw_bw_l;
11610 calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c;
11611 calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma;
11612 calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma;
11613 calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l;
11614 calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c;
11615 calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw;
11616 calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
11617 calculate_peak_bandwidth_params->meta_row_bw = mode_lib->mp.meta_row_bw;
11618 calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->mp.prefetch_cursor_bw;
11619 calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->mp.prefetch_vmrow_bw;
11620 calculate_peak_bandwidth_params->prefetch_bandwidth_max = s->dummy_single_array[0];
11621 calculate_peak_bandwidth_params->flip_bw = mode_lib->mp.final_flip_bw;
11622 calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->mp.UrgentBurstFactorLuma;
11623 calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->mp.UrgentBurstFactorChroma;
11624 calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->mp.UrgentBurstFactorCursor;
11625 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->mp.UrgentBurstFactorLumaPre;
11626 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->mp.UrgentBurstFactorChromaPre;
11627 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->mp.UrgentBurstFactorCursorPre;
11628
11629 calculate_peak_bandwidth_required(
11630 &mode_lib->scratch,
11631 calculate_peak_bandwidth_params);
11632
11633 calculate_immediate_flip_bandwidth_support(
11634 &mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip, // double* frac_urg_bandwidth_flip
11635 &mode_lib->mp.ImmediateFlipSupported, // bool* flip_bandwidth_support_ok
11636
11637 dml2_core_internal_soc_state_sys_active,
11638 mode_lib->mp.urg_bandwidth_required_flip,
11639 mode_lib->mp.non_urg_bandwidth_required_flip,
11640 mode_lib->mp.urg_bandwidth_available);
11641
11642 if (!mode_lib->mp.ImmediateFlipSupported)
11643 DML_LOG_VERBOSE("DML::%s: Bandwidth not sufficient for flip!", __func__);
11644
11645 for (k = 0; k < s->num_active_planes; ++k) {
11646 if (display_cfg->plane_descriptors[k].immediate_flip && mode_lib->mp.ImmediateFlipSupportedForPipe[k] == false) {
11647 mode_lib->mp.ImmediateFlipSupported = false;
11648 #ifdef __DML_VBA_DEBUG__
11649 DML_LOG_VERBOSE("DML::%s: Pipe %0d not supporting iflip!\n", __func__, k);
11650 #endif
11651 }
11652 }
11653 } else { // flip or prefetch not support
11654 mode_lib->mp.ImmediateFlipSupported = false;
11655 }
11656
11657 // consider flip support okay if the flip bw is ok, or when the user doesn't require an iflip and there is no host vm
11658 must_support_iflip = display_cfg->hostvm_enable || s->immediate_flip_required;
11659 mode_lib->mp.PrefetchAndImmediateFlipSupported = (mode_lib->mp.PrefetchModeSupported == true && (!must_support_iflip || mode_lib->mp.ImmediateFlipSupported));
11660
11661 #ifdef __DML_VBA_DEBUG__
11662 DML_LOG_VERBOSE("DML::%s: PrefetchModeSupported = %u\n", __func__, mode_lib->mp.PrefetchModeSupported);
11663 for (k = 0; k < s->num_active_planes; ++k)
11664 DML_LOG_VERBOSE("DML::%s: immediate_flip_required[%u] = %u\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip);
11665 DML_LOG_VERBOSE("DML::%s: HostVMEnable = %u\n", __func__, display_cfg->hostvm_enable);
11666 DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupported = %u\n", __func__, mode_lib->mp.ImmediateFlipSupported);
11667 DML_LOG_VERBOSE("DML::%s: PrefetchAndImmediateFlipSupported = %u\n", __func__, mode_lib->mp.PrefetchAndImmediateFlipSupported);
11668 #endif
11669 DML_LOG_VERBOSE("DML::%s: Done one iteration: k=%d, MaxVStartupLines=%u\n", __func__, k, s->MaxVStartupLines[k]);
11670 }
11671
11672 for (k = 0; k < s->num_active_planes; ++k)
11673 DML_LOG_VERBOSE("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
11674
11675 if (!mode_lib->mp.PrefetchAndImmediateFlipSupported) {
11676 DML_LOG_VERBOSE("DML::%s: Bad, Prefetch and flip scheduling solution NOT found!\n", __func__);
11677 } else {
11678 DML_LOG_VERBOSE("DML::%s: Good, Prefetch and flip scheduling solution found\n", __func__);
11679
11680 // DCC Configuration
11681 for (k = 0; k < s->num_active_planes; ++k) {
11682 #ifdef __DML_VBA_DEBUG__
11683 DML_LOG_VERBOSE("DML::%s: Calculate DCC configuration for surface k=%u\n", __func__, k);
11684 #endif
11685 CalculateDCCConfiguration(
11686 display_cfg->plane_descriptors[k].surface.dcc.enable,
11687 display_cfg->overrides.dcc_programming_assumes_scan_direction_unknown,
11688 display_cfg->plane_descriptors[k].pixel_format,
11689 display_cfg->plane_descriptors[k].surface.plane0.width,
11690 display_cfg->plane_descriptors[k].surface.plane1.width,
11691 display_cfg->plane_descriptors[k].surface.plane0.height,
11692 display_cfg->plane_descriptors[k].surface.plane1.height,
11693 s->NomDETInKByte,
11694 mode_lib->mp.Read256BlockHeightY[k],
11695 mode_lib->mp.Read256BlockHeightC[k],
11696 display_cfg->plane_descriptors[k].surface.tiling,
11697 mode_lib->mp.BytePerPixelY[k],
11698 mode_lib->mp.BytePerPixelC[k],
11699 mode_lib->mp.BytePerPixelInDETY[k],
11700 mode_lib->mp.BytePerPixelInDETC[k],
11701 display_cfg->plane_descriptors[k].composition.rotation_angle,
11702
11703 /* Output */
11704 &mode_lib->mp.RequestLuma[k],
11705 &mode_lib->mp.RequestChroma[k],
11706 &mode_lib->mp.DCCYMaxUncompressedBlock[k],
11707 &mode_lib->mp.DCCCMaxUncompressedBlock[k],
11708 &mode_lib->mp.DCCYMaxCompressedBlock[k],
11709 &mode_lib->mp.DCCCMaxCompressedBlock[k],
11710 &mode_lib->mp.DCCYIndependentBlock[k],
11711 &mode_lib->mp.DCCCIndependentBlock[k]);
11712 }
11713
11714 //Watermarks and NB P-State/DRAM Clock Change Support
11715 s->mmSOCParameters.UrgentLatency = mode_lib->mp.UrgentLatency;
11716 s->mmSOCParameters.ExtraLatency = mode_lib->mp.ExtraLatency;
11717 s->mmSOCParameters.ExtraLatency_sr = mode_lib->mp.ExtraLatency_sr;
11718 s->mmSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us;
11719 s->mmSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
11720 s->mmSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us;
11721 s->mmSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us;
11722 s->mmSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us;
11723 s->mmSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us;
11724 s->mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us;
11725 s->mmSOCParameters.USRRetrainingLatency = 0;
11726 s->mmSOCParameters.SMNLatency = 0;
11727 s->mmSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index);
11728 s->mmSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->mp.uclk_freq_mhz, mode_lib->mp.FabricClock, in_out_params->min_clk_index);
11729 s->mmSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->mp.FabricClock;
11730 s->mmSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type;
11731
11732 CalculateWatermarks_params->display_cfg = display_cfg;
11733 CalculateWatermarks_params->USRRetrainingRequired = false;
11734 CalculateWatermarks_params->NumberOfActiveSurfaces = s->num_active_planes;
11735 CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines;
11736 CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits;
11737 CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes;
11738 CalculateWatermarks_params->DCFCLK = mode_lib->mp.Dcfclk;
11739 CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings;
11740 CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change;
11741 CalculateWatermarks_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes;
11742 CalculateWatermarks_params->mmSOCParameters = s->mmSOCParameters;
11743 CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes;
11744 CalculateWatermarks_params->SOCCLK = s->SOCCLK;
11745 CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->mp.DCFCLKDeepSleep;
11746 CalculateWatermarks_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY;
11747 CalculateWatermarks_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC;
11748 CalculateWatermarks_params->SwathHeightY = mode_lib->mp.SwathHeightY;
11749 CalculateWatermarks_params->SwathHeightC = mode_lib->mp.SwathHeightC;
11750 CalculateWatermarks_params->SwathWidthY = mode_lib->mp.SwathWidthY;
11751 CalculateWatermarks_params->SwathWidthC = mode_lib->mp.SwathWidthC;
11752 CalculateWatermarks_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY;
11753 CalculateWatermarks_params->BytePerPixelDETC = mode_lib->mp.BytePerPixelInDETC;
11754 CalculateWatermarks_params->DSTXAfterScaler = mode_lib->mp.DSTXAfterScaler;
11755 CalculateWatermarks_params->DSTYAfterScaler = mode_lib->mp.DSTYAfterScaler;
11756 CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled;
11757 CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->mp.CompressedBufferSizeInkByte;
11758 CalculateWatermarks_params->meta_row_height_l = mode_lib->mp.meta_row_height;
11759 CalculateWatermarks_params->meta_row_height_c = mode_lib->mp.meta_row_height_chroma;
11760 CalculateWatermarks_params->DPPPerSurface = mode_lib->mp.NoOfDPP;
11761
11762 // Output
11763 CalculateWatermarks_params->Watermark = &mode_lib->mp.Watermark;
11764 CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->mp.DRAMClockChangeSupport;
11765 CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->mp.global_dram_clock_change_supported;
11766 CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported;
11767 CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->mp.SubViewportLinesNeededInMALL;
11768 CalculateWatermarks_params->FCLKChangeSupport = mode_lib->mp.FCLKChangeSupport;
11769 CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->mp.global_fclk_change_supported;
11770 CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &mode_lib->mp.MaxActiveFCLKChangeLatencySupported;
11771 CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->mp.USRRetrainingSupport;
11772 CalculateWatermarks_params->g6_temp_read_support = &mode_lib->mp.g6_temp_read_support;
11773 CalculateWatermarks_params->VActiveLatencyHidingMargin = 0;
11774 CalculateWatermarks_params->VActiveLatencyHidingUs = 0;
11775
11776 CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params);
11777
11778 for (k = 0; k < s->num_active_planes; ++k) {
11779 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
11780 mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
11781 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark);
11782 mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
11783 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackFCLKChangeWatermark);
11784 } else {
11785 mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = 0;
11786 mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = 0;
11787 }
11788 }
11789
11790 calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->mp.Watermark, mode_lib->mp.pstate_keepout_dst_lines);
11791
11792 DML_LOG_VERBOSE("DML::%s: DEBUG stream_index = %0d\n", __func__, display_cfg->plane_descriptors[0].stream_index);
11793 DML_LOG_VERBOSE("DML::%s: DEBUG PixelClock = %ld kHz\n", __func__, (display_cfg->stream_descriptors[display_cfg->plane_descriptors[0].stream_index].timing.pixel_clock_khz));
11794
11795 //Display Pipeline Delivery Time in Prefetch, Groups
11796 CalculatePixelDeliveryTimes(
11797 display_cfg,
11798 cfg_support_info,
11799 s->num_active_planes,
11800 mode_lib->mp.VRatioPrefetchY,
11801 mode_lib->mp.VRatioPrefetchC,
11802 mode_lib->mp.swath_width_luma_ub,
11803 mode_lib->mp.swath_width_chroma_ub,
11804 mode_lib->mp.PSCL_THROUGHPUT,
11805 mode_lib->mp.PSCL_THROUGHPUT_CHROMA,
11806 mode_lib->mp.Dppclk,
11807 mode_lib->mp.BytePerPixelC,
11808 mode_lib->mp.req_per_swath_ub_l,
11809 mode_lib->mp.req_per_swath_ub_c,
11810
11811 /* Output */
11812 mode_lib->mp.DisplayPipeLineDeliveryTimeLuma,
11813 mode_lib->mp.DisplayPipeLineDeliveryTimeChroma,
11814 mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch,
11815 mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch,
11816 mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma,
11817 mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma,
11818 mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch,
11819 mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch);
11820
11821 CalculateMetaAndPTETimes_params->scratch = &mode_lib->scratch;
11822 CalculateMetaAndPTETimes_params->display_cfg = display_cfg;
11823 CalculateMetaAndPTETimes_params->NumberOfActiveSurfaces = s->num_active_planes;
11824 CalculateMetaAndPTETimes_params->use_one_row_for_frame = mode_lib->mp.use_one_row_for_frame;
11825 CalculateMetaAndPTETimes_params->dst_y_per_row_vblank = mode_lib->mp.dst_y_per_row_vblank;
11826 CalculateMetaAndPTETimes_params->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip;
11827 CalculateMetaAndPTETimes_params->BytePerPixelY = mode_lib->mp.BytePerPixelY;
11828 CalculateMetaAndPTETimes_params->BytePerPixelC = mode_lib->mp.BytePerPixelC;
11829 CalculateMetaAndPTETimes_params->dpte_row_height = mode_lib->mp.dpte_row_height;
11830 CalculateMetaAndPTETimes_params->dpte_row_height_chroma = mode_lib->mp.dpte_row_height_chroma;
11831 CalculateMetaAndPTETimes_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes;
11832 CalculateMetaAndPTETimes_params->PTERequestSizeY = mode_lib->mp.PTERequestSizeY;
11833 CalculateMetaAndPTETimes_params->PTERequestSizeC = mode_lib->mp.PTERequestSizeC;
11834 CalculateMetaAndPTETimes_params->PixelPTEReqWidthY = mode_lib->mp.PixelPTEReqWidthY;
11835 CalculateMetaAndPTETimes_params->PixelPTEReqHeightY = mode_lib->mp.PixelPTEReqHeightY;
11836 CalculateMetaAndPTETimes_params->PixelPTEReqWidthC = mode_lib->mp.PixelPTEReqWidthC;
11837 CalculateMetaAndPTETimes_params->PixelPTEReqHeightC = mode_lib->mp.PixelPTEReqHeightC;
11838 CalculateMetaAndPTETimes_params->dpte_row_width_luma_ub = mode_lib->mp.dpte_row_width_luma_ub;
11839 CalculateMetaAndPTETimes_params->dpte_row_width_chroma_ub = mode_lib->mp.dpte_row_width_chroma_ub;
11840 CalculateMetaAndPTETimes_params->tdlut_groups_per_2row_ub = s->tdlut_groups_per_2row_ub;
11841 CalculateMetaAndPTETimes_params->mrq_present = mode_lib->ip.dcn_mrq_present;
11842
11843 CalculateMetaAndPTETimes_params->MetaChunkSize = mode_lib->ip.meta_chunk_size_kbytes;
11844 CalculateMetaAndPTETimes_params->MinMetaChunkSizeBytes = mode_lib->ip.min_meta_chunk_size_bytes;
11845 CalculateMetaAndPTETimes_params->meta_row_width = mode_lib->mp.meta_row_width;
11846 CalculateMetaAndPTETimes_params->meta_row_width_chroma = mode_lib->mp.meta_row_width_chroma;
11847 CalculateMetaAndPTETimes_params->meta_row_height = mode_lib->mp.meta_row_height;
11848 CalculateMetaAndPTETimes_params->meta_row_height_chroma = mode_lib->mp.meta_row_height_chroma;
11849 CalculateMetaAndPTETimes_params->meta_req_width = mode_lib->mp.meta_req_width;
11850 CalculateMetaAndPTETimes_params->meta_req_width_chroma = mode_lib->mp.meta_req_width_chroma;
11851 CalculateMetaAndPTETimes_params->meta_req_height = mode_lib->mp.meta_req_height;
11852 CalculateMetaAndPTETimes_params->meta_req_height_chroma = mode_lib->mp.meta_req_height_chroma;
11853
11854 CalculateMetaAndPTETimes_params->time_per_tdlut_group = mode_lib->mp.time_per_tdlut_group;
11855 CalculateMetaAndPTETimes_params->DST_Y_PER_PTE_ROW_NOM_L = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L;
11856 CalculateMetaAndPTETimes_params->DST_Y_PER_PTE_ROW_NOM_C = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C;
11857 CalculateMetaAndPTETimes_params->time_per_pte_group_nom_luma = mode_lib->mp.time_per_pte_group_nom_luma;
11858 CalculateMetaAndPTETimes_params->time_per_pte_group_vblank_luma = mode_lib->mp.time_per_pte_group_vblank_luma;
11859 CalculateMetaAndPTETimes_params->time_per_pte_group_flip_luma = mode_lib->mp.time_per_pte_group_flip_luma;
11860 CalculateMetaAndPTETimes_params->time_per_pte_group_nom_chroma = mode_lib->mp.time_per_pte_group_nom_chroma;
11861 CalculateMetaAndPTETimes_params->time_per_pte_group_vblank_chroma = mode_lib->mp.time_per_pte_group_vblank_chroma;
11862 CalculateMetaAndPTETimes_params->time_per_pte_group_flip_chroma = mode_lib->mp.time_per_pte_group_flip_chroma;
11863 CalculateMetaAndPTETimes_params->DST_Y_PER_META_ROW_NOM_L = mode_lib->mp.DST_Y_PER_META_ROW_NOM_L;
11864 CalculateMetaAndPTETimes_params->DST_Y_PER_META_ROW_NOM_C = mode_lib->mp.DST_Y_PER_META_ROW_NOM_C;
11865 CalculateMetaAndPTETimes_params->TimePerMetaChunkNominal = mode_lib->mp.TimePerMetaChunkNominal;
11866 CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkNominal = mode_lib->mp.TimePerChromaMetaChunkNominal;
11867 CalculateMetaAndPTETimes_params->TimePerMetaChunkVBlank = mode_lib->mp.TimePerMetaChunkVBlank;
11868 CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkVBlank = mode_lib->mp.TimePerChromaMetaChunkVBlank;
11869 CalculateMetaAndPTETimes_params->TimePerMetaChunkFlip = mode_lib->mp.TimePerMetaChunkFlip;
11870 CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkFlip = mode_lib->mp.TimePerChromaMetaChunkFlip;
11871
11872 CalculateMetaAndPTETimes(CalculateMetaAndPTETimes_params);
11873
11874 CalculateVMGroupAndRequestTimes(
11875 display_cfg,
11876 s->num_active_planes,
11877 mode_lib->mp.BytePerPixelC,
11878 mode_lib->mp.dst_y_per_vm_vblank,
11879 mode_lib->mp.dst_y_per_vm_flip,
11880 mode_lib->mp.dpte_row_width_luma_ub,
11881 mode_lib->mp.dpte_row_width_chroma_ub,
11882 mode_lib->mp.vm_group_bytes,
11883 mode_lib->mp.dpde0_bytes_per_frame_ub_l,
11884 mode_lib->mp.dpde0_bytes_per_frame_ub_c,
11885 s->tdlut_pte_bytes_per_frame,
11886 mode_lib->mp.meta_pte_bytes_per_frame_ub_l,
11887 mode_lib->mp.meta_pte_bytes_per_frame_ub_c,
11888 mode_lib->ip.dcn_mrq_present,
11889
11890 /* Output */
11891 mode_lib->mp.TimePerVMGroupVBlank,
11892 mode_lib->mp.TimePerVMGroupFlip,
11893 mode_lib->mp.TimePerVMRequestVBlank,
11894 mode_lib->mp.TimePerVMRequestFlip);
11895
11896 // VStartup Adjustment
11897 for (k = 0; k < s->num_active_planes; ++k) {
11898 bool isInterlaceTiming;
11899
11900 mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TWait[k] + mode_lib->mp.ExtraLatency;
11901 if (!display_cfg->plane_descriptors[k].dynamic_meta_data.enable)
11902 mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TCalc + mode_lib->mp.MinTTUVBlank[k];
11903
11904 #ifdef __DML_VBA_DEBUG__
11905 DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]);
11906 #endif
11907 s->Tvstartup_margin = (s->MaxVStartupLines[k] - mode_lib->mp.VStartupMin[k]) * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
11908 mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.MinTTUVBlank[k] + s->Tvstartup_margin;
11909
11910 #ifdef __DML_VBA_DEBUG__
11911 DML_LOG_VERBOSE("DML::%s: k=%u, Tvstartup_margin = %f\n", __func__, k, s->Tvstartup_margin);
11912 DML_LOG_VERBOSE("DML::%s: k=%u, MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
11913 DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlank = %f\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]);
11914 #endif
11915
11916 mode_lib->mp.Tdmdl[k] = mode_lib->mp.Tdmdl[k] + s->Tvstartup_margin;
11917 if (display_cfg->plane_descriptors[k].dynamic_meta_data.enable && mode_lib->ip.dynamic_metadata_vm_enabled) {
11918 mode_lib->mp.Tdmdl_vm[k] = mode_lib->mp.Tdmdl_vm[k] + s->Tvstartup_margin;
11919 }
11920
11921 isInterlaceTiming = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !mode_lib->ip.ptoi_supported);
11922
11923 // The actual positioning of the vstartup
11924 mode_lib->mp.VStartup[k] = (isInterlaceTiming ? (2 * s->MaxVStartupLines[k]) : s->MaxVStartupLines[k]);
11925
11926 s->dlg_vblank_start = ((isInterlaceTiming ? math_floor2((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch) / 2.0, 1.0) :
11927 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total) - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch);
11928 s->LSetup = math_floor2(4.0 * mode_lib->mp.TSetup[k] / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)), 1.0) / 4.0;
11929 s->blank_lines_remaining = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active) - mode_lib->mp.VStartup[k];
11930
11931 if (s->blank_lines_remaining < 0) {
11932 DML_LOG_VERBOSE("ERROR: Vstartup is larger than vblank!?\n");
11933 s->blank_lines_remaining = 0;
11934 DML_ASSERT(0);
11935 }
11936 mode_lib->mp.MIN_DST_Y_NEXT_START[k] = s->dlg_vblank_start + s->blank_lines_remaining + s->LSetup;
11937
11938 // debug only
11939 if (((mode_lib->mp.VUpdateOffsetPix[k] + mode_lib->mp.VUpdateWidthPix[k] + mode_lib->mp.VReadyOffsetPix[k]) / (double) display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) <=
11940 (isInterlaceTiming ?
11941 math_floor2((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch - mode_lib->mp.VStartup[k]) / 2.0, 1.0) :
11942 (int)(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch - mode_lib->mp.VStartup[k]))) {
11943 mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = true;
11944 } else {
11945 mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = false;
11946 }
11947 #ifdef __DML_VBA_DEBUG__
11948 DML_LOG_VERBOSE("DML::%s: k=%u, VStartup = %u (max)\n", __func__, k, mode_lib->mp.VStartup[k]);
11949 DML_LOG_VERBOSE("DML::%s: k=%u, VStartupMin = %u (max)\n", __func__, k, mode_lib->mp.VStartupMin[k]);
11950 DML_LOG_VERBOSE("DML::%s: k=%u, VUpdateOffsetPix = %u\n", __func__, k, mode_lib->mp.VUpdateOffsetPix[k]);
11951 DML_LOG_VERBOSE("DML::%s: k=%u, VUpdateWidthPix = %u\n", __func__, k, mode_lib->mp.VUpdateWidthPix[k]);
11952 DML_LOG_VERBOSE("DML::%s: k=%u, VReadyOffsetPix = %u\n", __func__, k, mode_lib->mp.VReadyOffsetPix[k]);
11953 DML_LOG_VERBOSE("DML::%s: k=%u, HTotal = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total);
11954 DML_LOG_VERBOSE("DML::%s: k=%u, VTotal = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total);
11955 DML_LOG_VERBOSE("DML::%s: k=%u, VActive = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active);
11956 DML_LOG_VERBOSE("DML::%s: k=%u, VFrontPorch = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch);
11957 DML_LOG_VERBOSE("DML::%s: k=%u, TSetup = %f\n", __func__, k, mode_lib->mp.TSetup[k]);
11958 DML_LOG_VERBOSE("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f\n", __func__, k, mode_lib->mp.MIN_DST_Y_NEXT_START[k]);
11959 DML_LOG_VERBOSE("DML::%s: k=%u, VREADY_AT_OR_AFTER_VSYNC = %u\n", __func__, k, mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k]);
11960 #endif
11961 }
11962
11963 //Maximum Bandwidth Used
11964 mode_lib->mp.TotalWRBandwidth = 0;
11965 for (k = 0; k < display_cfg->num_streams; ++k) {
11966 s->WRBandwidth = 0;
11967 if (display_cfg->stream_descriptors[k].writeback.active_writebacks_per_stream > 0) {
11968 s->WRBandwidth = display_cfg->stream_descriptors[k].writeback.writeback_stream[0].output_height
11969 * display_cfg->stream_descriptors[k].writeback.writeback_stream[0].output_width /
11970 (display_cfg->stream_descriptors[k].timing.h_total * display_cfg->stream_descriptors[k].writeback.writeback_stream[0].input_height
11971 / ((double)display_cfg->stream_descriptors[k].timing.pixel_clock_khz / 1000))
11972 * (display_cfg->stream_descriptors[k].writeback.writeback_stream[0].pixel_format == dml2_444_32 ? 4.0 : 8.0);
11973 mode_lib->mp.TotalWRBandwidth = mode_lib->mp.TotalWRBandwidth + s->WRBandwidth;
11974 }
11975 }
11976
11977 mode_lib->mp.TotalDataReadBandwidth = 0;
11978 for (k = 0; k < s->num_active_planes; ++k) {
11979 mode_lib->mp.TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth + mode_lib->mp.vactive_sw_bw_l[k] + mode_lib->mp.vactive_sw_bw_c[k];
11980 #ifdef __DML_VBA_DEBUG__
11981 DML_LOG_VERBOSE("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, mode_lib->mp.TotalDataReadBandwidth);
11982 DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]);
11983 DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]);
11984 #endif
11985 }
11986
11987 CalculateStutterEfficiency_params->display_cfg = display_cfg;
11988 CalculateStutterEfficiency_params->CompressedBufferSizeInkByte = mode_lib->mp.CompressedBufferSizeInkByte;
11989 CalculateStutterEfficiency_params->UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled;
11990 CalculateStutterEfficiency_params->MetaFIFOSizeInKEntries = mode_lib->ip.meta_fifo_size_in_kentries;
11991 CalculateStutterEfficiency_params->ZeroSizeBufferEntries = mode_lib->ip.zero_size_buffer_entries;
11992 CalculateStutterEfficiency_params->PixelChunkSizeInKByte = mode_lib->ip.pixel_chunk_size_kbytes;
11993 CalculateStutterEfficiency_params->NumberOfActiveSurfaces = s->num_active_planes;
11994 CalculateStutterEfficiency_params->ROBBufferSizeInKByte = mode_lib->ip.rob_buffer_size_kbytes;
11995 CalculateStutterEfficiency_params->TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth;
11996 CalculateStutterEfficiency_params->DCFCLK = mode_lib->mp.Dcfclk;
11997 CalculateStutterEfficiency_params->ReturnBW = mode_lib->mp.urg_bandwidth_available_min[dml2_core_internal_soc_state_sys_active];
11998 CalculateStutterEfficiency_params->CompbufReservedSpace64B = mode_lib->mp.compbuf_reserved_space_64b;
11999 CalculateStutterEfficiency_params->CompbufReservedSpaceZs = mode_lib->ip.compbuf_reserved_space_zs;
12000 CalculateStutterEfficiency_params->SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us;
12001 CalculateStutterEfficiency_params->SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us;
12002 CalculateStutterEfficiency_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings;
12003 CalculateStutterEfficiency_params->StutterEnterPlusExitWatermark = mode_lib->mp.Watermark.StutterEnterPlusExitWatermark;
12004 CalculateStutterEfficiency_params->Z8StutterEnterPlusExitWatermark = mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark;
12005 CalculateStutterEfficiency_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
12006 CalculateStutterEfficiency_params->MinTTUVBlank = mode_lib->mp.MinTTUVBlank;
12007 CalculateStutterEfficiency_params->DPPPerSurface = mode_lib->mp.NoOfDPP;
12008 CalculateStutterEfficiency_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY;
12009 CalculateStutterEfficiency_params->BytePerPixelY = mode_lib->mp.BytePerPixelY;
12010 CalculateStutterEfficiency_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY;
12011 CalculateStutterEfficiency_params->SwathWidthY = mode_lib->mp.SwathWidthY;
12012 CalculateStutterEfficiency_params->SwathHeightY = mode_lib->mp.SwathHeightY;
12013 CalculateStutterEfficiency_params->SwathHeightC = mode_lib->mp.SwathHeightC;
12014 CalculateStutterEfficiency_params->BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY;
12015 CalculateStutterEfficiency_params->BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY;
12016 CalculateStutterEfficiency_params->BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC;
12017 CalculateStutterEfficiency_params->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC;
12018 CalculateStutterEfficiency_params->DCCYMaxUncompressedBlock = mode_lib->mp.DCCYMaxUncompressedBlock;
12019 CalculateStutterEfficiency_params->DCCCMaxUncompressedBlock = mode_lib->mp.DCCCMaxUncompressedBlock;
12020 CalculateStutterEfficiency_params->ReadBandwidthSurfaceLuma = mode_lib->mp.vactive_sw_bw_l;
12021 CalculateStutterEfficiency_params->ReadBandwidthSurfaceChroma = mode_lib->mp.vactive_sw_bw_c;
12022 CalculateStutterEfficiency_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
12023 CalculateStutterEfficiency_params->meta_row_bw = mode_lib->mp.meta_row_bw;
12024 CalculateStutterEfficiency_params->rob_alloc_compressed = mode_lib->ip.dcn_mrq_present;
12025
12026 // output
12027 CalculateStutterEfficiency_params->StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.StutterEfficiencyNotIncludingVBlank;
12028 CalculateStutterEfficiency_params->StutterEfficiency = &mode_lib->mp.StutterEfficiency;
12029 CalculateStutterEfficiency_params->NumberOfStutterBurstsPerFrame = &mode_lib->mp.NumberOfStutterBurstsPerFrame;
12030 CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank;
12031 CalculateStutterEfficiency_params->Z8StutterEfficiency = &mode_lib->mp.Z8StutterEfficiency;
12032 CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &mode_lib->mp.Z8NumberOfStutterBurstsPerFrame;
12033 CalculateStutterEfficiency_params->StutterPeriod = &mode_lib->mp.StutterPeriod;
12034 CalculateStutterEfficiency_params->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = &mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE;
12035
12036 // Stutter Efficiency
12037 CalculateStutterEfficiency(&mode_lib->scratch, CalculateStutterEfficiency_params);
12038
12039 #ifdef __DML_VBA_ALLOW_DELTA__
12040 // Calculate z8 stutter eff assuming 0 reserved space
12041 CalculateStutterEfficiency_params->CompbufReservedSpace64B = 0;
12042 CalculateStutterEfficiency_params->CompbufReservedSpaceZs = 0;
12043
12044 CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlankBestCase;
12045 CalculateStutterEfficiency_params->Z8StutterEfficiency = &mode_lib->mp.Z8StutterEfficiencyBestCase;
12046 CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase;
12047 CalculateStutterEfficiency_params->StutterPeriod = &mode_lib->mp.StutterPeriodBestCase;
12048
12049 // Stutter Efficiency
12050 CalculateStutterEfficiency(&mode_lib->scratch, CalculateStutterEfficiency_params);
12051 #else
12052 mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlankBestCase = mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank;
12053 mode_lib->mp.Z8StutterEfficiencyBestCase = mode_lib->mp.Z8StutterEfficiency;
12054 mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase = mode_lib->mp.Z8NumberOfStutterBurstsPerFrame;
12055 mode_lib->mp.StutterPeriodBestCase = mode_lib->mp.StutterPeriod;
12056 #endif
12057 } // PrefetchAndImmediateFlipSupported
12058
12059 max_uclk_mhz = mode_lib->soc.clk_table.uclk.clk_values_khz[mode_lib->soc.clk_table.uclk.num_clk_values - 1] / 1000.0;
12060 min_return_latency_in_DCFCLK_cycles = (min_return_uclk_cycles / max_uclk_mhz + min_return_fclk_cycles / max_fclk_mhz) * hard_minimum_dcfclk_mhz;
12061 mode_lib->mp.min_return_latency_in_dcfclk = (unsigned int)min_return_latency_in_DCFCLK_cycles;
12062 mode_lib->mp.dcfclk_deep_sleep_hysteresis = (unsigned int)math_max2(32, (double)mode_lib->ip.pixel_chunk_size_kbytes * 1024 * 3 / 4 / 64 - min_return_latency_in_DCFCLK_cycles);
12063 DML_ASSERT(mode_lib->mp.dcfclk_deep_sleep_hysteresis < 256);
12064
12065 #ifdef __DML_VBA_DEBUG__
12066 DML_LOG_VERBOSE("DML::%s: max_fclk_mhz = %f\n", __func__, max_fclk_mhz);
12067 DML_LOG_VERBOSE("DML::%s: max_uclk_mhz = %f\n", __func__, max_uclk_mhz);
12068 DML_LOG_VERBOSE("DML::%s: hard_minimum_dcfclk_mhz = %f\n", __func__, hard_minimum_dcfclk_mhz);
12069 DML_LOG_VERBOSE("DML::%s: min_return_uclk_cycles = %ld\n", __func__, min_return_uclk_cycles);
12070 DML_LOG_VERBOSE("DML::%s: min_return_fclk_cycles = %ld\n", __func__, min_return_fclk_cycles);
12071 DML_LOG_VERBOSE("DML::%s: min_return_latency_in_DCFCLK_cycles = %f\n", __func__, min_return_latency_in_DCFCLK_cycles);
12072 DML_LOG_VERBOSE("DML::%s: dcfclk_deep_sleep_hysteresis = %d \n", __func__, mode_lib->mp.dcfclk_deep_sleep_hysteresis);
12073 DML_LOG_VERBOSE("DML::%s: --- END --- \n", __func__);
12074 #endif
12075 return (in_out_params->mode_lib->mp.PrefetchAndImmediateFlipSupported);
12076 }
12077
dml2_core_calcs_mode_programming_ex(struct dml2_core_calcs_mode_programming_ex * in_out_params)12078 bool dml2_core_calcs_mode_programming_ex(struct dml2_core_calcs_mode_programming_ex *in_out_params)
12079 {
12080 DML_LOG_VERBOSE("DML::%s: ------------- START ----------\n", __func__);
12081 bool result = dml_core_mode_programming(in_out_params);
12082
12083 DML_LOG_VERBOSE("DML::%s: result = %0d\n", __func__, result);
12084 DML_LOG_VERBOSE("DML::%s: ------------- DONE ----------\n", __func__);
12085 return result;
12086 }
12087
/*
 * Compute the dpte row height of one plane of a surface.
 *
 * CalculateBytePerPixelAndBlockSizes() is first run to obtain the "Y" and "C"
 * byte-per-pixel, 256-byte block and macro tile values; the set selected by
 * is_plane1 is then fed to CalculateVMAndRowBytes() through the scratch
 * parameter block inside mode_lib. Only the dpte_row_height output is kept;
 * every other output of that helper lands in a local throwaway array.
 *
 * dpte_row_height        - destination handed to CalculateVMAndRowBytes() for
 *                          the row height
 * mode_lib               - supplies ip.dpte_buffer_size_in_pte_reqs_luma/chroma;
 *                          its scratch.calculate_vm_and_row_bytes_params is
 *                          overwritten
 * is_plane1              - true selects the "C" values and the chroma PTE
 *                          buffer size, false the "Y" values and the luma one
 * SourcePixelFormat      - forwarded to both helpers
 * SurfaceTiling          - forwarded to both helpers
 * ScanDirection          - forwarded as RotationAngle
 * pitch                  - forwarded as Pitch; also passed for both pitch
 *                          arguments of CalculateBytePerPixelAndBlockSizes()
 * GPUVMMinPageSizeKBytes - forwarded unchanged
 */
void dml2_core_calcs_get_dpte_row_height(
	unsigned int *dpte_row_height,
	struct dml2_core_internal_display_mode_lib *mode_lib,
	bool is_plane1,
	enum dml2_source_format_class SourcePixelFormat,
	enum dml2_swizzle_mode SurfaceTiling,
	enum dml2_rotation_angle ScanDirection,
	unsigned int pitch,
	unsigned int GPUVMMinPageSizeKBytes)
{
	/* Per-plane results of CalculateBytePerPixelAndBlockSizes() */
	unsigned int BytePerPixelY;
	unsigned int BytePerPixelC;
	double BytePerPixelInDETY;
	double BytePerPixelInDETC;
	unsigned int BlockHeight256BytesY;
	unsigned int BlockHeight256BytesC;
	unsigned int BlockWidth256BytesY;
	unsigned int BlockWidth256BytesC;
	unsigned int MacroTileWidthY;
	unsigned int MacroTileWidthC;
	unsigned int MacroTileHeightY;
	unsigned int MacroTileHeightC;
	bool surf_linear_128_l = false;
	bool surf_linear_128_c = false;

	/* Values of the plane picked by is_plane1 */
	unsigned int BytePerPixel;
	unsigned int BlockHeight256Bytes;
	unsigned int BlockWidth256Bytes;
	unsigned int MacroTileWidth;
	unsigned int MacroTileHeight;
	unsigned int PTEBufferSizeInRequests;

	/* Sink for the CalculateVMAndRowBytes() outputs nobody reads here */
	unsigned int unused_out[21];

	CalculateBytePerPixelAndBlockSizes(
		SourcePixelFormat,
		SurfaceTiling,
		pitch,
		pitch,

		/* Output */
		&BytePerPixelY,
		&BytePerPixelC,
		&BytePerPixelInDETY,
		&BytePerPixelInDETC,
		&BlockHeight256BytesY,
		&BlockHeight256BytesC,
		&BlockWidth256BytesY,
		&BlockWidth256BytesC,
		&MacroTileHeightY,
		&MacroTileHeightC,
		&MacroTileWidthY,
		&MacroTileWidthC,
		&surf_linear_128_l,
		&surf_linear_128_c);

	if (is_plane1) {
		BytePerPixel = BytePerPixelC;
		BlockHeight256Bytes = BlockHeight256BytesC;
		BlockWidth256Bytes = BlockWidth256BytesC;
		MacroTileWidth = MacroTileWidthC;
		MacroTileHeight = MacroTileHeightC;
		PTEBufferSizeInRequests = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma;
	} else {
		BytePerPixel = BytePerPixelY;
		BlockHeight256Bytes = BlockHeight256BytesY;
		BlockWidth256Bytes = BlockWidth256BytesY;
		MacroTileWidth = MacroTileWidthY;
		MacroTileHeight = MacroTileHeightY;
		PTEBufferSizeInRequests = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
	}

#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML: %s: is_plane1 = %u\n", __func__, is_plane1);
	DML_LOG_VERBOSE("DML: %s: BytePerPixel = %u\n", __func__, BytePerPixel);
	DML_LOG_VERBOSE("DML: %s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes);
	DML_LOG_VERBOSE("DML: %s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes);
	DML_LOG_VERBOSE("DML: %s: MacroTileWidth = %u\n", __func__, MacroTileWidth);
	DML_LOG_VERBOSE("DML: %s: MacroTileHeight = %u\n", __func__, MacroTileHeight);
	DML_LOG_VERBOSE("DML: %s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests);
	DML_LOG_VERBOSE("DML: %s: dpte_buffer_size_in_pte_reqs_luma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma);
	DML_LOG_VERBOSE("DML: %s: dpte_buffer_size_in_pte_reqs_chroma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma);
	DML_LOG_VERBOSE("DML: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes);
#endif

	/* Inputs pinned to constants: GPUVM on with 4 page table levels, one DPP, everything else zeroed */
	mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMEnable = 1;
	mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = 4;
	mode_lib->scratch.calculate_vm_and_row_bytes_params.NumberOfDPPs = 1;
	mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportStationary = 0;
	mode_lib->scratch.calculate_vm_and_row_bytes_params.DCCEnable = 0;
	mode_lib->scratch.calculate_vm_and_row_bytes_params.DCCMetaPitch = 0;
	mode_lib->scratch.calculate_vm_and_row_bytes_params.SwathWidth = 0;
	mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportHeight = 0;
	mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportXStart = 0;
	mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportYStart = 0;
	mode_lib->scratch.calculate_vm_and_row_bytes_params.is_phantom = 0;
	mode_lib->scratch.calculate_vm_and_row_bytes_params.mrq_present = 0;

	/* Inputs taken from the arguments and from the selected plane */
	mode_lib->scratch.calculate_vm_and_row_bytes_params.SourcePixelFormat = SourcePixelFormat;
	mode_lib->scratch.calculate_vm_and_row_bytes_params.SurfaceTiling = SurfaceTiling;
	mode_lib->scratch.calculate_vm_and_row_bytes_params.RotationAngle = ScanDirection;
	mode_lib->scratch.calculate_vm_and_row_bytes_params.Pitch = pitch;
	mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = GPUVMMinPageSizeKBytes;
	mode_lib->scratch.calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = PTEBufferSizeInRequests;
	mode_lib->scratch.calculate_vm_and_row_bytes_params.BytePerPixel = BytePerPixel;
	mode_lib->scratch.calculate_vm_and_row_bytes_params.BlockHeight256Bytes = BlockHeight256Bytes;
	mode_lib->scratch.calculate_vm_and_row_bytes_params.BlockWidth256Bytes = BlockWidth256Bytes;
	mode_lib->scratch.calculate_vm_and_row_bytes_params.MacroTileWidth = MacroTileWidth;
	mode_lib->scratch.calculate_vm_and_row_bytes_params.MacroTileHeight = MacroTileHeight;

	/* The one output the caller asked for */
	mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height = dpte_row_height;

	/* Remaining outputs are discarded; slots 0 and 10 of the sink stay unused */
	mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &unused_out[1];
	mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &unused_out[2];
	mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_width_ub = &unused_out[3];
	mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height_linear = &unused_out[4];
	mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &unused_out[5];
	mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &unused_out[6];
	mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &unused_out[7];
	mode_lib->scratch.calculate_vm_and_row_bytes_params.vmpg_width = &unused_out[8];
	mode_lib->scratch.calculate_vm_and_row_bytes_params.vmpg_height = &unused_out[9];
	mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &unused_out[11];
	mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &unused_out[12];
	mode_lib->scratch.calculate_vm_and_row_bytes_params.PTERequestSize = &unused_out[13];
	mode_lib->scratch.calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &unused_out[14];
	mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_bytes = &unused_out[15];
	mode_lib->scratch.calculate_vm_and_row_bytes_params.MetaRequestWidth = &unused_out[16];
	mode_lib->scratch.calculate_vm_and_row_bytes_params.MetaRequestHeight = &unused_out[17];
	mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_width = &unused_out[18];
	mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_height = &unused_out[19];
	mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &unused_out[20];

	/* Only the parameters needed for the dpte computation carry real values */
	CalculateVMAndRowBytes(&mode_lib->scratch.calculate_vm_and_row_bytes_params);

#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML: %s: dpte_row_height = %u\n", __func__, *dpte_row_height);
#endif
}
12208
is_dual_plane(enum dml2_source_format_class source_format)12209 static bool is_dual_plane(enum dml2_source_format_class source_format)
12210 {
12211 bool ret_val = false;
12212
12213 if ((source_format == dml2_420_12) || (source_format == dml2_420_8) || (source_format == dml2_420_10) || (source_format == dml2_rgbe_alpha))
12214 ret_val = true;
12215
12216 return ret_val;
12217 }
12218
dml_get_plane_idx(const struct dml2_core_internal_display_mode_lib * mode_lib,unsigned int pipe_idx)12219 static unsigned int dml_get_plane_idx(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx)
12220 {
12221 unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx];
12222 return plane_idx;
12223 }
12224
rq_dlg_get_wm_regs(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_dchub_watermark_regs * wm_regs)12225 static void rq_dlg_get_wm_regs(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *wm_regs)
12226 {
12227 double refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz;
12228
12229 wm_regs->fclk_pstate = (unsigned int)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz);
12230 wm_regs->sr_enter = (unsigned int)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz);
12231 wm_regs->sr_exit = (unsigned int)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz);
12232 wm_regs->sr_enter_z8 = (unsigned int)(mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark * refclk_freq_in_mhz);
12233 wm_regs->sr_exit_z8 = (unsigned int)(mode_lib->mp.Watermark.Z8StutterExitWatermark * refclk_freq_in_mhz);
12234 wm_regs->temp_read_or_ppt = (unsigned int)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz);
12235 wm_regs->uclk_pstate = (unsigned int)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz);
12236 wm_regs->urgent = (unsigned int)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz);
12237 wm_regs->usr = (unsigned int)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_freq_in_mhz);
12238 wm_regs->refcyc_per_trip_to_mem = (unsigned int)(mode_lib->mp.UrgentLatency * refclk_freq_in_mhz);
12239 wm_regs->refcyc_per_meta_trip_to_mem = (unsigned int)(mode_lib->mp.MetaTripToMemory * refclk_freq_in_mhz);
12240 wm_regs->frac_urg_bw_flip = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip * 1000);
12241 wm_regs->frac_urg_bw_nom = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidth * 1000);
12242 wm_regs->frac_urg_bw_mall = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthMALL * 1000);
12243 }
12244
/*
 * Returns math_log2_approx(a) - subtrahend, or 0 when a is 0 (the log helper
 * is not called in that case).
 *
 * NOTE(review): there is no guard for subtrahend being larger than the log
 * value; the difference is returned as unsigned int as-is.
 */
static unsigned int log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend)
{
	if (a != 0)
		return (math_log2_approx(a) - subtrahend);

	return 0;
}
12252
dml2_core_calcs_cursor_dlg_reg(struct dml2_cursor_dlg_regs * cursor_dlg_regs,const struct dml2_get_cursor_dlg_reg * p)12253 void dml2_core_calcs_cursor_dlg_reg(struct dml2_cursor_dlg_regs *cursor_dlg_regs, const struct dml2_get_cursor_dlg_reg *p)
12254 {
12255 int dst_x_offset = (int) ((p->cursor_x_position + (p->cursor_stereo_en == 0 ? 0 : math_max2(p->cursor_primary_offset, p->cursor_secondary_offset)) -
12256 (p->cursor_hotspot_x * (p->cursor_2x_magnify == 0 ? 1 : 2))) * p->dlg_refclk_mhz / p->pixel_rate_mhz / p->hratio);
12257 cursor_dlg_regs->dst_x_offset = (unsigned int) ((dst_x_offset > 0) ? dst_x_offset : 0);
12258
12259 #ifdef __DML_VBA_DEBUG__
12260 DML_LOG_VERBOSE("DML_DLG::%s: cursor_x_position=%d\n", __func__, p->cursor_x_position);
12261 DML_LOG_VERBOSE("DML_DLG::%s: dlg_refclk_mhz=%f\n", __func__, p->dlg_refclk_mhz);
12262 DML_LOG_VERBOSE("DML_DLG::%s: pixel_rate_mhz=%f\n", __func__, p->pixel_rate_mhz);
12263 DML_LOG_VERBOSE("DML_DLG::%s: dst_x_offset=%d\n", __func__, dst_x_offset);
12264 DML_LOG_VERBOSE("DML_DLG::%s: dst_x_offset=%d (reg)\n", __func__, cursor_dlg_regs->dst_x_offset);
12265 #endif
12266
12267 cursor_dlg_regs->chunk_hdl_adjust = 3;
12268 cursor_dlg_regs->dst_y_offset = 0;
12269
12270 cursor_dlg_regs->qos_level_fixed = 8;
12271 cursor_dlg_regs->qos_ramp_disable = 0;
12272 }
12273
rq_dlg_get_rq_reg(struct dml2_display_rq_regs * rq_regs,const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,unsigned int pipe_idx)12274 static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs,
12275 const struct dml2_display_cfg *display_cfg,
12276 const struct dml2_core_internal_display_mode_lib *mode_lib,
12277 unsigned int pipe_idx)
12278 {
12279 unsigned int plane_idx = dml_get_plane_idx(mode_lib, pipe_idx);
12280 enum dml2_source_format_class source_format = display_cfg->plane_descriptors[plane_idx].pixel_format;
12281 enum dml2_swizzle_mode sw_mode = display_cfg->plane_descriptors[plane_idx].surface.tiling;
12282 bool dual_plane = is_dual_plane((enum dml2_source_format_class)(source_format));
12283
12284 unsigned int pixel_chunk_bytes = 0;
12285 unsigned int min_pixel_chunk_bytes = 0;
12286 unsigned int meta_chunk_bytes = 0;
12287 unsigned int min_meta_chunk_bytes = 0;
12288 unsigned int dpte_group_bytes = 0;
12289 unsigned int mpte_group_bytes = 0;
12290
12291 unsigned int p1_pixel_chunk_bytes = 0;
12292 unsigned int p1_min_pixel_chunk_bytes = 0;
12293 unsigned int p1_meta_chunk_bytes = 0;
12294 unsigned int p1_min_meta_chunk_bytes = 0;
12295 unsigned int p1_dpte_group_bytes = 0;
12296 unsigned int p1_mpte_group_bytes = 0;
12297
12298 unsigned int detile_buf_plane1_addr = 0;
12299 unsigned int detile_buf_size_in_bytes;
12300 double stored_swath_l_bytes;
12301 double stored_swath_c_bytes;
12302 bool is_phantom_pipe;
12303
12304 DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe[%d] start\n", __func__, pipe_idx);
12305
12306 pixel_chunk_bytes = (unsigned int)(mode_lib->ip.pixel_chunk_size_kbytes * 1024);
12307 min_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.min_pixel_chunk_size_bytes);
12308
12309 if (pixel_chunk_bytes == 64 * 1024)
12310 min_pixel_chunk_bytes = 0;
12311
12312 dpte_group_bytes = (unsigned int)(dml_get_dpte_group_size_in_bytes(mode_lib, pipe_idx));
12313 mpte_group_bytes = (unsigned int)(dml_get_vm_group_size_in_bytes(mode_lib, pipe_idx));
12314
12315 meta_chunk_bytes = (unsigned int)(mode_lib->ip.meta_chunk_size_kbytes * 1024);
12316 min_meta_chunk_bytes = (unsigned int)(mode_lib->ip.min_meta_chunk_size_bytes);
12317
12318 p1_pixel_chunk_bytes = pixel_chunk_bytes;
12319 p1_min_pixel_chunk_bytes = min_pixel_chunk_bytes;
12320 p1_meta_chunk_bytes = meta_chunk_bytes;
12321 p1_min_meta_chunk_bytes = min_meta_chunk_bytes;
12322 p1_dpte_group_bytes = dpte_group_bytes;
12323 p1_mpte_group_bytes = mpte_group_bytes;
12324
12325 if (source_format == dml2_rgbe_alpha)
12326 p1_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.alpha_pixel_chunk_size_kbytes * 1024);
12327
12328 rq_regs->unbounded_request_enabled = dml_get_unbounded_request_enabled(mode_lib);
12329 rq_regs->rq_regs_l.chunk_size = log_and_substract_if_non_zero(pixel_chunk_bytes, 10);
12330 rq_regs->rq_regs_c.chunk_size = log_and_substract_if_non_zero(p1_pixel_chunk_bytes, 10);
12331
12332 if (min_pixel_chunk_bytes == 0)
12333 rq_regs->rq_regs_l.min_chunk_size = 0;
12334 else
12335 rq_regs->rq_regs_l.min_chunk_size = log_and_substract_if_non_zero(min_pixel_chunk_bytes, 8 - 1);
12336
12337 if (p1_min_pixel_chunk_bytes == 0)
12338 rq_regs->rq_regs_c.min_chunk_size = 0;
12339 else
12340 rq_regs->rq_regs_c.min_chunk_size = log_and_substract_if_non_zero(p1_min_pixel_chunk_bytes, 8 - 1);
12341
12342 rq_regs->rq_regs_l.meta_chunk_size = log_and_substract_if_non_zero(meta_chunk_bytes, 10);
12343 rq_regs->rq_regs_c.meta_chunk_size = log_and_substract_if_non_zero(p1_meta_chunk_bytes, 10);
12344
12345 if (min_meta_chunk_bytes == 0)
12346 rq_regs->rq_regs_l.min_meta_chunk_size = 0;
12347 else
12348 rq_regs->rq_regs_l.min_meta_chunk_size = log_and_substract_if_non_zero(min_meta_chunk_bytes, 6 - 1);
12349
12350 if (min_meta_chunk_bytes == 0)
12351 rq_regs->rq_regs_c.min_meta_chunk_size = 0;
12352 else
12353 rq_regs->rq_regs_c.min_meta_chunk_size = log_and_substract_if_non_zero(p1_min_meta_chunk_bytes, 6 - 1);
12354
12355 rq_regs->rq_regs_l.dpte_group_size = log_and_substract_if_non_zero(dpte_group_bytes, 6);
12356 rq_regs->rq_regs_l.mpte_group_size = log_and_substract_if_non_zero(mpte_group_bytes, 6);
12357 rq_regs->rq_regs_c.dpte_group_size = log_and_substract_if_non_zero(p1_dpte_group_bytes, 6);
12358 rq_regs->rq_regs_c.mpte_group_size = log_and_substract_if_non_zero(p1_mpte_group_bytes, 6);
12359
12360 detile_buf_size_in_bytes = (unsigned int)(dml_get_det_buffer_size_kbytes(mode_lib, pipe_idx) * 1024);
12361
12362 if (sw_mode == dml2_sw_linear && display_cfg->gpuvm_enable) {
12363 unsigned int p0_pte_row_height_linear = (unsigned int)(dml_get_dpte_row_height_linear_l(mode_lib, pipe_idx));
12364 #ifdef __DML_VBA_DEBUG__
12365 DML_LOG_VERBOSE("DML_DLG: %s: p0_pte_row_height_linear = %u\n", __func__, p0_pte_row_height_linear);
12366 #endif
12367 DML_ASSERT(p0_pte_row_height_linear >= 8);
12368
12369 rq_regs->rq_regs_l.pte_row_height_linear = math_log2_approx(p0_pte_row_height_linear) - 3;
12370 if (dual_plane) {
12371 unsigned int p1_pte_row_height_linear = (unsigned int)(dml_get_dpte_row_height_linear_c(mode_lib, pipe_idx));
12372
12373 #ifdef __DML_VBA_DEBUG__
12374 DML_LOG_VERBOSE("DML_DLG: %s: p1_pte_row_height_linear = %u\n", __func__, p1_pte_row_height_linear);
12375 #endif
12376 if (sw_mode == dml2_sw_linear) {
12377 DML_ASSERT(p1_pte_row_height_linear >= 8);
12378 }
12379 rq_regs->rq_regs_c.pte_row_height_linear = math_log2_approx(p1_pte_row_height_linear) - 3;
12380 }
12381 } else {
12382 rq_regs->rq_regs_l.pte_row_height_linear = 0;
12383 rq_regs->rq_regs_c.pte_row_height_linear = 0;
12384 }
12385
12386 rq_regs->rq_regs_l.swath_height = log_and_substract_if_non_zero(dml_get_swath_height_l(mode_lib, pipe_idx), 0);
12387 rq_regs->rq_regs_c.swath_height = log_and_substract_if_non_zero(dml_get_swath_height_c(mode_lib, pipe_idx), 0);
12388
12389 // FIXME_DCN4, programming guide has dGPU condition
12390 if (pixel_chunk_bytes >= 32 * 1024 || (dual_plane && p1_pixel_chunk_bytes >= 32 * 1024)) { //32kb
12391 rq_regs->drq_expansion_mode = 0;
12392 } else {
12393 rq_regs->drq_expansion_mode = 2;
12394 }
12395 rq_regs->prq_expansion_mode = 1;
12396 rq_regs->crq_expansion_mode = 1;
12397 rq_regs->mrq_expansion_mode = 1;
12398
12399 stored_swath_l_bytes = dml_get_det_stored_buffer_size_l_bytes(mode_lib, pipe_idx);
12400 stored_swath_c_bytes = dml_get_det_stored_buffer_size_c_bytes(mode_lib, pipe_idx);
12401 is_phantom_pipe = dml_get_is_phantom_pipe(display_cfg, mode_lib, pipe_idx);
12402
12403 // Note: detile_buf_plane1_addr is in unit of 1KB
12404 if (dual_plane) {
12405 if (is_phantom_pipe) {
12406 detile_buf_plane1_addr = (unsigned int)((1024.0 * 1024.0) / 2.0 / 1024.0); // half to chroma
12407 } else {
12408 if (stored_swath_l_bytes / stored_swath_c_bytes <= 1.5) {
12409 detile_buf_plane1_addr = (unsigned int)(detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma
12410 #ifdef __DML_VBA_DEBUG__
12411 DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr);
12412 #endif
12413 } else {
12414 detile_buf_plane1_addr = (unsigned int)(dml_round_to_multiple((unsigned int)((2.0 * detile_buf_size_in_bytes) / 3.0), 1024, 0) / 1024.0); // 2/3 to luma
12415 #ifdef __DML_VBA_DEBUG__
12416 DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d (1/3 chroma)\n", __func__, detile_buf_plane1_addr);
12417 #endif
12418 }
12419 }
12420 }
12421 rq_regs->plane1_base_address = detile_buf_plane1_addr;
12422
12423 #ifdef __DML_VBA_DEBUG__
12424 DML_LOG_VERBOSE("DML_DLG: %s: is_phantom_pipe = %d\n", __func__, is_phantom_pipe);
12425 DML_LOG_VERBOSE("DML_DLG: %s: stored_swath_l_bytes = %f\n", __func__, stored_swath_l_bytes);
12426 DML_LOG_VERBOSE("DML_DLG: %s: stored_swath_c_bytes = %f\n", __func__, stored_swath_c_bytes);
12427 DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_size_in_bytes = %d\n", __func__, detile_buf_size_in_bytes);
12428 DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d\n", __func__, detile_buf_plane1_addr);
12429 DML_LOG_VERBOSE("DML_DLG: %s: plane1_base_address = %d\n", __func__, rq_regs->plane1_base_address);
12430 #endif
12431 //DML_LOG_VERBOSE_rq_regs_st(rq_regs);
12432 DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx);
12433 }
12434
rq_dlg_get_dlg_reg(struct dml2_core_internal_scratch * s,struct dml2_display_dlg_regs * disp_dlg_regs,struct dml2_display_ttu_regs * disp_ttu_regs,const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,const unsigned int pipe_idx)12435 static void rq_dlg_get_dlg_reg(
12436 struct dml2_core_internal_scratch *s,
12437 struct dml2_display_dlg_regs *disp_dlg_regs,
12438 struct dml2_display_ttu_regs *disp_ttu_regs,
12439 const struct dml2_display_cfg *display_cfg,
12440 const struct dml2_core_internal_display_mode_lib *mode_lib,
12441 const unsigned int pipe_idx)
12442 {
12443 struct dml2_core_shared_rq_dlg_get_dlg_reg_locals *l = &s->rq_dlg_get_dlg_reg_locals;
12444
12445 memset(l, 0, sizeof(struct dml2_core_shared_rq_dlg_get_dlg_reg_locals));
12446
12447 DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe_idx=%d\n", __func__, pipe_idx);
12448
12449 l->plane_idx = dml_get_plane_idx(mode_lib, pipe_idx);
12450 DML_ASSERT(l->plane_idx < DML2_MAX_PLANES);
12451
12452 l->source_format = dml2_444_8;
12453 l->odm_mode = dml2_odm_mode_bypass;
12454 l->dual_plane = false;
12455 l->htotal = 0;
12456 l->hactive = 0;
12457 l->hblank_end = 0;
12458 l->vblank_end = 0;
12459 l->interlaced = false;
12460 l->pclk_freq_in_mhz = 0.0;
12461 l->refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz;
12462 l->ref_freq_to_pix_freq = 0.0;
12463
12464 if (l->plane_idx < DML2_MAX_PLANES) {
12465
12466 l->timing = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[l->plane_idx].stream_index].timing;
12467 l->source_format = display_cfg->plane_descriptors[l->plane_idx].pixel_format;
12468 l->odm_mode = mode_lib->mp.ODMMode[l->plane_idx];
12469
12470 l->dual_plane = is_dual_plane(l->source_format);
12471
12472 l->htotal = l->timing->h_total;
12473 l->hactive = l->timing->h_active;
12474 l->hblank_end = l->timing->h_blank_end;
12475 l->vblank_end = l->timing->v_blank_end;
12476 l->interlaced = l->timing->interlaced;
12477 l->pclk_freq_in_mhz = (double)l->timing->pixel_clock_khz / 1000;
12478 l->ref_freq_to_pix_freq = l->refclk_freq_in_mhz / l->pclk_freq_in_mhz;
12479
12480 DML_LOG_VERBOSE("DML_DLG::%s: plane_idx = %d\n", __func__, l->plane_idx);
12481 DML_LOG_VERBOSE("DML_DLG: %s: htotal = %d\n", __func__, l->htotal);
12482 DML_LOG_VERBOSE("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, l->refclk_freq_in_mhz);
12483 DML_LOG_VERBOSE("DML_DLG: %s: dlg_ref_clk_mhz = %3.2f\n", __func__, display_cfg->overrides.hw.dlg_ref_clk_mhz);
12484 DML_LOG_VERBOSE("DML_DLG: %s: soc.refclk_mhz = %d\n", __func__, mode_lib->soc.dchub_refclk_mhz);
12485 DML_LOG_VERBOSE("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, l->pclk_freq_in_mhz);
12486 DML_LOG_VERBOSE("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq);
12487 DML_LOG_VERBOSE("DML_DLG: %s: interlaced = %d\n", __func__, l->interlaced);
12488
12489 DML_ASSERT(l->refclk_freq_in_mhz != 0);
12490 DML_ASSERT(l->pclk_freq_in_mhz != 0);
12491 DML_ASSERT(l->ref_freq_to_pix_freq < 4.0);
12492
12493 // Need to figure out which side of odm combine we're in
12494 // Assume the pipe instance under the same plane is in order
12495
12496 if (l->odm_mode == dml2_odm_mode_bypass) {
12497 disp_dlg_regs->refcyc_h_blank_end = (unsigned int)((double)l->hblank_end * l->ref_freq_to_pix_freq);
12498 } else if (l->odm_mode == dml2_odm_mode_combine_2to1 || l->odm_mode == dml2_odm_mode_combine_3to1 || l->odm_mode == dml2_odm_mode_combine_4to1) {
12499 // find out how many pipe are in this plane
12500 l->num_active_pipes = mode_lib->mp.num_active_pipes;
12501 l->first_pipe_idx_in_plane = DML2_MAX_PLANES;
12502 l->pipe_idx_in_combine = 0; // pipe index within the plane
12503 l->odm_combine_factor = 2;
12504
12505 if (l->odm_mode == dml2_odm_mode_combine_3to1)
12506 l->odm_combine_factor = 3;
12507 else if (l->odm_mode == dml2_odm_mode_combine_4to1)
12508 l->odm_combine_factor = 4;
12509
12510 for (unsigned int i = 0; i < l->num_active_pipes; i++) {
12511 if (dml_get_plane_idx(mode_lib, i) == l->plane_idx) {
12512 if (i < l->first_pipe_idx_in_plane) {
12513 l->first_pipe_idx_in_plane = i;
12514 }
12515 }
12516 }
12517 l->pipe_idx_in_combine = pipe_idx - l->first_pipe_idx_in_plane; // DML assumes the pipes in the same plane will have continuous indexing (i.e. plane 0 use pipe 0, 1, and plane 1 uses pipe 2, 3, etc.)
12518
12519 disp_dlg_regs->refcyc_h_blank_end = (unsigned int)(((double)l->hblank_end + (double)l->pipe_idx_in_combine * (double)l->hactive / (double)l->odm_combine_factor) * l->ref_freq_to_pix_freq);
12520 DML_LOG_VERBOSE("DML_DLG: %s: pipe_idx = %d\n", __func__, pipe_idx);
12521 DML_LOG_VERBOSE("DML_DLG: %s: first_pipe_idx_in_plane = %d\n", __func__, l->first_pipe_idx_in_plane);
12522 DML_LOG_VERBOSE("DML_DLG: %s: pipe_idx_in_combine = %d\n", __func__, l->pipe_idx_in_combine);
12523 DML_LOG_VERBOSE("DML_DLG: %s: odm_combine_factor = %d\n", __func__, l->odm_combine_factor);
12524 }
12525 DML_LOG_VERBOSE("DML_DLG: %s: refcyc_h_blank_end = %d\n", __func__, disp_dlg_regs->refcyc_h_blank_end);
12526
12527 DML_ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)math_pow(2, 13));
12528
12529 disp_dlg_regs->ref_freq_to_pix_freq = (unsigned int)(l->ref_freq_to_pix_freq * math_pow(2, 19));
12530 disp_dlg_regs->refcyc_per_htotal = (unsigned int)(l->ref_freq_to_pix_freq * (double)l->htotal * math_pow(2, 8));
12531 disp_dlg_regs->dlg_vblank_end = l->interlaced ? (l->vblank_end / 2) : l->vblank_end; // 15 bits
12532
12533 l->min_ttu_vblank = mode_lib->mp.MinTTUVBlank[mode_lib->mp.pipe_plane[pipe_idx]];
12534 l->min_dst_y_next_start = (unsigned int)(mode_lib->mp.MIN_DST_Y_NEXT_START[mode_lib->mp.pipe_plane[pipe_idx]]);
12535
12536 DML_LOG_VERBOSE("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, l->min_ttu_vblank);
12537 DML_LOG_VERBOSE("DML_DLG: %s: min_dst_y_next_start = %d\n", __func__, l->min_dst_y_next_start);
12538 DML_LOG_VERBOSE("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq);
12539
12540 l->vready_after_vcount0 = (unsigned int)(mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[mode_lib->mp.pipe_plane[pipe_idx]]);
12541 disp_dlg_regs->vready_after_vcount0 = l->vready_after_vcount0;
12542
12543 DML_LOG_VERBOSE("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0);
12544
12545 l->dst_x_after_scaler = (unsigned int)(mode_lib->mp.DSTXAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]);
12546 l->dst_y_after_scaler = (unsigned int)(mode_lib->mp.DSTYAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]);
12547
12548 DML_LOG_VERBOSE("DML_DLG: %s: dst_x_after_scaler = %d\n", __func__, l->dst_x_after_scaler);
12549 DML_LOG_VERBOSE("DML_DLG: %s: dst_y_after_scaler = %d\n", __func__, l->dst_y_after_scaler);
12550
12551 l->dst_y_prefetch = mode_lib->mp.dst_y_prefetch[mode_lib->mp.pipe_plane[pipe_idx]];
12552 l->dst_y_per_vm_vblank = mode_lib->mp.dst_y_per_vm_vblank[mode_lib->mp.pipe_plane[pipe_idx]];
12553 l->dst_y_per_row_vblank = mode_lib->mp.dst_y_per_row_vblank[mode_lib->mp.pipe_plane[pipe_idx]];
12554 l->dst_y_per_vm_flip = mode_lib->mp.dst_y_per_vm_flip[mode_lib->mp.pipe_plane[pipe_idx]];
12555 l->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip[mode_lib->mp.pipe_plane[pipe_idx]];
12556
12557 DML_LOG_VERBOSE("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, l->dst_y_prefetch);
12558 DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, l->dst_y_per_vm_flip);
12559 DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, l->dst_y_per_row_flip);
12560 DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, l->dst_y_per_vm_vblank);
12561 DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, l->dst_y_per_row_vblank);
12562
12563 if (l->dst_y_prefetch > 0 && l->dst_y_per_vm_vblank > 0 && l->dst_y_per_row_vblank > 0) {
12564 DML_ASSERT(l->dst_y_prefetch > (l->dst_y_per_vm_vblank + l->dst_y_per_row_vblank));
12565 }
12566
12567 l->vratio_pre_l = mode_lib->mp.VRatioPrefetchY[mode_lib->mp.pipe_plane[pipe_idx]];
12568 l->vratio_pre_c = mode_lib->mp.VRatioPrefetchC[mode_lib->mp.pipe_plane[pipe_idx]];
12569
12570 DML_LOG_VERBOSE("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, l->vratio_pre_l);
12571 DML_LOG_VERBOSE("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, l->vratio_pre_c);
12572
12573 // Active
12574 l->refcyc_per_line_delivery_pre_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12575 l->refcyc_per_line_delivery_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12576
12577 DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_l);
12578 DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_l);
12579
12580 l->refcyc_per_line_delivery_pre_c = 0.0;
12581 l->refcyc_per_line_delivery_c = 0.0;
12582
12583 if (l->dual_plane) {
12584 l->refcyc_per_line_delivery_pre_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12585 l->refcyc_per_line_delivery_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12586
12587 DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_c);
12588 DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_c);
12589 }
12590
12591 disp_dlg_regs->refcyc_per_vm_dmdata = (unsigned int)(mode_lib->mp.Tdmdl_vm[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
12592 disp_dlg_regs->dmdata_dl_delta = (unsigned int)(mode_lib->mp.Tdmdl[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
12593
12594 l->refcyc_per_req_delivery_pre_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12595 l->refcyc_per_req_delivery_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12596
12597 DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_l);
12598 DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_l);
12599
12600 l->refcyc_per_req_delivery_pre_c = 0.0;
12601 l->refcyc_per_req_delivery_c = 0.0;
12602 if (l->dual_plane) {
12603 l->refcyc_per_req_delivery_pre_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12604 l->refcyc_per_req_delivery_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12605
12606 DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_c);
12607 DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_c);
12608 }
12609
12610 // TTU - Cursor
12611 DML_ASSERT(display_cfg->plane_descriptors[l->plane_idx].cursor.num_cursors <= 1);
12612
12613 // Assign to register structures
12614 disp_dlg_regs->min_dst_y_next_start = (unsigned int)((double)l->min_dst_y_next_start * math_pow(2, 2));
12615 DML_ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)math_pow(2, 18));
12616
12617 disp_dlg_regs->dst_y_after_scaler = l->dst_y_after_scaler; // in terms of line
12618 disp_dlg_regs->refcyc_x_after_scaler = (unsigned int)((double)l->dst_x_after_scaler * l->ref_freq_to_pix_freq); // in terms of refclk
12619 disp_dlg_regs->dst_y_prefetch = (unsigned int)(l->dst_y_prefetch * math_pow(2, 2));
12620 disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int)(l->dst_y_per_vm_vblank * math_pow(2, 2));
12621 disp_dlg_regs->dst_y_per_row_vblank = (unsigned int)(l->dst_y_per_row_vblank * math_pow(2, 2));
12622 disp_dlg_regs->dst_y_per_vm_flip = (unsigned int)(l->dst_y_per_vm_flip * math_pow(2, 2));
12623 disp_dlg_regs->dst_y_per_row_flip = (unsigned int)(l->dst_y_per_row_flip * math_pow(2, 2));
12624
12625 disp_dlg_regs->vratio_prefetch = (unsigned int)(l->vratio_pre_l * math_pow(2, 19));
12626 disp_dlg_regs->vratio_prefetch_c = (unsigned int)(l->vratio_pre_c * math_pow(2, 19));
12627
12628 DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank);
12629 DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank);
12630 DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip);
12631 DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip);
12632
12633 disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(mode_lib->mp.TimePerVMGroupVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
12634 disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(mode_lib->mp.TimePerVMGroupFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
12635 disp_dlg_regs->refcyc_per_vm_req_vblank = (unsigned int)(mode_lib->mp.TimePerVMRequestVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz * math_pow(2, 10));
12636 disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(mode_lib->mp.TimePerVMRequestFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz * math_pow(2, 10));
12637
12638 l->dst_y_per_pte_row_nom_l = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L[mode_lib->mp.pipe_plane[pipe_idx]];
12639 l->dst_y_per_pte_row_nom_c = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C[mode_lib->mp.pipe_plane[pipe_idx]];
12640 l->refcyc_per_pte_group_nom_l = mode_lib->mp.time_per_pte_group_nom_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12641 l->refcyc_per_pte_group_nom_c = mode_lib->mp.time_per_pte_group_nom_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12642 l->refcyc_per_pte_group_vblank_l = mode_lib->mp.time_per_pte_group_vblank_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12643 l->refcyc_per_pte_group_vblank_c = mode_lib->mp.time_per_pte_group_vblank_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12644 l->refcyc_per_pte_group_flip_l = mode_lib->mp.time_per_pte_group_flip_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12645 l->refcyc_per_pte_group_flip_c = mode_lib->mp.time_per_pte_group_flip_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12646 l->refcyc_per_tdlut_group = mode_lib->mp.time_per_tdlut_group[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12647
12648 disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int)(l->dst_y_per_pte_row_nom_l * math_pow(2, 2));
12649 disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int)(l->dst_y_per_pte_row_nom_c * math_pow(2, 2));
12650
12651 disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int)(l->refcyc_per_pte_group_nom_l);
12652 disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(l->refcyc_per_pte_group_nom_c);
12653 disp_dlg_regs->refcyc_per_pte_group_vblank_l = (unsigned int)(l->refcyc_per_pte_group_vblank_l);
12654 disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int)(l->refcyc_per_pte_group_vblank_c);
12655 disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int)(l->refcyc_per_pte_group_flip_l);
12656 disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int)(l->refcyc_per_pte_group_flip_c);
12657 disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int)math_floor2(l->refcyc_per_line_delivery_pre_l, 1);
12658 disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int)math_floor2(l->refcyc_per_line_delivery_l, 1);
12659 disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int)math_floor2(l->refcyc_per_line_delivery_pre_c, 1);
12660 disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int)math_floor2(l->refcyc_per_line_delivery_c, 1);
12661
12662 l->dst_y_per_meta_row_nom_l = mode_lib->mp.DST_Y_PER_META_ROW_NOM_L[mode_lib->mp.pipe_plane[pipe_idx]];
12663 l->dst_y_per_meta_row_nom_c = mode_lib->mp.DST_Y_PER_META_ROW_NOM_C[mode_lib->mp.pipe_plane[pipe_idx]];
12664 l->refcyc_per_meta_chunk_nom_l = mode_lib->mp.TimePerMetaChunkNominal[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12665 l->refcyc_per_meta_chunk_nom_c = mode_lib->mp.TimePerChromaMetaChunkNominal[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12666 l->refcyc_per_meta_chunk_vblank_l = mode_lib->mp.TimePerMetaChunkVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12667 l->refcyc_per_meta_chunk_vblank_c = mode_lib->mp.TimePerChromaMetaChunkVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12668 l->refcyc_per_meta_chunk_flip_l = mode_lib->mp.TimePerMetaChunkFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12669 l->refcyc_per_meta_chunk_flip_c = mode_lib->mp.TimePerChromaMetaChunkFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12670
12671 disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int)(l->dst_y_per_meta_row_nom_l * math_pow(2, 2));
12672 disp_dlg_regs->dst_y_per_meta_row_nom_c = (unsigned int)(l->dst_y_per_meta_row_nom_c * math_pow(2, 2));
12673 disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int)(l->refcyc_per_meta_chunk_nom_l);
12674 disp_dlg_regs->refcyc_per_meta_chunk_nom_c = (unsigned int)(l->refcyc_per_meta_chunk_nom_c);
12675 disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = (unsigned int)(l->refcyc_per_meta_chunk_vblank_l);
12676 disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = (unsigned int)(l->refcyc_per_meta_chunk_vblank_c);
12677 disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int)(l->refcyc_per_meta_chunk_flip_l);
12678 disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int)(l->refcyc_per_meta_chunk_flip_c);
12679
12680 disp_dlg_regs->refcyc_per_tdlut_group = (unsigned int)(l->refcyc_per_tdlut_group);
12681 disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off
12682
12683 disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int)(l->refcyc_per_req_delivery_pre_l * math_pow(2, 10));
12684 disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int)(l->refcyc_per_req_delivery_l * math_pow(2, 10));
12685 disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int)(l->refcyc_per_req_delivery_pre_c * math_pow(2, 10));
12686 disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int)(l->refcyc_per_req_delivery_c * math_pow(2, 10));
12687 disp_ttu_regs->qos_level_low_wm = 0;
12688
12689 disp_ttu_regs->qos_level_high_wm = (unsigned int)(4.0 * (double)l->htotal * l->ref_freq_to_pix_freq);
12690
12691 disp_ttu_regs->qos_level_flip = 14;
12692 disp_ttu_regs->qos_level_fixed_l = 8;
12693 disp_ttu_regs->qos_level_fixed_c = 8;
12694 disp_ttu_regs->qos_ramp_disable_l = 0;
12695 disp_ttu_regs->qos_ramp_disable_c = 0;
12696 disp_ttu_regs->min_ttu_vblank = (unsigned int)(l->min_ttu_vblank * l->refclk_freq_in_mhz);
12697
12698 // CHECK for HW registers' range, DML_ASSERT or clamp
12699 DML_ASSERT(l->refcyc_per_req_delivery_pre_l < math_pow(2, 13));
12700 DML_ASSERT(l->refcyc_per_req_delivery_l < math_pow(2, 13));
12701 DML_ASSERT(l->refcyc_per_req_delivery_pre_c < math_pow(2, 13));
12702 DML_ASSERT(l->refcyc_per_req_delivery_c < math_pow(2, 13));
12703 if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)math_pow(2, 23))
12704 disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(math_pow(2, 23) - 1);
12705
12706 if (disp_dlg_regs->refcyc_per_vm_group_flip >= (unsigned int)math_pow(2, 23))
12707 disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(math_pow(2, 23) - 1);
12708
12709 if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int)math_pow(2, 23))
12710 disp_dlg_regs->refcyc_per_vm_req_vblank = (unsigned int)(math_pow(2, 23) - 1);
12711
12712 if (disp_dlg_regs->refcyc_per_vm_req_flip >= (unsigned int)math_pow(2, 23))
12713 disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(math_pow(2, 23) - 1);
12714
12715
12716 DML_ASSERT(disp_dlg_regs->dst_y_after_scaler < 8U);
12717 DML_ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)math_pow(2, 13));
12718
12719 if (disp_dlg_regs->dst_y_per_pte_row_nom_l >= (unsigned int)math_pow(2, 17)) {
12720 DML_LOG_VERBOSE("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_L %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_l, (unsigned int)math_pow(2, 17) - 1);
12721 l->dst_y_per_pte_row_nom_l = (unsigned int)math_pow(2, 17) - 1;
12722 }
12723 if (l->dual_plane) {
12724 if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int)math_pow(2, 17)) {
12725 DML_LOG_VERBOSE("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_C %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_c, (unsigned int)math_pow(2, 17) - 1);
12726 l->dst_y_per_pte_row_nom_c = (unsigned int)math_pow(2, 17) - 1;
12727 }
12728 }
12729
12730 if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int)math_pow(2, 23))
12731 disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int)(math_pow(2, 23) - 1);
12732 if (l->dual_plane) {
12733 if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int)math_pow(2, 23))
12734 disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(math_pow(2, 23) - 1);
12735 }
12736 DML_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)math_pow(2, 13));
12737 if (l->dual_plane) {
12738 DML_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)math_pow(2, 13));
12739 }
12740
12741 DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)math_pow(2, 13));
12742 DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)math_pow(2, 13));
12743 DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)math_pow(2, 13));
12744 DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)math_pow(2, 13));
12745 DML_ASSERT(disp_ttu_regs->qos_level_low_wm < (unsigned int)math_pow(2, 14));
12746 DML_ASSERT(disp_ttu_regs->qos_level_high_wm < (unsigned int)math_pow(2, 14));
12747 DML_ASSERT(disp_ttu_regs->min_ttu_vblank < (unsigned int)math_pow(2, 24));
12748
12749 DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx);
12750
12751 }
12752 }
12753
rq_dlg_get_arb_params(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_display_arb_regs * arb_param)12754 static void rq_dlg_get_arb_params(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *arb_param)
12755 {
12756 double refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz;
12757
12758 arb_param->max_req_outstanding = mode_lib->soc.max_outstanding_reqs;
12759 arb_param->min_req_outstanding = mode_lib->soc.max_outstanding_reqs; // turn off the sat level feature if this set to max
12760 arb_param->sdpif_request_rate_limit = (3 * mode_lib->ip.words_per_channel * mode_lib->soc.clk_table.dram_config.channel_count) / 4;
12761 arb_param->sdpif_request_rate_limit = arb_param->sdpif_request_rate_limit < 96 ? 96 : arb_param->sdpif_request_rate_limit;
12762 arb_param->sat_level_us = 60;
12763 arb_param->hvm_max_qos_commit_threshold = 0xf;
12764 arb_param->hvm_min_req_outstand_commit_threshold = 0xa;
12765 arb_param->compbuf_reserved_space_kbytes = dml_get_compbuf_reserved_space_64b(mode_lib) * 64 / 1024;
12766 arb_param->compbuf_size = mode_lib->mp.CompressedBufferSizeInkByte / mode_lib->ip.compressed_buffer_segment_size_in_kbytes;
12767 arb_param->allow_sdpif_rate_limit_when_cstate_req = dml_get_hw_debug5(mode_lib);
12768 arb_param->dcfclk_deep_sleep_hysteresis = dml_get_dcfclk_deep_sleep_hysteresis(mode_lib);
12769 arb_param->pstate_stall_threshold = (unsigned int)(mode_lib->ip_caps.fams2.max_allow_delay_us * refclk_freq_in_mhz);
12770
12771 #ifdef __DML_VBA_DEBUG__
12772 DML_LOG_VERBOSE("DML::%s: max_req_outstanding = %d\n", __func__, arb_param->max_req_outstanding);
12773 DML_LOG_VERBOSE("DML::%s: sdpif_request_rate_limit = %d\n", __func__, arb_param->sdpif_request_rate_limit);
12774 DML_LOG_VERBOSE("DML::%s: compbuf_reserved_space_kbytes = %d\n", __func__, arb_param->compbuf_reserved_space_kbytes);
12775 DML_LOG_VERBOSE("DML::%s: allow_sdpif_rate_limit_when_cstate_req = %d\n", __func__, arb_param->allow_sdpif_rate_limit_when_cstate_req);
12776 DML_LOG_VERBOSE("DML::%s: dcfclk_deep_sleep_hysteresis = %d\n", __func__, arb_param->dcfclk_deep_sleep_hysteresis);
12777 #endif
12778
12779 }
12780
/*
 * dml2_core_calcs_get_watermarks - public entry point for the watermark registers.
 *
 * Forwards all arguments unchanged to rq_dlg_get_wm_regs(), which fills @out.
 */
void dml2_core_calcs_get_watermarks(
	const struct dml2_display_cfg *display_cfg,
	const struct dml2_core_internal_display_mode_lib *mode_lib,
	struct dml2_dchub_watermark_regs *out)
{
	rq_dlg_get_wm_regs(display_cfg, mode_lib, out);
}
12785
/*
 * dml2_core_calcs_get_arb_params - public entry point for the arbiter registers.
 *
 * Forwards all arguments unchanged to rq_dlg_get_arb_params(), which fills @out.
 */
void dml2_core_calcs_get_arb_params(
	const struct dml2_display_cfg *display_cfg,
	const struct dml2_core_internal_display_mode_lib *mode_lib,
	struct dml2_display_arb_regs *out)
{
	rq_dlg_get_arb_params(display_cfg, mode_lib, out);
}
12790
dml2_core_calcs_get_pipe_regs(const struct dml2_display_cfg * display_cfg,struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_dchub_per_pipe_register_set * out,int pipe_index)12791 void dml2_core_calcs_get_pipe_regs(const struct dml2_display_cfg *display_cfg,
12792 struct dml2_core_internal_display_mode_lib *mode_lib,
12793 struct dml2_dchub_per_pipe_register_set *out, int pipe_index)
12794 {
12795 rq_dlg_get_rq_reg(&out->rq_regs, display_cfg, mode_lib, pipe_index);
12796 rq_dlg_get_dlg_reg(&mode_lib->scratch, &out->dlg_regs, &out->ttu_regs, display_cfg, mode_lib, pipe_index);
12797 out->det_size = dml_get_det_buffer_size_kbytes(mode_lib, pipe_index) / mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
12798 }
12799
dml2_core_calcs_get_global_sync_programming(const struct dml2_core_internal_display_mode_lib * mode_lib,union dml2_global_sync_programming * out,int pipe_index)12800 void dml2_core_calcs_get_global_sync_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, union dml2_global_sync_programming *out, int pipe_index)
12801 {
12802 out->dcn4x.vready_offset_pixels = dml_get_vready_offset(mode_lib, pipe_index);
12803 out->dcn4x.vstartup_lines = dml_get_vstartup_calculated(mode_lib, pipe_index);
12804 out->dcn4x.vupdate_offset_pixels = dml_get_vupdate_offset(mode_lib, pipe_index);
12805 out->dcn4x.vupdate_vupdate_width_pixels = dml_get_vupdate_width(mode_lib, pipe_index);
12806 out->dcn4x.pstate_keepout_start_lines = dml_get_pstate_keepout_dst_lines(mode_lib, pipe_index);
12807 }
12808
dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_per_stream_programming * out,int pipe_index)12809 void dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_per_stream_programming *out, int pipe_index)
12810 {
12811 dml2_core_calcs_get_global_sync_programming(mode_lib, &out->global_sync, pipe_index);
12812 }
12813
dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib * mode_lib,const struct display_configuation_with_meta * display_cfg,struct dmub_cmd_fams2_global_config * fams2_global_config)12814 void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib,
12815 const struct display_configuation_with_meta *display_cfg,
12816 struct dmub_cmd_fams2_global_config *fams2_global_config)
12817 {
12818 fams2_global_config->features.bits.enable = display_cfg->stage3.fams2_required;
12819
12820 if (fams2_global_config->features.bits.enable) {
12821 fams2_global_config->features.bits.enable_stall_recovery = true;
12822 fams2_global_config->features.bits.allow_delay_check_mode = FAMS2_ALLOW_DELAY_CHECK_FROM_START;
12823
12824 fams2_global_config->max_allow_delay_us = mode_lib->ip_caps.fams2.max_allow_delay_us;
12825 fams2_global_config->lock_wait_time_us = mode_lib->ip_caps.fams2.lock_timeout_us;
12826 fams2_global_config->recovery_timeout_us = mode_lib->ip_caps.fams2.recovery_timeout_us;
12827 fams2_global_config->hwfq_flip_programming_delay_us = mode_lib->ip_caps.fams2.flip_programming_delay_us;
12828
12829 fams2_global_config->num_streams = display_cfg->display_config.num_streams;
12830 }
12831 }
12832
dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib * mode_lib,const struct display_configuation_with_meta * display_cfg,union dmub_cmd_fams2_config * fams2_base_programming,union dmub_cmd_fams2_config * fams2_sub_programming,enum dml2_pstate_method pstate_method,int plane_index)12833 void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib,
12834 const struct display_configuation_with_meta *display_cfg,
12835 union dmub_cmd_fams2_config *fams2_base_programming,
12836 union dmub_cmd_fams2_config *fams2_sub_programming,
12837 enum dml2_pstate_method pstate_method,
12838 int plane_index)
12839 {
12840 const struct dml2_plane_parameters *plane_descriptor = &display_cfg->display_config.plane_descriptors[plane_index];
12841 const struct dml2_stream_parameters *stream_descriptor = &display_cfg->display_config.stream_descriptors[plane_descriptor->stream_index];
12842 const struct dml2_pstate_meta *stream_pstate_meta = &display_cfg->stage3.stream_pstate_meta[plane_descriptor->stream_index];
12843
12844 struct dmub_fams2_cmd_stream_static_base_state *base_programming = &fams2_base_programming->stream_v1.base;
12845 union dmub_fams2_cmd_stream_static_sub_state *sub_programming = &fams2_sub_programming->stream_v1.sub_state;
12846
12847 unsigned int i;
12848
12849 if (display_cfg->display_config.overrides.all_streams_blanked) {
12850 /* stream is blanked, so do nothing */
12851 return;
12852 }
12853
12854 /* from display configuration */
12855 base_programming->htotal = (uint16_t)stream_descriptor->timing.h_total;
12856 base_programming->vtotal = (uint16_t)stream_descriptor->timing.v_total;
12857 base_programming->vblank_start = (uint16_t)(stream_pstate_meta->nom_vtotal -
12858 stream_descriptor->timing.v_front_porch);
12859 base_programming->vblank_end = (uint16_t)(stream_pstate_meta->nom_vtotal -
12860 stream_descriptor->timing.v_front_porch -
12861 stream_descriptor->timing.v_active);
12862 base_programming->config.bits.is_drr = stream_descriptor->timing.drr_config.enabled;
12863
12864 /* from meta */
12865 base_programming->otg_vline_time_ns =
12866 (unsigned int)(stream_pstate_meta->otg_vline_time_us * 1000.0);
12867 base_programming->scheduling_delay_otg_vlines = (uint8_t)stream_pstate_meta->scheduling_delay_otg_vlines;
12868 base_programming->contention_delay_otg_vlines = (uint8_t)stream_pstate_meta->contention_delay_otg_vlines;
12869 base_programming->vline_int_ack_delay_otg_vlines = (uint8_t)stream_pstate_meta->vertical_interrupt_ack_delay_otg_vlines;
12870 base_programming->drr_keepout_otg_vline = (uint16_t)(stream_pstate_meta->nom_vtotal -
12871 stream_descriptor->timing.v_front_porch -
12872 stream_pstate_meta->method_drr.programming_delay_otg_vlines);
12873 base_programming->allow_to_target_delay_otg_vlines = (uint8_t)stream_pstate_meta->allow_to_target_delay_otg_vlines;
12874 base_programming->max_vtotal = (uint16_t)stream_pstate_meta->max_vtotal;
12875
12876 /* from core */
12877 base_programming->config.bits.min_ttu_vblank_usable = true;
12878 for (i = 0; i < display_cfg->display_config.num_planes; i++) {
12879 /* check if all planes support p-state in blank */
12880 if (display_cfg->display_config.plane_descriptors[i].stream_index == plane_descriptor->stream_index &&
12881 mode_lib->mp.MinTTUVBlank[i] <= mode_lib->mp.Watermark.DRAMClockChangeWatermark) {
12882 base_programming->config.bits.min_ttu_vblank_usable = false;
12883 break;
12884 }
12885 }
12886
12887 switch (pstate_method) {
12888 case dml2_pstate_method_vactive:
12889 case dml2_pstate_method_fw_vactive_drr:
12890 /* legacy vactive */
12891 base_programming->type = FAMS2_STREAM_TYPE_VACTIVE;
12892 sub_programming->legacy.vactive_det_fill_delay_otg_vlines =
12893 (uint8_t)stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines;
12894 base_programming->allow_start_otg_vline =
12895 (uint16_t)stream_pstate_meta->method_vactive.common.allow_start_otg_vline;
12896 base_programming->allow_end_otg_vline =
12897 (uint16_t)stream_pstate_meta->method_vactive.common.allow_end_otg_vline;
12898 base_programming->config.bits.clamp_vtotal_min = true;
12899 break;
12900 case dml2_pstate_method_vblank:
12901 case dml2_pstate_method_fw_vblank_drr:
12902 /* legacy vblank */
12903 base_programming->type = FAMS2_STREAM_TYPE_VBLANK;
12904 base_programming->allow_start_otg_vline =
12905 (uint16_t)stream_pstate_meta->method_vblank.common.allow_start_otg_vline;
12906 base_programming->allow_end_otg_vline =
12907 (uint16_t)stream_pstate_meta->method_vblank.common.allow_end_otg_vline;
12908 base_programming->config.bits.clamp_vtotal_min = true;
12909 break;
12910 case dml2_pstate_method_fw_drr:
12911 /* drr */
12912 base_programming->type = FAMS2_STREAM_TYPE_DRR;
12913 sub_programming->drr.programming_delay_otg_vlines =
12914 (uint8_t)stream_pstate_meta->method_drr.programming_delay_otg_vlines;
12915 sub_programming->drr.nom_stretched_vtotal =
12916 (uint16_t)stream_pstate_meta->method_drr.stretched_vtotal;
12917 base_programming->allow_start_otg_vline =
12918 (uint16_t)stream_pstate_meta->method_drr.common.allow_start_otg_vline;
12919 base_programming->allow_end_otg_vline =
12920 (uint16_t)stream_pstate_meta->method_drr.common.allow_end_otg_vline;
12921 /* drr only clamps to vtotal min for single display */
12922 base_programming->config.bits.clamp_vtotal_min = display_cfg->display_config.num_streams == 1;
12923 sub_programming->drr.only_stretch_if_required = true;
12924 break;
12925 case dml2_pstate_method_fw_svp:
12926 case dml2_pstate_method_fw_svp_drr:
12927 /* subvp */
12928 base_programming->type = FAMS2_STREAM_TYPE_SUBVP;
12929 sub_programming->subvp.vratio_numerator =
12930 (uint16_t)(plane_descriptor->composition.scaler_info.plane0.v_ratio * 1000.0);
12931 sub_programming->subvp.vratio_denominator = 1000;
12932 sub_programming->subvp.programming_delay_otg_vlines =
12933 (uint8_t)stream_pstate_meta->method_subvp.programming_delay_otg_vlines;
12934 sub_programming->subvp.prefetch_to_mall_otg_vlines =
12935 (uint8_t)stream_pstate_meta->method_subvp.prefetch_to_mall_delay_otg_vlines;
12936 sub_programming->subvp.phantom_vtotal =
12937 (uint16_t)stream_pstate_meta->method_subvp.phantom_vtotal;
12938 sub_programming->subvp.phantom_vactive =
12939 (uint16_t)stream_pstate_meta->method_subvp.phantom_vactive;
12940 sub_programming->subvp.config.bits.is_multi_planar =
12941 plane_descriptor->surface.plane1.height > 0;
12942 sub_programming->subvp.config.bits.is_yuv420 =
12943 plane_descriptor->pixel_format == dml2_420_8 ||
12944 plane_descriptor->pixel_format == dml2_420_10 ||
12945 plane_descriptor->pixel_format == dml2_420_12;
12946
12947 base_programming->allow_start_otg_vline =
12948 (uint16_t)stream_pstate_meta->method_subvp.common.allow_start_otg_vline;
12949 base_programming->allow_end_otg_vline =
12950 (uint16_t)stream_pstate_meta->method_subvp.common.allow_end_otg_vline;
12951 base_programming->config.bits.clamp_vtotal_min = true;
12952 break;
12953 case dml2_pstate_method_reserved_hw:
12954 case dml2_pstate_method_reserved_fw:
12955 case dml2_pstate_method_reserved_fw_drr_clamped:
12956 case dml2_pstate_method_reserved_fw_drr_var:
12957 case dml2_pstate_method_na:
12958 case dml2_pstate_method_count:
12959 default:
12960 /* this should never happen */
12961 break;
12962 }
12963 }
12964
dml2_core_calcs_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_mcache_surface_allocation * out,int plane_idx)12965 void dml2_core_calcs_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_mcache_surface_allocation *out, int plane_idx)
12966 {
12967 unsigned int n;
12968
12969 out->num_mcaches_plane0 = dml_get_plane_num_mcaches_plane0(mode_lib, plane_idx);
12970 out->num_mcaches_plane1 = dml_get_plane_num_mcaches_plane1(mode_lib, plane_idx);
12971 out->shift_granularity.p0 = dml_get_plane_mcache_shift_granularity_plane0(mode_lib, plane_idx);
12972 out->shift_granularity.p1 = dml_get_plane_mcache_shift_granularity_plane1(mode_lib, plane_idx);
12973
12974 for (n = 0; n < out->num_mcaches_plane0; n++)
12975 out->mcache_x_offsets_plane0[n] = dml_get_plane_array_mcache_offsets_plane0(mode_lib, plane_idx, n);
12976
12977 for (n = 0; n < out->num_mcaches_plane1; n++)
12978 out->mcache_x_offsets_plane1[n] = dml_get_plane_array_mcache_offsets_plane1(mode_lib, plane_idx, n);
12979
12980 out->last_slice_sharing.mall_comb_mcache_p0 = dml_get_plane_mall_comb_mcache_l(mode_lib, plane_idx);
12981 out->last_slice_sharing.mall_comb_mcache_p1 = dml_get_plane_mall_comb_mcache_c(mode_lib, plane_idx);
12982 out->last_slice_sharing.plane0_plane1 = dml_get_plane_lc_comb_mcache(mode_lib, plane_idx);
12983 out->informative.meta_row_bytes_plane0 = dml_get_plane_mcache_row_bytes_plane0(mode_lib, plane_idx);
12984 out->informative.meta_row_bytes_plane1 = dml_get_plane_mcache_row_bytes_plane1(mode_lib, plane_idx);
12985
12986 out->valid = true;
12987 }
12988
/*
 * Report the surface size in MALL, in bytes, for the given pipe.
 *
 * The value comes from dml_get_surface_size_in_mall_bytes() and is stored in
 * *out.
 */
void dml2_core_calcs_get_mall_allocation(struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int *out, int pipe_index)
{
	unsigned int mall_bytes = dml_get_surface_size_in_mall_bytes(mode_lib, pipe_index);

	*out = mall_bytes;
}
12993
dml2_core_calcs_get_plane_support_info(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,struct core_plane_support_info * out,int plane_idx)12994 void dml2_core_calcs_get_plane_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_plane_support_info *out, int plane_idx)
12995 {
12996 out->mall_svp_size_requirement_ways = 0;
12997
12998 out->nominal_vblank_pstate_latency_hiding_us =
12999 (int)(display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_idx].stream_index].timing.h_total /
13000 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_idx].stream_index].timing.pixel_clock_khz / 1000) * mode_lib->ms.TWait[plane_idx]);
13001
13002 out->dram_change_latency_hiding_margin_in_active = (int)mode_lib->ms.VActiveLatencyHidingMargin[plane_idx];
13003
13004 out->active_latency_hiding_us = (int)mode_lib->ms.VActiveLatencyHidingUs[plane_idx];
13005
13006 out->vactive_det_fill_delay_us[dml2_pstate_type_uclk] =
13007 (unsigned int)math_ceil(mode_lib->ms.pstate_vactive_det_fill_delay_us[dml2_pstate_type_uclk][plane_idx]);
13008 }
13009
dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,struct core_stream_support_info * out,int plane_index)13010 void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index)
13011 {
13012 double phantom_processing_delay_pix;
13013 unsigned int phantom_processing_delay_lines;
13014 unsigned int phantom_min_v_active_lines;
13015 unsigned int phantom_v_active_lines;
13016 unsigned int phantom_v_startup_lines;
13017 unsigned int phantom_v_blank_lines;
13018 unsigned int main_v_blank_lines;
13019 unsigned int rem;
13020
13021 phantom_processing_delay_pix = (double)((mode_lib->ip.subvp_fw_processing_delay_us + mode_lib->ip.subvp_pstate_allow_width_us) *
13022 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.pixel_clock_khz / 1000));
13023 phantom_processing_delay_lines = (unsigned int)(phantom_processing_delay_pix / (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total);
13024 dml2_core_div_rem(phantom_processing_delay_pix,
13025 display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total,
13026 &rem);
13027 if (rem)
13028 phantom_processing_delay_lines++;
13029
13030 phantom_v_startup_lines = dml_get_plane_max_vstartup_lines(mode_lib, plane_index);
13031 phantom_min_v_active_lines = (unsigned int)math_ceil((double)dml_get_plane_subviewport_lines_needed_in_mall(mode_lib, plane_index) /
13032 display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane0.v_ratio);
13033 phantom_v_active_lines = phantom_processing_delay_lines + phantom_min_v_active_lines + mode_lib->ip.subvp_swath_height_margin_lines;
13034
13035 // phantom_vblank = max(vbp(vstartup) + vactive + vfp(always 1) + vsync(can be 1), main_vblank)
13036 phantom_v_blank_lines = phantom_v_startup_lines + 1 + 1;
13037 main_v_blank_lines = display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.v_active;
13038 if (phantom_v_blank_lines > main_v_blank_lines)
13039 phantom_v_blank_lines = main_v_blank_lines;
13040
13041 out->phantom_v_active = phantom_v_active_lines;
13042 // phantom_vtotal = vactive + vblank
13043 out->phantom_v_total = phantom_v_active_lines + phantom_v_blank_lines;
13044
13045 out->phantom_min_v_active = phantom_min_v_active_lines;
13046 out->phantom_v_startup = phantom_v_startup_lines;
13047
13048 out->vblank_reserved_time_us = display_cfg->plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000;
13049 #if defined(__DML_VBA_DEBUG__)
13050 DML_LOG_VERBOSE("DML::%s: subvp_fw_processing_delay_us = %d\n", __func__, mode_lib->ip.subvp_fw_processing_delay_us);
13051 DML_LOG_VERBOSE("DML::%s: subvp_pstate_allow_width_us = %d\n", __func__, mode_lib->ip.subvp_pstate_allow_width_us);
13052 DML_LOG_VERBOSE("DML::%s: subvp_swath_height_margin_lines = %d\n", __func__, mode_lib->ip.subvp_swath_height_margin_lines);
13053 DML_LOG_VERBOSE("DML::%s: vblank_reserved_time_us = %u\n", __func__, out->vblank_reserved_time_us);
13054 #endif
13055 }
13056
dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_display_cfg_programming * out)13057 void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_cfg_programming *out)
13058 {
13059 unsigned int k, n;
13060
13061 out->informative.mode_support_info.ModeIsSupported = mode_lib->ms.support.ModeSupport;
13062 out->informative.mode_support_info.ImmediateFlipSupport = mode_lib->ms.support.ImmediateFlipSupport;
13063 out->informative.mode_support_info.WritebackLatencySupport = mode_lib->ms.support.WritebackLatencySupport;
13064 out->informative.mode_support_info.ScaleRatioAndTapsSupport = mode_lib->ms.support.ScaleRatioAndTapsSupport;
13065 out->informative.mode_support_info.SourceFormatPixelAndScanSupport = mode_lib->ms.support.SourceFormatPixelAndScanSupport;
13066 out->informative.mode_support_info.P2IWith420 = mode_lib->ms.support.P2IWith420;
13067 out->informative.mode_support_info.DSCOnlyIfNecessaryWithBPP = false;
13068 out->informative.mode_support_info.DSC422NativeNotSupported = mode_lib->ms.support.DSC422NativeNotSupported;
13069 out->informative.mode_support_info.LinkRateDoesNotMatchDPVersion = mode_lib->ms.support.LinkRateDoesNotMatchDPVersion;
13070 out->informative.mode_support_info.LinkRateForMultistreamNotIndicated = mode_lib->ms.support.LinkRateForMultistreamNotIndicated;
13071 out->informative.mode_support_info.BPPForMultistreamNotIndicated = mode_lib->ms.support.BPPForMultistreamNotIndicated;
13072 out->informative.mode_support_info.MultistreamWithHDMIOreDP = mode_lib->ms.support.MultistreamWithHDMIOreDP;
13073 out->informative.mode_support_info.MSOOrODMSplitWithNonDPLink = mode_lib->ms.support.MSOOrODMSplitWithNonDPLink;
13074 out->informative.mode_support_info.NotEnoughLanesForMSO = mode_lib->ms.support.NotEnoughLanesForMSO;
13075 out->informative.mode_support_info.NumberOfOTGSupport = mode_lib->ms.support.NumberOfOTGSupport;
13076 out->informative.mode_support_info.NumberOfHDMIFRLSupport = mode_lib->ms.support.NumberOfHDMIFRLSupport;
13077 out->informative.mode_support_info.NumberOfDP2p0Support = mode_lib->ms.support.NumberOfDP2p0Support;
13078 out->informative.mode_support_info.WritebackScaleRatioAndTapsSupport = mode_lib->ms.support.WritebackScaleRatioAndTapsSupport;
13079 out->informative.mode_support_info.CursorSupport = mode_lib->ms.support.CursorSupport;
13080 out->informative.mode_support_info.PitchSupport = mode_lib->ms.support.PitchSupport;
13081 out->informative.mode_support_info.ViewportExceedsSurface = mode_lib->ms.support.ViewportExceedsSurface;
13082 out->informative.mode_support_info.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = false;
13083 out->informative.mode_support_info.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe;
13084 out->informative.mode_support_info.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen;
13085 out->informative.mode_support_info.InvalidCombinationOfMALLUseForPState = mode_lib->ms.support.InvalidCombinationOfMALLUseForPState;
13086 out->informative.mode_support_info.ExceededMALLSize = mode_lib->ms.support.ExceededMALLSize;
13087 out->informative.mode_support_info.EnoughWritebackUnits = mode_lib->ms.support.EnoughWritebackUnits;
13088 out->informative.mode_support_info.temp_read_or_ppt_support = mode_lib->ms.support.global_temp_read_or_ppt_supported;
13089 out->informative.mode_support_info.g6_temp_read_support = mode_lib->ms.support.g6_temp_read_support;
13090
13091 out->informative.mode_support_info.ExceededMultistreamSlots = mode_lib->ms.support.ExceededMultistreamSlots;
13092 out->informative.mode_support_info.NotEnoughDSCUnits = mode_lib->ms.support.NotEnoughDSCUnits;
13093 out->informative.mode_support_info.NotEnoughDSCSlices = mode_lib->ms.support.NotEnoughDSCSlices;
13094 out->informative.mode_support_info.PixelsPerLinePerDSCUnitSupport = mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport;
13095 out->informative.mode_support_info.DSCCLKRequiredMoreThanSupported = mode_lib->ms.support.DSCCLKRequiredMoreThanSupported;
13096 out->informative.mode_support_info.DTBCLKRequiredMoreThanSupported = mode_lib->ms.support.DTBCLKRequiredMoreThanSupported;
13097 out->informative.mode_support_info.LinkCapacitySupport = mode_lib->ms.support.LinkCapacitySupport;
13098
13099 out->informative.mode_support_info.ROBSupport = mode_lib->ms.support.ROBSupport;
13100 out->informative.mode_support_info.OutstandingRequestsSupport = mode_lib->ms.support.OutstandingRequestsSupport;
13101 out->informative.mode_support_info.OutstandingRequestsUrgencyAvoidance = mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance;
13102 out->informative.mode_support_info.PTEBufferSizeNotExceeded = mode_lib->ms.support.PTEBufferSizeNotExceeded;
13103 out->informative.mode_support_info.DCCMetaBufferSizeNotExceeded = mode_lib->ms.support.DCCMetaBufferSizeNotExceeded;
13104
13105 out->informative.mode_support_info.TotalVerticalActiveBandwidthSupport = mode_lib->ms.support.AvgBandwidthSupport;
13106 out->informative.mode_support_info.VActiveBandwidthSupport = mode_lib->ms.support.UrgVactiveBandwidthSupport;
13107 out->informative.mode_support_info.USRRetrainingSupport = mode_lib->ms.support.USRRetrainingSupport;
13108
13109 out->informative.mode_support_info.PrefetchSupported = mode_lib->ms.support.PrefetchSupported;
13110 out->informative.mode_support_info.DynamicMetadataSupported = mode_lib->ms.support.DynamicMetadataSupported;
13111 out->informative.mode_support_info.VRatioInPrefetchSupported = mode_lib->ms.support.VRatioInPrefetchSupported;
13112 out->informative.mode_support_info.DISPCLK_DPPCLK_Support = mode_lib->ms.support.DISPCLK_DPPCLK_Support;
13113 out->informative.mode_support_info.TotalAvailablePipesSupport = mode_lib->ms.support.TotalAvailablePipesSupport;
13114 out->informative.mode_support_info.NumberOfTDLUT33cubeSupport = mode_lib->ms.support.NumberOfTDLUT33cubeSupport;
13115 out->informative.mode_support_info.ViewportSizeSupport = mode_lib->ms.support.ViewportSizeSupport;
13116 out->informative.mode_support_info.qos_bandwidth_support = mode_lib->ms.support.qos_bandwidth_support;
13117 out->informative.mode_support_info.dcfclk_support = mode_lib->ms.support.dcfclk_support;
13118
13119 for (k = 0; k < out->display_config.num_planes; k++) {
13120
13121 out->informative.mode_support_info.FCLKChangeSupport[k] = mode_lib->ms.support.FCLKChangeSupport[k];
13122 out->informative.mode_support_info.MPCCombineEnable[k] = mode_lib->ms.support.MPCCombineEnable[k];
13123 out->informative.mode_support_info.ODMMode[k] = mode_lib->ms.support.ODMMode[k];
13124 out->informative.mode_support_info.DPPPerSurface[k] = mode_lib->ms.support.DPPPerSurface[k];
13125 out->informative.mode_support_info.DSCEnabled[k] = mode_lib->ms.support.DSCEnabled[k];
13126 out->informative.mode_support_info.FECEnabled[k] = mode_lib->ms.support.FECEnabled[k];
13127 out->informative.mode_support_info.NumberOfDSCSlices[k] = mode_lib->ms.support.NumberOfDSCSlices[k];
13128 out->informative.mode_support_info.OutputBpp[k] = mode_lib->ms.support.OutputBpp[k];
13129
13130 if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_unknown)
13131 out->informative.mode_support_info.OutputType[k] = dml2_output_type_unknown;
13132 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_dp)
13133 out->informative.mode_support_info.OutputType[k] = dml2_output_type_dp;
13134 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_edp)
13135 out->informative.mode_support_info.OutputType[k] = dml2_output_type_edp;
13136 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_dp2p0)
13137 out->informative.mode_support_info.OutputType[k] = dml2_output_type_dp2p0;
13138 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_hdmi)
13139 out->informative.mode_support_info.OutputType[k] = dml2_output_type_hdmi;
13140 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_hdmifrl)
13141 out->informative.mode_support_info.OutputType[k] = dml2_output_type_hdmifrl;
13142
13143 if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_unknown)
13144 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_unknown;
13145 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr)
13146 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr;
13147 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr2)
13148 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr2;
13149 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr3)
13150 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr3;
13151 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr10)
13152 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr10;
13153 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr13p5)
13154 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr13p5;
13155 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr20)
13156 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr20;
13157 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_3x3)
13158 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_3x3;
13159 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_6x3)
13160 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_6x3;
13161 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_6x4)
13162 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_6x4;
13163 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_8x4)
13164 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_8x4;
13165 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_10x4)
13166 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_10x4;
13167 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_12x4)
13168 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_12x4;
13169 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_16x4)
13170 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_16x4;
13171 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_20x4)
13172 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_20x4;
13173
13174 out->informative.mode_support_info.AlignedYPitch[k] = mode_lib->ms.support.AlignedYPitch[k];
13175 out->informative.mode_support_info.AlignedCPitch[k] = mode_lib->ms.support.AlignedCPitch[k];
13176 }
13177
13178 out->informative.watermarks.urgent_us = dml_get_wm_urgent(mode_lib);
13179 out->informative.watermarks.writeback_urgent_us = dml_get_wm_writeback_urgent(mode_lib);
13180 out->informative.watermarks.writeback_pstate_us = dml_get_wm_writeback_dram_clock_change(mode_lib);
13181 out->informative.watermarks.writeback_fclk_pstate_us = dml_get_wm_writeback_fclk_change(mode_lib);
13182
13183 out->informative.watermarks.cstate_exit_us = dml_get_wm_stutter_exit(mode_lib);
13184 out->informative.watermarks.cstate_enter_plus_exit_us = dml_get_wm_stutter_enter_exit(mode_lib);
13185 out->informative.watermarks.z8_cstate_exit_us = dml_get_wm_z8_stutter_exit(mode_lib);
13186 out->informative.watermarks.z8_cstate_enter_plus_exit_us = dml_get_wm_z8_stutter_enter_exit(mode_lib);
13187 out->informative.watermarks.pstate_change_us = dml_get_wm_dram_clock_change(mode_lib);
13188 out->informative.watermarks.fclk_pstate_change_us = dml_get_wm_fclk_change(mode_lib);
13189 out->informative.watermarks.usr_retraining_us = dml_get_wm_usr_retraining(mode_lib);
13190 out->informative.watermarks.temp_read_or_ppt_watermark_us = dml_get_wm_temp_read_or_ppt(mode_lib);
13191
13192 out->informative.mall.total_surface_size_in_mall_bytes = 0;
13193 out->informative.dpp.total_num_dpps_required = 0;
13194 for (k = 0; k < out->display_config.num_planes; ++k) {
13195 out->informative.mall.total_surface_size_in_mall_bytes += mode_lib->mp.SurfaceSizeInTheMALL[k];
13196 out->informative.dpp.total_num_dpps_required += mode_lib->mp.NoOfDPP[k];
13197 }
13198
13199 out->informative.qos.min_return_latency_in_dcfclk = mode_lib->mp.min_return_latency_in_dcfclk;
13200 out->informative.qos.urgent_latency_us = dml_get_urgent_latency(mode_lib);
13201
13202 out->informative.qos.max_urgent_latency_us = dml_get_max_urgent_latency_us(mode_lib);
13203 out->informative.qos.avg_non_urgent_latency_us = dml_get_avg_non_urgent_latency_us(mode_lib);
13204 out->informative.qos.avg_urgent_latency_us = dml_get_avg_urgent_latency_us(mode_lib);
13205
13206 out->informative.qos.wm_memory_trip_us = dml_get_wm_memory_trip(mode_lib);
13207 out->informative.qos.meta_trip_memory_us = dml_get_meta_trip_memory_us(mode_lib);
13208 out->informative.qos.fraction_of_urgent_bandwidth = dml_get_fraction_of_urgent_bandwidth(mode_lib);
13209 out->informative.qos.fraction_of_urgent_bandwidth_immediate_flip = dml_get_fraction_of_urgent_bandwidth_imm_flip(mode_lib);
13210 out->informative.qos.fraction_of_urgent_bandwidth_mall = dml_get_fraction_of_urgent_bandwidth_mall(mode_lib);
13211
13212 out->informative.qos.avg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_avg_bw_required_sdp(mode_lib);
13213 out->informative.qos.avg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_avg_bw_required_dram(mode_lib);
13214 out->informative.qos.avg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_avg_bw_required_sdp(mode_lib);
13215 out->informative.qos.avg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_avg_bw_required_dram(mode_lib);
13216
13217 out->informative.qos.avg_bw_available.sys_active.sdp_bw_mbps = dml_get_sys_active_avg_bw_available_sdp(mode_lib);
13218 out->informative.qos.avg_bw_available.sys_active.dram_bw_mbps = dml_get_sys_active_avg_bw_available_dram(mode_lib);
13219 out->informative.qos.avg_bw_available.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_avg_bw_available_sdp(mode_lib);
13220 out->informative.qos.avg_bw_available.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_avg_bw_available_dram(mode_lib);
13221
13222 out->informative.qos.urg_bw_available.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_available_sdp(mode_lib);
13223 out->informative.qos.urg_bw_available.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_available_dram(mode_lib);
13224 out->informative.qos.urg_bw_available.sys_active.dram_vm_only_bw_mbps = dml_get_sys_active_urg_bw_available_dram_vm_only(mode_lib);
13225
13226 out->informative.qos.urg_bw_available.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_available_sdp(mode_lib);
13227 out->informative.qos.urg_bw_available.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_available_dram(mode_lib);
13228 out->informative.qos.urg_bw_available.svp_prefetch.dram_vm_only_bw_mbps = dml_get_svp_prefetch_urg_bw_available_dram_vm_only(mode_lib);
13229
13230 out->informative.qos.urg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_required_sdp(mode_lib);
13231 out->informative.qos.urg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_required_dram(mode_lib);
13232 out->informative.qos.urg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_required_sdp(mode_lib);
13233 out->informative.qos.urg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_required_dram(mode_lib);
13234
13235 out->informative.qos.non_urg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_non_urg_required_sdp(mode_lib);
13236 out->informative.qos.non_urg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_non_urg_required_dram(mode_lib);
13237 out->informative.qos.non_urg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_sdp(mode_lib);
13238 out->informative.qos.non_urg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_dram(mode_lib);
13239
13240 out->informative.qos.urg_bw_required_with_flip.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_required_sdp_flip(mode_lib);
13241 out->informative.qos.urg_bw_required_with_flip.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_required_dram_flip(mode_lib);
13242 out->informative.qos.urg_bw_required_with_flip.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_required_sdp_flip(mode_lib);
13243 out->informative.qos.urg_bw_required_with_flip.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_required_dram_flip(mode_lib);
13244
13245 out->informative.qos.non_urg_bw_required_with_flip.sys_active.sdp_bw_mbps = dml_get_sys_active_non_urg_required_sdp_flip(mode_lib);
13246 out->informative.qos.non_urg_bw_required_with_flip.sys_active.dram_bw_mbps = dml_get_sys_active_non_urg_required_dram_flip(mode_lib);
13247 out->informative.qos.non_urg_bw_required_with_flip.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_sdp_flip(mode_lib);
13248 out->informative.qos.non_urg_bw_required_with_flip.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_dram_flip(mode_lib);
13249
13250 out->informative.crb.comp_buffer_size_kbytes = dml_get_comp_buffer_size_kbytes(mode_lib);
13251 out->informative.crb.UnboundedRequestEnabled = dml_get_unbounded_request_enabled(mode_lib);
13252
13253 out->informative.crb.compbuf_reserved_space_64b = dml_get_compbuf_reserved_space_64b(mode_lib);
13254 out->informative.misc.hw_debug5 = dml_get_hw_debug5(mode_lib);
13255 out->informative.misc.dcfclk_deep_sleep_hysteresis = dml_get_dcfclk_deep_sleep_hysteresis(mode_lib);
13256
13257 out->informative.power_management.stutter_efficiency = dml_get_stutter_efficiency_no_vblank(mode_lib);
13258 out->informative.power_management.stutter_efficiency_with_vblank = dml_get_stutter_efficiency(mode_lib);
13259 out->informative.power_management.stutter_num_bursts = dml_get_stutter_num_bursts(mode_lib);
13260
13261 out->informative.power_management.z8.stutter_efficiency = dml_get_stutter_efficiency_no_vblank_z8(mode_lib);
13262 out->informative.power_management.z8.stutter_efficiency_with_vblank = dml_get_stutter_efficiency_z8(mode_lib);
13263 out->informative.power_management.z8.stutter_num_bursts = dml_get_stutter_num_bursts_z8(mode_lib);
13264 out->informative.power_management.z8.stutter_period = dml_get_stutter_period(mode_lib);
13265
13266 out->informative.power_management.z8.bestcase.stutter_efficiency = dml_get_stutter_efficiency_z8_bestcase(mode_lib);
13267 out->informative.power_management.z8.bestcase.stutter_num_bursts = dml_get_stutter_num_bursts_z8_bestcase(mode_lib);
13268 out->informative.power_management.z8.bestcase.stutter_period = dml_get_stutter_period_bestcase(mode_lib);
13269
13270 out->informative.misc.cstate_max_cap_mode = dml_get_cstate_max_cap_mode(mode_lib);
13271
13272 out->min_clocks.dcn4x.dpprefclk_khz = (unsigned int)dml_get_global_dppclk_khz(mode_lib);
13273
13274 out->informative.qos.max_active_fclk_change_latency_supported = dml_get_fclk_change_latency(mode_lib);
13275
13276 out->informative.misc.LowestPrefetchMargin = 10 * 1000 * 1000;
13277
13278 for (k = 0; k < out->display_config.num_planes; k++) {
13279
13280 if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us)
13281 && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.fclk_change_blackout_us)
13282 && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us))
13283 out->informative.misc.PrefetchMode[k] = 0;
13284 else if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.fclk_change_blackout_us)
13285 && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us))
13286 out->informative.misc.PrefetchMode[k] = 1;
13287 else if (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us)
13288 out->informative.misc.PrefetchMode[k] = 2;
13289 else
13290 out->informative.misc.PrefetchMode[k] = 3;
13291
13292 out->informative.misc.min_ttu_vblank_us[k] = mode_lib->mp.MinTTUVBlank[k];
13293 out->informative.mall.subviewport_lines_needed_in_mall[k] = mode_lib->mp.SubViewportLinesNeededInMALL[k];
13294 out->informative.crb.det_size_in_kbytes[k] = mode_lib->mp.DETBufferSizeInKByte[k];
13295 out->informative.crb.DETBufferSizeY[k] = mode_lib->mp.DETBufferSizeY[k];
13296 out->informative.misc.ImmediateFlipSupportedForPipe[k] = mode_lib->mp.ImmediateFlipSupportedForPipe[k];
13297 out->informative.misc.UsesMALLForStaticScreen[k] = mode_lib->mp.is_using_mall_for_ss[k];
13298 out->informative.plane_info[k].dpte_row_height_plane0 = mode_lib->mp.dpte_row_height[k];
13299 out->informative.plane_info[k].dpte_row_height_plane1 = mode_lib->mp.dpte_row_height_chroma[k];
13300 out->informative.plane_info[k].meta_row_height_plane0 = mode_lib->mp.meta_row_height[k];
13301 out->informative.plane_info[k].meta_row_height_plane1 = mode_lib->mp.meta_row_height_chroma[k];
13302 out->informative.dcc_control[k].max_uncompressed_block_plane0 = mode_lib->mp.DCCYMaxUncompressedBlock[k];
13303 out->informative.dcc_control[k].max_compressed_block_plane0 = mode_lib->mp.DCCYMaxCompressedBlock[k];
13304 out->informative.dcc_control[k].independent_block_plane0 = mode_lib->mp.DCCYIndependentBlock[k];
13305 out->informative.dcc_control[k].max_uncompressed_block_plane1 = mode_lib->mp.DCCCMaxUncompressedBlock[k];
13306 out->informative.dcc_control[k].max_compressed_block_plane1 = mode_lib->mp.DCCCMaxCompressedBlock[k];
13307 out->informative.dcc_control[k].independent_block_plane1 = mode_lib->mp.DCCCIndependentBlock[k];
13308 out->informative.misc.dst_x_after_scaler[k] = mode_lib->mp.DSTXAfterScaler[k];
13309 out->informative.misc.dst_y_after_scaler[k] = mode_lib->mp.DSTYAfterScaler[k];
13310 out->informative.misc.prefetch_source_lines_plane0[k] = mode_lib->mp.PrefetchSourceLinesY[k];
13311 out->informative.misc.prefetch_source_lines_plane1[k] = mode_lib->mp.PrefetchSourceLinesC[k];
13312 out->informative.misc.vready_at_or_after_vsync[k] = mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k];
13313 out->informative.misc.min_dst_y_next_start[k] = mode_lib->mp.MIN_DST_Y_NEXT_START[k];
13314 out->informative.plane_info[k].swath_width_plane0 = mode_lib->mp.SwathWidthY[k];
13315 out->informative.plane_info[k].swath_height_plane0 = mode_lib->mp.SwathHeightY[k];
13316 out->informative.plane_info[k].swath_height_plane1 = mode_lib->mp.SwathHeightC[k];
13317 out->informative.misc.CursorDstXOffset[k] = mode_lib->mp.CursorDstXOffset[k];
13318 out->informative.misc.CursorDstYOffset[k] = mode_lib->mp.CursorDstYOffset[k];
13319 out->informative.misc.CursorChunkHDLAdjust[k] = mode_lib->mp.CursorChunkHDLAdjust[k];
13320 out->informative.misc.dpte_group_bytes[k] = mode_lib->mp.dpte_group_bytes[k];
13321 out->informative.misc.vm_group_bytes[k] = mode_lib->mp.vm_group_bytes[k];
13322 out->informative.misc.DisplayPipeRequestDeliveryTimeLuma[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[k];
13323 out->informative.misc.DisplayPipeRequestDeliveryTimeChroma[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[k];
13324 out->informative.misc.DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[k];
13325 out->informative.misc.DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[k];
13326 out->informative.misc.TimePerVMGroupVBlank[k] = mode_lib->mp.TimePerVMGroupVBlank[k];
13327 out->informative.misc.TimePerVMGroupFlip[k] = mode_lib->mp.TimePerVMGroupFlip[k];
13328 out->informative.misc.TimePerVMRequestVBlank[k] = mode_lib->mp.TimePerVMRequestVBlank[k];
13329 out->informative.misc.TimePerVMRequestFlip[k] = mode_lib->mp.TimePerVMRequestFlip[k];
13330 out->informative.misc.Tdmdl_vm[k] = mode_lib->mp.Tdmdl_vm[k];
13331 out->informative.misc.Tdmdl[k] = mode_lib->mp.Tdmdl[k];
13332 out->informative.misc.VStartup[k] = mode_lib->mp.VStartup[k];
13333 out->informative.misc.VUpdateOffsetPix[k] = mode_lib->mp.VUpdateOffsetPix[k];
13334 out->informative.misc.VUpdateWidthPix[k] = mode_lib->mp.VUpdateWidthPix[k];
13335 out->informative.misc.VReadyOffsetPix[k] = mode_lib->mp.VReadyOffsetPix[k];
13336
13337 out->informative.misc.DST_Y_PER_PTE_ROW_NOM_L[k] = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L[k];
13338 out->informative.misc.DST_Y_PER_PTE_ROW_NOM_C[k] = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C[k];
13339 out->informative.misc.time_per_pte_group_nom_luma[k] = mode_lib->mp.time_per_pte_group_nom_luma[k];
13340 out->informative.misc.time_per_pte_group_nom_chroma[k] = mode_lib->mp.time_per_pte_group_nom_chroma[k];
13341 out->informative.misc.time_per_pte_group_vblank_luma[k] = mode_lib->mp.time_per_pte_group_vblank_luma[k];
13342 out->informative.misc.time_per_pte_group_vblank_chroma[k] = mode_lib->mp.time_per_pte_group_vblank_chroma[k];
13343 out->informative.misc.time_per_pte_group_flip_luma[k] = mode_lib->mp.time_per_pte_group_flip_luma[k];
13344 out->informative.misc.time_per_pte_group_flip_chroma[k] = mode_lib->mp.time_per_pte_group_flip_chroma[k];
13345 out->informative.misc.VRatioPrefetchY[k] = mode_lib->mp.VRatioPrefetchY[k];
13346 out->informative.misc.VRatioPrefetchC[k] = mode_lib->mp.VRatioPrefetchC[k];
13347 out->informative.misc.DestinationLinesForPrefetch[k] = mode_lib->mp.dst_y_prefetch[k];
13348 out->informative.misc.DestinationLinesToRequestVMInVBlank[k] = mode_lib->mp.dst_y_per_vm_vblank[k];
13349 out->informative.misc.DestinationLinesToRequestRowInVBlank[k] = mode_lib->mp.dst_y_per_row_vblank[k];
13350 out->informative.misc.DestinationLinesToRequestVMInImmediateFlip[k] = mode_lib->mp.dst_y_per_vm_flip[k];
13351 out->informative.misc.DestinationLinesToRequestRowInImmediateFlip[k] = mode_lib->mp.dst_y_per_row_flip[k];
13352 out->informative.misc.DisplayPipeLineDeliveryTimeLuma[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[k];
13353 out->informative.misc.DisplayPipeLineDeliveryTimeChroma[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[k];
13354 out->informative.misc.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[k];
13355 out->informative.misc.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[k];
13356
13357 out->informative.misc.WritebackRequiredBandwidth = mode_lib->mp.TotalWRBandwidth / 1000.0;
13358 out->informative.misc.WritebackAllowDRAMClockChangeEndPosition[k] = mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k];
13359 out->informative.misc.WritebackAllowFCLKChangeEndPosition[k] = mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k];
13360 out->informative.misc.DSCCLK_calculated[k] = mode_lib->mp.DSCCLK[k];
13361 out->informative.misc.BIGK_FRAGMENT_SIZE[k] = mode_lib->mp.BIGK_FRAGMENT_SIZE[k];
13362 out->informative.misc.PTE_BUFFER_MODE[k] = mode_lib->mp.PTE_BUFFER_MODE[k];
13363 out->informative.misc.DSCDelay[k] = mode_lib->mp.DSCDelay[k];
13364 out->informative.misc.MaxActiveDRAMClockChangeLatencySupported[k] = mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported[k];
13365
13366 if (mode_lib->mp.impacted_prefetch_margin_us[k] < out->informative.misc.LowestPrefetchMargin)
13367 out->informative.misc.LowestPrefetchMargin = mode_lib->mp.impacted_prefetch_margin_us[k];
13368 }
13369
13370 // For this DV informative layer, all pipes in the same plane just use the same id;
13371 // the optimization and helper layer will be added later on.
13372 // This only works when we have a high enough "mcache" to fit everything without thrashing the cache.
13373 for (k = 0; k < out->display_config.num_planes; k++) {
13374 out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane0 = dml_get_plane_num_mcaches_plane0(mode_lib, k);
13375 out->informative.non_optimized_mcache_allocation[k].informative.meta_row_bytes_plane0 = dml_get_plane_mcache_row_bytes_plane0(mode_lib, k);
13376
13377 for (n = 0; n < out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane0; n++) {
13378 out->informative.non_optimized_mcache_allocation[k].mcache_x_offsets_plane0[n] = dml_get_plane_array_mcache_offsets_plane0(mode_lib, k, n);
13379 out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane0[n] = k;
13380 }
13381
13382 out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane1 = dml_get_plane_num_mcaches_plane1(mode_lib, k);
13383 out->informative.non_optimized_mcache_allocation[k].informative.meta_row_bytes_plane1 = dml_get_plane_mcache_row_bytes_plane1(mode_lib, k);
13384
13385 for (n = 0; n < out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane1; n++) {
13386 out->informative.non_optimized_mcache_allocation[k].mcache_x_offsets_plane1[n] = dml_get_plane_array_mcache_offsets_plane1(mode_lib, k, n);
13387 out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane1[n] = k;
13388 }
13389 }
13390 out->informative.qos.max_non_urgent_latency_us = dml_get_max_non_urgent_latency_us(mode_lib);
13391
13392 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
13393 if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024
13394 / mode_lib->ms.support.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= out->informative.qos.max_non_urgent_latency_us) {
13395 out->informative.misc.ROBUrgencyAvoidance = true;
13396 } else {
13397 out->informative.misc.ROBUrgencyAvoidance = false;
13398 }
13399 } else {
13400 out->informative.misc.ROBUrgencyAvoidance = true;
13401 }
13402 }
13403