1 // SPDX-License-Identifier: MIT 2 // 3 // Copyright 2024 Advanced Micro Devices, Inc. 4 5 6 #include "dml2_internal_shared_types.h" 7 #include "dml2_core_dcn4_calcs.h" 8 #include "dml2_debug.h" 9 #include "lib_float_math.h" 10 #include "lib_frl_cap_check.h" 11 #include "dml_top_types.h" 12 13 #define DML2_MAX_FMT_420_BUFFER_WIDTH 4096 14 #define DML_MAX_NUM_OF_SLICES_PER_DSC 4 15 #define DML_MAX_COMPRESSION_RATIO 4 16 //#define DML_MODE_SUPPORT_USE_DPM_DRAM_BW 17 //#define DML_GLOBAL_PREFETCH_CHECK 18 #define ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE 19 #define DML_MAX_VSTARTUP_START 1023 20 21 const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type) 22 { 23 switch (bw_type) { 24 case (dml2_core_internal_bw_sdp): 25 return("dml2_core_internal_bw_sdp"); 26 case (dml2_core_internal_bw_dram): 27 return("dml2_core_internal_bw_dram"); 28 case (dml2_core_internal_bw_max): 29 return("dml2_core_internal_bw_max"); 30 default: 31 return("dml2_core_internal_bw_unknown"); 32 } 33 } 34 35 const char *dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type) 36 { 37 switch (dml2_core_internal_soc_state_type) { 38 case (dml2_core_internal_soc_state_sys_idle): 39 return("dml2_core_internal_soc_state_sys_idle"); 40 case (dml2_core_internal_soc_state_sys_active): 41 return("dml2_core_internal_soc_state_sys_active"); 42 case (dml2_core_internal_soc_state_svp_prefetch): 43 return("dml2_core_internal_soc_state_svp_prefetch"); 44 case dml2_core_internal_soc_state_max: 45 default: 46 return("dml2_core_internal_soc_state_unknown"); 47 } 48 } 49 50 static double dml2_core_div_rem(double dividend, unsigned int divisor, unsigned int *remainder) 51 { 52 *remainder = ((dividend / divisor) - (int)(dividend / divisor) > 0); 53 return dividend / divisor; 54 } 55 56 static void dml2_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only) 57 { 58 DML_LOG_VERBOSE("DML: 
===================================== \n"); 59 DML_LOG_VERBOSE("DML: DML_MODE_SUPPORT_INFO_ST\n"); 60 if (!fail_only || support->ScaleRatioAndTapsSupport == 0) 61 DML_LOG_VERBOSE("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); 62 if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) 63 DML_LOG_VERBOSE("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); 64 if (!fail_only || support->ViewportSizeSupport == 0) 65 DML_LOG_VERBOSE("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); 66 if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) 67 DML_LOG_VERBOSE("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); 68 if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1) 69 DML_LOG_VERBOSE("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated); 70 if (!fail_only || support->BPPForMultistreamNotIndicated == 1) 71 DML_LOG_VERBOSE("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); 72 if (!fail_only || support->MultistreamWithHDMIOreDP == 1) 73 DML_LOG_VERBOSE("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); 74 if (!fail_only || support->ExceededMultistreamSlots == 1) 75 DML_LOG_VERBOSE("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); 76 if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) 77 DML_LOG_VERBOSE("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); 78 if (!fail_only || support->NotEnoughLanesForMSO == 1) 79 DML_LOG_VERBOSE("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); 80 if (!fail_only || support->P2IWith420 == 1) 81 DML_LOG_VERBOSE("DML: support: P2IWith420 = %d\n", support->P2IWith420); 82 if (!fail_only || support->DSC422NativeNotSupported == 1) 83 
DML_LOG_VERBOSE("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); 84 if (!fail_only || support->DSCSlicesODMModeSupported == 0) 85 DML_LOG_VERBOSE("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); 86 if (!fail_only || support->NotEnoughDSCUnits == 1) 87 DML_LOG_VERBOSE("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); 88 if (!fail_only || support->NotEnoughDSCSlices == 1) 89 DML_LOG_VERBOSE("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); 90 if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) 91 DML_LOG_VERBOSE("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); 92 if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) 93 DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); 94 if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) 95 DML_LOG_VERBOSE("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); 96 if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) 97 DML_LOG_VERBOSE("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); 98 if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) 99 DML_LOG_VERBOSE("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported); 100 if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) 101 DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); 102 if (!fail_only || support->ROBSupport == 0) 103 DML_LOG_VERBOSE("DML: support: ROBSupport = %d\n", support->ROBSupport); 104 if (!fail_only || 
support->OutstandingRequestsSupport == 0) 105 DML_LOG_VERBOSE("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); 106 if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0) 107 DML_LOG_VERBOSE("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); 108 if (!fail_only || support->DISPCLK_DPPCLK_Support == 0) 109 DML_LOG_VERBOSE("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); 110 if (!fail_only || support->TotalAvailablePipesSupport == 0) 111 DML_LOG_VERBOSE("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); 112 if (!fail_only || support->NumberOfOTGSupport == 0) 113 DML_LOG_VERBOSE("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); 114 if (!fail_only || support->NumberOfHDMIFRLSupport == 0) 115 DML_LOG_VERBOSE("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); 116 if (!fail_only || support->NumberOfDP2p0Support == 0) 117 DML_LOG_VERBOSE("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); 118 if (!fail_only || support->EnoughWritebackUnits == 0) 119 DML_LOG_VERBOSE("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); 120 if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) 121 DML_LOG_VERBOSE("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); 122 if (!fail_only || support->WritebackLatencySupport == 0) 123 DML_LOG_VERBOSE("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); 124 if (!fail_only || support->CursorSupport == 0) 125 DML_LOG_VERBOSE("DML: support: CursorSupport = %d\n", support->CursorSupport); 126 if (!fail_only || support->PitchSupport == 0) 127 DML_LOG_VERBOSE("DML: support: PitchSupport = %d\n", support->PitchSupport); 128 if (!fail_only || support->ViewportExceedsSurface == 1) 129 
DML_LOG_VERBOSE("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); 130 if (!fail_only || support->PrefetchSupported == 0) 131 DML_LOG_VERBOSE("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); 132 if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) 133 DML_LOG_VERBOSE("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); 134 if (!fail_only || support->AvgBandwidthSupport == 0) 135 DML_LOG_VERBOSE("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); 136 if (!fail_only || support->DynamicMetadataSupported == 0) 137 DML_LOG_VERBOSE("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); 138 if (!fail_only || support->VRatioInPrefetchSupported == 0) 139 DML_LOG_VERBOSE("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); 140 if (!fail_only || support->PTEBufferSizeNotExceeded == 0) 141 DML_LOG_VERBOSE("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); 142 if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 0) 143 DML_LOG_VERBOSE("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); 144 if (!fail_only || support->ExceededMALLSize == 1) 145 DML_LOG_VERBOSE("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); 146 if (!fail_only || support->g6_temp_read_support == 0) 147 DML_LOG_VERBOSE("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); 148 if (!fail_only || support->ImmediateFlipSupport == 0) 149 DML_LOG_VERBOSE("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); 150 if (!fail_only || support->LinkCapacitySupport == 0) 151 DML_LOG_VERBOSE("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); 152 153 if (!fail_only || support->ModeSupport == 0) 154 DML_LOG_VERBOSE("DML: support: ModeSupport = %d\n", 
support->ModeSupport); 155 DML_LOG_VERBOSE("DML: ===================================== \n"); 156 } 157 158 static void get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg) 159 { 160 for (unsigned int k = 0; k < display_cfg->num_planes; k++) { 161 double bpc = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.bpc; 162 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_disable) { 163 switch (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format) { 164 case dml2_444: 165 out_bpp[k] = bpc * 3; 166 break; 167 case dml2_s422: 168 out_bpp[k] = bpc * 2; 169 break; 170 case dml2_n422: 171 out_bpp[k] = bpc * 2; 172 break; 173 case dml2_420: 174 default: 175 out_bpp[k] = bpc * 1.5; 176 break; 177 } 178 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable) { 179 out_bpp[k] = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.dsc_compressed_bpp_x16 / 16; 180 } else { 181 out_bpp[k] = 0; 182 } 183 DML_LOG_VERBOSE("DML::%s: k=%d bpc=%f\n", __func__, k, bpc); 184 DML_LOG_VERBOSE("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable); 185 DML_LOG_VERBOSE("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]); 186 } 187 } 188 189 static unsigned int dml_round_to_multiple(unsigned int num, unsigned int multiple, bool up) 190 { 191 unsigned int remainder; 192 193 if (multiple == 0) 194 return num; 195 196 remainder = num % multiple; 197 if (remainder == 0) 198 return num; 199 200 if (up) 201 return (num + multiple - remainder); 202 else 203 return (num - remainder); 204 } 205 206 static unsigned int dml_get_num_active_pipes(unsigned int num_planes, const struct core_display_cfg_support_info 
*cfg_support_info) 207 { 208 unsigned int num_active_pipes = 0; 209 210 for (unsigned int k = 0; k < num_planes; k++) { 211 num_active_pipes = num_active_pipes + (unsigned int)cfg_support_info->plane_support_info[k].dpps_used; 212 } 213 214 DML_LOG_VERBOSE("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes); 215 return num_active_pipes; 216 } 217 218 static void dml_calc_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane) 219 { 220 unsigned int pipe_idx = 0; 221 222 for (unsigned int k = 0; k < DML2_MAX_PLANES; ++k) { 223 pipe_plane[k] = __DML2_CALCS_PIPE_NO_PLANE__; 224 } 225 226 for (unsigned int plane_idx = 0; plane_idx < DML2_MAX_PLANES; plane_idx++) { 227 for (int i = 0; i < cfg_support_info->plane_support_info[plane_idx].dpps_used; i++) { 228 pipe_plane[pipe_idx] = plane_idx; 229 pipe_idx++; 230 } 231 } 232 } 233 234 static bool dml_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg) 235 { 236 bool is_phantom = false; 237 238 if (plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe || 239 plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) { 240 is_phantom = true; 241 } 242 243 return is_phantom; 244 } 245 246 static bool dml_get_is_phantom_pipe(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx) 247 { 248 unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; 249 250 bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_idx]); 251 DML_LOG_VERBOSE("DML::%s: pipe_idx=%d legacy_svp_config=%0d is_phantom=%d\n", __func__, pipe_idx, display_cfg->plane_descriptors[plane_idx].overrides.legacy_svp_config, is_phantom); 252 return is_phantom; 253 } 254 255 #define dml_get_per_pipe_var_func(variable, type, interval_var) static type dml_get_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int 
pipe_idx) \ 256 { \ 257 unsigned int plane_idx; \ 258 plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; \ 259 return (type) interval_var[plane_idx]; \ 260 } 261 262 dml_get_per_pipe_var_func(dpte_group_size_in_bytes, unsigned int, mode_lib->mp.dpte_group_bytes); 263 dml_get_per_pipe_var_func(vm_group_size_in_bytes, unsigned int, mode_lib->mp.vm_group_bytes); 264 dml_get_per_pipe_var_func(swath_height_l, unsigned int, mode_lib->mp.SwathHeightY); 265 dml_get_per_pipe_var_func(swath_height_c, unsigned int, mode_lib->mp.SwathHeightC); 266 dml_get_per_pipe_var_func(dpte_row_height_linear_l, unsigned int, mode_lib->mp.dpte_row_height_linear); 267 dml_get_per_pipe_var_func(dpte_row_height_linear_c, unsigned int, mode_lib->mp.dpte_row_height_linear_chroma); 268 269 dml_get_per_pipe_var_func(vstartup_calculated, unsigned int, mode_lib->mp.VStartup); 270 dml_get_per_pipe_var_func(vupdate_offset, unsigned int, mode_lib->mp.VUpdateOffsetPix); 271 dml_get_per_pipe_var_func(vupdate_width, unsigned int, mode_lib->mp.VUpdateWidthPix); 272 dml_get_per_pipe_var_func(vready_offset, unsigned int, mode_lib->mp.VReadyOffsetPix); 273 dml_get_per_pipe_var_func(pstate_keepout_dst_lines, unsigned int, mode_lib->mp.pstate_keepout_dst_lines); 274 dml_get_per_pipe_var_func(det_stored_buffer_size_l_bytes, unsigned int, mode_lib->mp.DETBufferSizeY); 275 dml_get_per_pipe_var_func(det_stored_buffer_size_c_bytes, unsigned int, mode_lib->mp.DETBufferSizeC); 276 dml_get_per_pipe_var_func(det_buffer_size_kbytes, unsigned int, mode_lib->mp.DETBufferSizeInKByte); 277 dml_get_per_pipe_var_func(surface_size_in_mall_bytes, unsigned int, mode_lib->mp.SurfaceSizeInTheMALL); 278 279 #define dml_get_per_plane_var_func(variable, type, interval_var) static type dml_get_plane_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int plane_idx) \ 280 { \ 281 return (type) interval_var[plane_idx]; \ 282 } 283 284 dml_get_per_plane_var_func(num_mcaches_plane0, unsigned int, 
mode_lib->ms.num_mcaches_l); 285 dml_get_per_plane_var_func(mcache_row_bytes_plane0, unsigned int, mode_lib->ms.mcache_row_bytes_l); 286 dml_get_per_plane_var_func(mcache_shift_granularity_plane0, unsigned int, mode_lib->ms.mcache_shift_granularity_l); 287 dml_get_per_plane_var_func(num_mcaches_plane1, unsigned int, mode_lib->ms.num_mcaches_c); 288 dml_get_per_plane_var_func(mcache_row_bytes_plane1, unsigned int, mode_lib->ms.mcache_row_bytes_c); 289 dml_get_per_plane_var_func(mcache_shift_granularity_plane1, unsigned int, mode_lib->ms.mcache_shift_granularity_c); 290 dml_get_per_plane_var_func(mall_comb_mcache_l, unsigned int, mode_lib->ms.mall_comb_mcache_l); 291 dml_get_per_plane_var_func(mall_comb_mcache_c, unsigned int, mode_lib->ms.mall_comb_mcache_c); 292 dml_get_per_plane_var_func(lc_comb_mcache, unsigned int, mode_lib->ms.lc_comb_mcache); 293 dml_get_per_plane_var_func(subviewport_lines_needed_in_mall, unsigned int, mode_lib->ms.SubViewportLinesNeededInMALL); 294 dml_get_per_plane_var_func(max_vstartup_lines, unsigned int, mode_lib->ms.MaxVStartupLines); 295 296 #define dml_get_per_plane_array_var_func(variable, type, interval_var) static type dml_get_plane_array_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int plane_idx, unsigned int array_idx) \ 297 { \ 298 return (type) interval_var[plane_idx][array_idx]; \ 299 } 300 301 dml_get_per_plane_array_var_func(mcache_offsets_plane0, unsigned int, mode_lib->ms.mcache_offsets_l); 302 dml_get_per_plane_array_var_func(mcache_offsets_plane1, unsigned int, mode_lib->ms.mcache_offsets_c); 303 304 #define dml_get_var_func(var, type, internal_var) static type dml_get_##var(const struct dml2_core_internal_display_mode_lib *mode_lib) \ 305 { \ 306 return (type) internal_var; \ 307 } 308 309 dml_get_var_func(wm_urgent, double, mode_lib->mp.Watermark.UrgentWatermark); 310 dml_get_var_func(wm_stutter_exit, double, mode_lib->mp.Watermark.StutterExitWatermark); 311 
/*
 * Each dml_get_var_func(name, type, expr) line instantiates a static getter
 * `type dml_get_<name>(mode_lib)` that returns `expr`.
 */

/* Values read from mode_lib->mp.Watermark, plus two mode_lib->mp latency fields. */
dml_get_var_func(wm_stutter_enter_exit, double, mode_lib->mp.Watermark.StutterEnterPlusExitWatermark);
dml_get_var_func(wm_z8_stutter_exit, double, mode_lib->mp.Watermark.Z8StutterExitWatermark);
dml_get_var_func(wm_z8_stutter_enter_exit, double, mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark);
dml_get_var_func(wm_memory_trip, double, mode_lib->mp.UrgentLatency);
dml_get_var_func(meta_trip_memory_us, double, mode_lib->mp.MetaTripToMemory);

dml_get_var_func(wm_fclk_change, double, mode_lib->mp.Watermark.FCLKChangeWatermark);
dml_get_var_func(wm_usr_retraining, double, mode_lib->mp.Watermark.USRRetrainingWatermark);
dml_get_var_func(wm_temp_read_or_ppt, double, mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us);
dml_get_var_func(wm_dram_clock_change, double, mode_lib->mp.Watermark.DRAMClockChangeWatermark);
dml_get_var_func(fraction_of_urgent_bandwidth, double, mode_lib->mp.FractionOfUrgentBandwidth);
dml_get_var_func(fraction_of_urgent_bandwidth_imm_flip, double, mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip);
dml_get_var_func(fraction_of_urgent_bandwidth_mall, double, mode_lib->mp.FractionOfUrgentBandwidthMALL);
dml_get_var_func(wm_writeback_dram_clock_change, double, mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark);
dml_get_var_func(wm_writeback_fclk_change, double, mode_lib->mp.Watermark.WritebackFCLKChangeWatermark);

/* Stutter results stored in mode_lib->mp. */
dml_get_var_func(stutter_efficiency, double, mode_lib->mp.StutterEfficiency);
dml_get_var_func(stutter_efficiency_no_vblank, double, mode_lib->mp.StutterEfficiencyNotIncludingVBlank);
dml_get_var_func(stutter_num_bursts, double, mode_lib->mp.NumberOfStutterBurstsPerFrame);
dml_get_var_func(stutter_efficiency_z8, double, mode_lib->mp.Z8StutterEfficiency);
dml_get_var_func(stutter_efficiency_no_vblank_z8, double, mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank);
dml_get_var_func(stutter_num_bursts_z8, double, mode_lib->mp.Z8NumberOfStutterBurstsPerFrame);
dml_get_var_func(stutter_period, double, mode_lib->mp.StutterPeriod);
dml_get_var_func(stutter_efficiency_z8_bestcase, double, mode_lib->mp.Z8StutterEfficiencyBestCase);
dml_get_var_func(stutter_num_bursts_z8_bestcase, double, mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase);
dml_get_var_func(stutter_period_bestcase, double, mode_lib->mp.StutterPeriodBestCase);
dml_get_var_func(fclk_change_latency, double, mode_lib->mp.MaxActiveFCLKChangeLatencySupported);
/* The stored GlobalDPPCLK is scaled by 1000.0 before being returned. */
dml_get_var_func(global_dppclk_khz, double, mode_lib->mp.GlobalDPPCLK * 1000.0);

/* Average bandwidth required (mode_lib->ms.support), indexed [soc state][bw type]. */
dml_get_var_func(sys_active_avg_bw_required_sdp, double,
	mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
dml_get_var_func(sys_active_avg_bw_required_dram, double,
	mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);

dml_get_var_func(svp_prefetch_avg_bw_required_sdp, double,
	mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
dml_get_var_func(svp_prefetch_avg_bw_required_dram, double,
	mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);

/* Average bandwidth available (mode_lib->mp), indexed [soc state][bw type]. */
dml_get_var_func(sys_active_avg_bw_available_sdp, double,
	mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
dml_get_var_func(sys_active_avg_bw_available_dram, double,
	mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);

dml_get_var_func(svp_prefetch_avg_bw_available_sdp, double,
	mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
dml_get_var_func(svp_prefetch_avg_bw_available_dram, double,
	mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);

/* Urgent bandwidth available (mode_lib->mp); the *_vm_only table is indexed by soc state only. */
dml_get_var_func(sys_active_urg_bw_available_sdp, double,
	mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
dml_get_var_func(sys_active_urg_bw_available_dram, double,
	mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
dml_get_var_func(sys_active_urg_bw_available_dram_vm_only, double,
	mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);

dml_get_var_func(svp_prefetch_urg_bw_available_sdp, double,
	mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
dml_get_var_func(svp_prefetch_urg_bw_available_dram, double,
	mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
dml_get_var_func(svp_prefetch_urg_bw_available_dram_vm_only, double,
	mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_svp_prefetch]);

/* Latency values: one from mode_lib->mp, the rest from mode_lib->ms.support. */
dml_get_var_func(urgent_latency, double, mode_lib->mp.UrgentLatency);
dml_get_var_func(max_urgent_latency_us, double, mode_lib->ms.support.max_urgent_latency_us);
dml_get_var_func(max_non_urgent_latency_us, double, mode_lib->ms.support.max_non_urgent_latency_us);
dml_get_var_func(avg_non_urgent_latency_us, double, mode_lib->ms.support.avg_non_urgent_latency_us);
dml_get_var_func(avg_urgent_latency_us, double, mode_lib->ms.support.avg_urgent_latency_us);

/* Urgent bandwidth required (mode_lib->mp), indexed [soc state][bw type]. */
dml_get_var_func(sys_active_urg_bw_required_sdp, double,
	mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
dml_get_var_func(sys_active_urg_bw_required_dram, double,
	mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
/*
 * Getters for required-bandwidth tables in mode_lib->mp, each indexed
 * [soc state][bw type]. Every dml_get_var_func(name, type, expr) line
 * instantiates `static type dml_get_<name>(mode_lib)` returning `expr`.
 */

/* urg_bandwidth_required, svp_prefetch state. */
dml_get_var_func(svp_prefetch_urg_bw_required_sdp, double,
	mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
dml_get_var_func(svp_prefetch_urg_bw_required_dram, double,
	mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);

/* non_urg_bandwidth_required. */
dml_get_var_func(sys_active_non_urg_required_sdp, double,
	mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
dml_get_var_func(sys_active_non_urg_required_dram, double,
	mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
dml_get_var_func(svp_prefetch_non_urg_bw_required_sdp, double,
	mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
dml_get_var_func(svp_prefetch_non_urg_bw_required_dram, double,
	mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);

/* urg_bandwidth_required_flip. */
dml_get_var_func(sys_active_urg_bw_required_sdp_flip, double,
	mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
dml_get_var_func(sys_active_urg_bw_required_dram_flip, double,
	mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
dml_get_var_func(svp_prefetch_urg_bw_required_sdp_flip, double,
	mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
dml_get_var_func(svp_prefetch_urg_bw_required_dram_flip, double,
	mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);

/* non_urg_bandwidth_required_flip. */
dml_get_var_func(sys_active_non_urg_required_sdp_flip, double,
	mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
dml_get_var_func(sys_active_non_urg_required_dram_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); 382 dml_get_var_func(svp_prefetch_non_urg_bw_required_sdp_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); 383 dml_get_var_func(svp_prefetch_non_urg_bw_required_dram_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); 384 385 dml_get_var_func(comp_buffer_size_kbytes, unsigned int, mode_lib->mp.CompressedBufferSizeInkByte); 386 387 dml_get_var_func(unbounded_request_enabled, bool, mode_lib->mp.UnboundedRequestEnabled); 388 dml_get_var_func(wm_writeback_urgent, double, mode_lib->mp.Watermark.WritebackUrgentWatermark); 389 dml_get_var_func(cstate_max_cap_mode, bool, mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); 390 dml_get_var_func(compbuf_reserved_space_64b, unsigned int, mode_lib->mp.compbuf_reserved_space_64b); 391 dml_get_var_func(hw_debug5, bool, mode_lib->mp.hw_debug5); 392 dml_get_var_func(dcfclk_deep_sleep_hysteresis, unsigned int, mode_lib->mp.dcfclk_deep_sleep_hysteresis); 393 394 static void CalculateMaxDETAndMinCompressedBufferSize( 395 unsigned int ConfigReturnBufferSizeInKByte, 396 unsigned int ConfigReturnBufferSegmentSizeInKByte, 397 unsigned int ROBBufferSizeInKByte, 398 unsigned int MaxNumDPP, 399 unsigned int nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size 400 unsigned int nomDETInKByteOverrideValue, // VBA_DELTA 401 bool is_mrq_present, 402 403 // Output 404 unsigned int *MaxTotalDETInKByte, 405 unsigned int *nomDETInKByte, 406 unsigned int *MinCompressedBufferSizeInKByte) 407 { 408 if (is_mrq_present) 409 *MaxTotalDETInKByte = (unsigned int) math_ceil2((double)(ConfigReturnBufferSizeInKByte + ROBBufferSizeInKByte)*4/5, 64); 410 else 411 *MaxTotalDETInKByte = 
ConfigReturnBufferSizeInKByte - ConfigReturnBufferSegmentSizeInKByte; 412 413 *nomDETInKByte = (unsigned int)(math_floor2((double)*MaxTotalDETInKByte / (double)MaxNumDPP, ConfigReturnBufferSegmentSizeInKByte)); 414 *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte; 415 416 DML_LOG_VERBOSE("DML::%s: is_mrq_present = %u\n", __func__, is_mrq_present); 417 DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte); 418 DML_LOG_VERBOSE("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte); 419 DML_LOG_VERBOSE("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP); 420 DML_LOG_VERBOSE("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte); 421 DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, *nomDETInKByte); 422 DML_LOG_VERBOSE("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte); 423 424 if (nomDETInKByteOverrideEnable) { 425 *nomDETInKByte = nomDETInKByteOverrideValue; 426 DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte); 427 } 428 } 429 430 static void PixelClockAdjustmentForProgressiveToInterlaceUnit(const struct dml2_display_cfg *display_cfg, bool ptoi_supported, double *PixelClockBackEnd) 431 { 432 //unsigned int num_active_planes = display_cfg->num_planes; 433 434 //Progressive To Interlace Unit Effect 435 for (unsigned int k = 0; k < display_cfg->num_planes; ++k) { 436 PixelClockBackEnd[k] = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); 437 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && ptoi_supported == true) { 438 // FIXME_STAGE2... 
can sw pass the pixel rate for interlaced directly 439 //display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz = 2 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz; 440 } 441 } 442 } 443 444 static bool dml_is_420(enum dml2_source_format_class source_format) 445 { 446 bool val = false; 447 448 switch (source_format) { 449 case dml2_444_8: 450 val = 0; 451 break; 452 case dml2_444_16: 453 val = 0; 454 break; 455 case dml2_444_32: 456 val = 0; 457 break; 458 case dml2_444_64: 459 val = 0; 460 break; 461 case dml2_420_8: 462 val = 1; 463 break; 464 case dml2_420_10: 465 val = 1; 466 break; 467 case dml2_420_12: 468 val = 1; 469 break; 470 case dml2_422_planar_8: 471 val = 0; 472 break; 473 case dml2_422_planar_10: 474 val = 0; 475 break; 476 case dml2_422_planar_12: 477 val = 0; 478 break; 479 case dml2_422_packed_8: 480 val = 0; 481 break; 482 case dml2_422_packed_10: 483 val = 0; 484 break; 485 case dml2_422_packed_12: 486 val = 0; 487 break; 488 case dml2_rgbe_alpha: 489 val = 0; 490 break; 491 case dml2_rgbe: 492 val = 0; 493 break; 494 case dml2_mono_8: 495 val = 0; 496 break; 497 case dml2_mono_16: 498 val = 0; 499 break; 500 default: 501 DML_ASSERT(0); 502 break; 503 } 504 return val; 505 } 506 507 static unsigned int dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode) 508 { 509 if (sw_mode == dml2_sw_linear) 510 return 256; 511 else if (sw_mode == dml2_sw_256b_2d) 512 return 256; 513 else if (sw_mode == dml2_sw_4kb_2d) 514 return 4096; 515 else if (sw_mode == dml2_sw_64kb_2d) 516 return 65536; 517 else if (sw_mode == dml2_sw_256kb_2d) 518 return 262144; 519 else if (sw_mode == dml2_gfx11_sw_linear) 520 return 256; 521 else if (sw_mode == dml2_gfx11_sw_64kb_d) 522 return 65536; 523 else if (sw_mode == dml2_gfx11_sw_64kb_d_t) 524 return 65536; 525 else if (sw_mode == dml2_gfx11_sw_64kb_d_x) 526 return 65536; 527 else if (sw_mode == 
dml2_gfx11_sw_64kb_r_x) 528 return 65536; 529 else if (sw_mode == dml2_gfx11_sw_256kb_d_x) 530 return 262144; 531 else if (sw_mode == dml2_gfx11_sw_256kb_r_x) 532 return 262144; 533 else { 534 DML_ASSERT(0); 535 return 256; 536 } 537 } 538 539 static bool dml_is_vertical_rotation(enum dml2_rotation_angle Scan) 540 { 541 bool is_vert = false; 542 if (Scan == dml2_rotation_90 || Scan == dml2_rotation_270) { 543 is_vert = true; 544 } else { 545 is_vert = false; 546 } 547 return is_vert; 548 } 549 550 static unsigned int dml_get_gfx_version(enum dml2_swizzle_mode sw_mode) 551 { 552 unsigned int version = 0; 553 554 if (sw_mode == dml2_sw_linear || 555 sw_mode == dml2_sw_256b_2d || 556 sw_mode == dml2_sw_4kb_2d || 557 sw_mode == dml2_sw_64kb_2d || 558 sw_mode == dml2_sw_256kb_2d) { 559 version = 12; 560 } else if (sw_mode == dml2_gfx11_sw_linear || 561 sw_mode == dml2_gfx11_sw_64kb_d || 562 sw_mode == dml2_gfx11_sw_64kb_d_t || 563 sw_mode == dml2_gfx11_sw_64kb_d_x || 564 sw_mode == dml2_gfx11_sw_64kb_r_x || 565 sw_mode == dml2_gfx11_sw_256kb_d_x || 566 sw_mode == dml2_gfx11_sw_256kb_r_x) { 567 version = 11; 568 } else { 569 DML_LOG_VERBOSE("ERROR: Invalid sw_mode setting! 
val=%u\n", sw_mode); 570 DML_ASSERT(0); 571 } 572 573 return version; 574 } 575 576 static void CalculateBytePerPixelAndBlockSizes( 577 enum dml2_source_format_class SourcePixelFormat, 578 enum dml2_swizzle_mode SurfaceTiling, 579 unsigned int pitch_y, 580 unsigned int pitch_c, 581 582 // Output 583 unsigned int *BytePerPixelY, 584 unsigned int *BytePerPixelC, 585 double *BytePerPixelDETY, 586 double *BytePerPixelDETC, 587 unsigned int *BlockHeight256BytesY, 588 unsigned int *BlockHeight256BytesC, 589 unsigned int *BlockWidth256BytesY, 590 unsigned int *BlockWidth256BytesC, 591 unsigned int *MacroTileHeightY, 592 unsigned int *MacroTileHeightC, 593 unsigned int *MacroTileWidthY, 594 unsigned int *MacroTileWidthC, 595 bool *surf_linear128_l, 596 bool *surf_linear128_c) 597 { 598 *BytePerPixelDETY = 0; 599 *BytePerPixelDETC = 0; 600 *BytePerPixelY = 1; 601 *BytePerPixelC = 1; 602 603 if (SourcePixelFormat == dml2_444_64) { 604 *BytePerPixelDETY = 8; 605 *BytePerPixelDETC = 0; 606 *BytePerPixelY = 8; 607 *BytePerPixelC = 0; 608 } else if (SourcePixelFormat == dml2_444_32 || SourcePixelFormat == dml2_rgbe) { 609 *BytePerPixelDETY = 4; 610 *BytePerPixelDETC = 0; 611 *BytePerPixelY = 4; 612 *BytePerPixelC = 0; 613 } else if (SourcePixelFormat == dml2_444_16 || SourcePixelFormat == dml2_mono_16) { 614 *BytePerPixelDETY = 2; 615 *BytePerPixelDETC = 0; 616 *BytePerPixelY = 2; 617 *BytePerPixelC = 0; 618 } else if (SourcePixelFormat == dml2_444_8 || SourcePixelFormat == dml2_mono_8) { 619 *BytePerPixelDETY = 1; 620 *BytePerPixelDETC = 0; 621 *BytePerPixelY = 1; 622 *BytePerPixelC = 0; 623 } else if (SourcePixelFormat == dml2_rgbe_alpha) { 624 *BytePerPixelDETY = 4; 625 *BytePerPixelDETC = 1; 626 *BytePerPixelY = 4; 627 *BytePerPixelC = 1; 628 } else if (SourcePixelFormat == dml2_420_8) { 629 *BytePerPixelDETY = 1; 630 *BytePerPixelDETC = 2; 631 *BytePerPixelY = 1; 632 *BytePerPixelC = 2; 633 } else if (SourcePixelFormat == dml2_420_12) { 634 *BytePerPixelDETY = 2; 635 
*BytePerPixelDETC = 4; 636 *BytePerPixelY = 2; 637 *BytePerPixelC = 4; 638 } else if (SourcePixelFormat == dml2_420_10) { 639 *BytePerPixelDETY = (double)(4.0 / 3); 640 *BytePerPixelDETC = (double)(8.0 / 3); 641 *BytePerPixelY = 2; 642 *BytePerPixelC = 4; 643 } else { 644 DML_LOG_VERBOSE("ERROR: DML::%s: SourcePixelFormat = %u not supported!\n", __func__, SourcePixelFormat); 645 DML_ASSERT(0); 646 } 647 648 DML_LOG_VERBOSE("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat); 649 DML_LOG_VERBOSE("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY); 650 DML_LOG_VERBOSE("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC); 651 DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY); 652 DML_LOG_VERBOSE("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC); 653 DML_LOG_VERBOSE("DML::%s: pitch_y = %u\n", __func__, pitch_y); 654 DML_LOG_VERBOSE("DML::%s: pitch_c = %u\n", __func__, pitch_c); 655 DML_LOG_VERBOSE("DML::%s: surf_linear128_l = %u\n", __func__, *surf_linear128_l); 656 DML_LOG_VERBOSE("DML::%s: surf_linear128_c = %u\n", __func__, *surf_linear128_c); 657 658 if (dml_get_gfx_version(SurfaceTiling) == 11) { 659 *surf_linear128_l = 0; 660 *surf_linear128_c = 0; 661 } else { 662 if (SurfaceTiling == dml2_sw_linear) { 663 *surf_linear128_l = (((pitch_y * *BytePerPixelY) % 256) != 0); 664 665 if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) 666 *surf_linear128_c = (((pitch_c * *BytePerPixelC) % 256) != 0); 667 } 668 } 669 670 if (!(dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha)) { 671 if (SurfaceTiling == dml2_sw_linear) { 672 *BlockHeight256BytesY = 1; 673 } else if (SourcePixelFormat == dml2_444_64) { 674 *BlockHeight256BytesY = 4; 675 } else if (SourcePixelFormat == dml2_444_8) { 676 *BlockHeight256BytesY = 16; 677 } else { 678 *BlockHeight256BytesY = 8; 679 } 680 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 681 
*BlockHeight256BytesC = 0; 682 *BlockWidth256BytesC = 0; 683 } else { // dual plane 684 if (SurfaceTiling == dml2_sw_linear) { 685 *BlockHeight256BytesY = 1; 686 *BlockHeight256BytesC = 1; 687 } else if (SourcePixelFormat == dml2_rgbe_alpha) { 688 *BlockHeight256BytesY = 8; 689 *BlockHeight256BytesC = 16; 690 } else if (SourcePixelFormat == dml2_420_8) { 691 *BlockHeight256BytesY = 16; 692 *BlockHeight256BytesC = 8; 693 } else { 694 *BlockHeight256BytesY = 8; 695 *BlockHeight256BytesC = 8; 696 } 697 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 698 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC; 699 } 700 DML_LOG_VERBOSE("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY); 701 DML_LOG_VERBOSE("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY); 702 DML_LOG_VERBOSE("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC); 703 DML_LOG_VERBOSE("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC); 704 705 if (dml_get_gfx_version(SurfaceTiling) == 11) { 706 if (SurfaceTiling == dml2_gfx11_sw_linear) { 707 *MacroTileHeightY = *BlockHeight256BytesY; 708 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY; 709 *MacroTileHeightC = *BlockHeight256BytesC; 710 if (*MacroTileHeightC == 0) { 711 *MacroTileWidthC = 0; 712 } else { 713 *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC; 714 } 715 } else if (SurfaceTiling == dml2_gfx11_sw_64kb_d || SurfaceTiling == dml2_gfx11_sw_64kb_d_t || SurfaceTiling == dml2_gfx11_sw_64kb_d_x || SurfaceTiling == dml2_gfx11_sw_64kb_r_x) { 716 *MacroTileHeightY = 16 * *BlockHeight256BytesY; 717 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY; 718 *MacroTileHeightC = 16 * *BlockHeight256BytesC; 719 if (*MacroTileHeightC == 0) { 720 *MacroTileWidthC = 0; 721 } else { 722 *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC; 723 } 724 } else { 725 *MacroTileHeightY = 32 * 
*BlockHeight256BytesY; 726 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY; 727 *MacroTileHeightC = 32 * *BlockHeight256BytesC; 728 if (*MacroTileHeightC == 0) { 729 *MacroTileWidthC = 0; 730 } else { 731 *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC; 732 } 733 } 734 } else { 735 unsigned int macro_tile_size_bytes = dml_get_tile_block_size_bytes(SurfaceTiling); 736 unsigned int macro_tile_scale = 1; // macro tile to 256B req scaling 737 738 if (SurfaceTiling == dml2_sw_linear) { 739 macro_tile_scale = 1; 740 } else if (SurfaceTiling == dml2_sw_4kb_2d) { 741 macro_tile_scale = 4; 742 } else if (SurfaceTiling == dml2_sw_64kb_2d) { 743 macro_tile_scale = 16; 744 } else if (SurfaceTiling == dml2_sw_256kb_2d) { 745 macro_tile_scale = 32; 746 } else { 747 DML_LOG_VERBOSE("ERROR: Invalid SurfaceTiling setting! val=%u\n", SurfaceTiling); 748 DML_ASSERT(0); 749 } 750 751 *MacroTileHeightY = macro_tile_scale * *BlockHeight256BytesY; 752 *MacroTileWidthY = macro_tile_size_bytes / *BytePerPixelY / *MacroTileHeightY; 753 *MacroTileHeightC = macro_tile_scale * *BlockHeight256BytesC; 754 if (*MacroTileHeightC == 0) { 755 *MacroTileWidthC = 0; 756 } else { 757 *MacroTileWidthC = macro_tile_size_bytes / *BytePerPixelC / *MacroTileHeightC; 758 } 759 } 760 761 DML_LOG_VERBOSE("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY); 762 DML_LOG_VERBOSE("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY); 763 DML_LOG_VERBOSE("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC); 764 DML_LOG_VERBOSE("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC); 765 } 766 767 static void CalculateSinglePipeDPPCLKAndSCLThroughput( 768 double HRatio, 769 double HRatioChroma, 770 double VRatio, 771 double VRatioChroma, 772 double MaxDCHUBToPSCLThroughput, 773 double MaxPSCLToLBThroughput, 774 double PixelClock, 775 enum dml2_source_format_class SourcePixelFormat, 776 unsigned int HTaps, 777 unsigned int HTapsChroma, 778 
unsigned int VTaps, 779 unsigned int VTapsChroma, 780 781 // Output 782 double *PSCL_THROUGHPUT, 783 double *PSCL_THROUGHPUT_CHROMA, 784 double *DPPCLKUsingSingleDPP) 785 { 786 double DPPCLKUsingSingleDPPLuma; 787 double DPPCLKUsingSingleDPPChroma; 788 789 if (HRatio > 1) { 790 *PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / math_ceil2((double)HTaps / 6.0, 1.0)); 791 } else { 792 *PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); 793 } 794 795 DPPCLKUsingSingleDPPLuma = PixelClock * math_max3(VTaps / 6 * math_min2(1, HRatio), HRatio * VRatio / *PSCL_THROUGHPUT, 1); 796 797 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock) 798 DPPCLKUsingSingleDPPLuma = 2 * PixelClock; 799 800 if (!dml_is_420(SourcePixelFormat) && SourcePixelFormat != dml2_rgbe_alpha) { 801 *PSCL_THROUGHPUT_CHROMA = 0; 802 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma; 803 } else { 804 if (HRatioChroma > 1) { 805 *PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatioChroma / math_ceil2((double)HTapsChroma / 6.0, 1.0)); 806 } else { 807 *PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); 808 } 809 DPPCLKUsingSingleDPPChroma = PixelClock * math_max3(VTapsChroma / 6 * math_min2(1, HRatioChroma), 810 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1); 811 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock) 812 DPPCLKUsingSingleDPPChroma = 2 * PixelClock; 813 *DPPCLKUsingSingleDPP = math_max2(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma); 814 } 815 } 816 817 static void CalculateSwathWidth( 818 const struct dml2_display_cfg *display_cfg, 819 bool ForceSingleDPP, 820 unsigned int NumberOfActiveSurfaces, 821 enum dml2_odm_mode ODMMode[], 822 unsigned int BytePerPixY[], 823 unsigned int BytePerPixC[], 824 unsigned int Read256BytesBlockHeightY[], 825 unsigned int Read256BytesBlockHeightC[], 
826 unsigned int Read256BytesBlockWidthY[], 827 unsigned int Read256BytesBlockWidthC[], 828 bool surf_linear128_l[], 829 bool surf_linear128_c[], 830 unsigned int DPPPerSurface[], 831 832 // Output 833 unsigned int req_per_swath_ub_l[], 834 unsigned int req_per_swath_ub_c[], 835 unsigned int SwathWidthSingleDPPY[], // post-rotated plane width 836 unsigned int SwathWidthSingleDPPC[], 837 unsigned int SwathWidthY[], // per-pipe 838 unsigned int SwathWidthC[], // per-pipe 839 unsigned int MaximumSwathHeightY[], 840 unsigned int MaximumSwathHeightC[], 841 unsigned int swath_width_luma_ub[], // per-pipe 842 unsigned int swath_width_chroma_ub[]) // per-pipe 843 { 844 (void)BytePerPixY; 845 enum dml2_odm_mode MainSurfaceODMMode; 846 double odm_hactive_factor = 1.0; 847 unsigned int req_width_horz_y; 848 unsigned int req_width_horz_c; 849 unsigned int surface_width_ub_l; 850 unsigned int surface_height_ub_l; 851 unsigned int surface_width_ub_c; 852 unsigned int surface_height_ub_c; 853 854 DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP); 855 DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); 856 857 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { 858 if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) { 859 SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.width; 860 } else { 861 SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.height; 862 } 863 864 DML_LOG_VERBOSE("DML::%s: k=%u ViewportWidth=%lu\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width); 865 DML_LOG_VERBOSE("DML::%s: k=%u ViewportHeight=%lu\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height); 866 DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]); 867 868 MainSurfaceODMMode = ODMMode[k]; 
869 870 if (ForceSingleDPP) { 871 SwathWidthY[k] = SwathWidthSingleDPPY[k]; 872 } else { 873 if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1) 874 odm_hactive_factor = 4.0; 875 else if (MainSurfaceODMMode == dml2_odm_mode_combine_3to1) 876 odm_hactive_factor = 3.0; 877 else if (MainSurfaceODMMode == dml2_odm_mode_combine_2to1) 878 odm_hactive_factor = 2.0; 879 880 if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1 || MainSurfaceODMMode == dml2_odm_mode_combine_3to1 || MainSurfaceODMMode == dml2_odm_mode_combine_2to1) { 881 SwathWidthY[k] = (unsigned int)(math_min2((double)SwathWidthSingleDPPY[k], math_round((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active / odm_hactive_factor * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio))); 882 } else if (DPPPerSurface[k] == 2) { 883 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2; 884 } else { 885 SwathWidthY[k] = SwathWidthSingleDPPY[k]; 886 } 887 } 888 889 DML_LOG_VERBOSE("DML::%s: k=%u HActive=%lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active); 890 DML_LOG_VERBOSE("DML::%s: k=%u HRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); 891 DML_LOG_VERBOSE("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode); 892 DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", __func__, k, SwathWidthSingleDPPY[k]); 893 DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]); 894 895 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) { 896 SwathWidthC[k] = SwathWidthY[k] / 2; 897 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2; 898 } else { 899 SwathWidthC[k] = SwathWidthY[k]; 900 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k]; 901 } 902 903 if (ForceSingleDPP == true) { 904 SwathWidthY[k] = SwathWidthSingleDPPY[k]; 905 SwathWidthC[k] = SwathWidthSingleDPPC[k]; 906 } 907 908 
req_width_horz_y = Read256BytesBlockWidthY[k]; 909 req_width_horz_c = Read256BytesBlockWidthC[k]; 910 911 if (surf_linear128_l[k]) 912 req_width_horz_y = req_width_horz_y / 2; 913 914 if (surf_linear128_c[k]) 915 req_width_horz_c = req_width_horz_c / 2; 916 917 surface_width_ub_l = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.width, req_width_horz_y); 918 surface_height_ub_l = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.height, Read256BytesBlockHeightY[k]); 919 surface_width_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.width, req_width_horz_c); 920 surface_height_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.height, Read256BytesBlockHeightC[k]); 921 922 DML_LOG_VERBOSE("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l); 923 DML_LOG_VERBOSE("DML::%s: k=%u surface_height_ub_l=%u\n", __func__, k, surface_height_ub_l); 924 DML_LOG_VERBOSE("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c); 925 DML_LOG_VERBOSE("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c); 926 DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y); 927 DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c); 928 DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]); 929 DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]); 930 DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]); 931 DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]); 932 DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y); 933 DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, 
req_width_horz_c); 934 DML_LOG_VERBOSE("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.stationary); 935 DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]); 936 937 req_per_swath_ub_l[k] = 0; 938 req_per_swath_ub_c[k] = 0; 939 if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) { 940 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; 941 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; 942 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) { 943 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start + SwathWidthY[k] + req_width_horz_y - 1, req_width_horz_y) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start, req_width_horz_y))); 944 } else { 945 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, math_ceil2((double)SwathWidthY[k] - 1, req_width_horz_y) + req_width_horz_y)); 946 } 947 req_per_swath_ub_l[k] = swath_width_luma_ub[k] / req_width_horz_y; 948 949 if (BytePerPixC[k] > 0) { 950 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) { 951 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start + SwathWidthC[k] + req_width_horz_c - 1, req_width_horz_c) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start, req_width_horz_c))); 952 } else { 953 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_ceil2((double)SwathWidthC[k] - 1, req_width_horz_c) + req_width_horz_c)); 954 } 955 req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / req_width_horz_c; 956 } else { 957 swath_width_chroma_ub[k] = 0; 958 } 959 } else { 960 MaximumSwathHeightY[k] = 
Read256BytesBlockWidthY[k]; 961 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; 962 963 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) { 964 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start + SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, Read256BytesBlockHeightY[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start, Read256BytesBlockHeightY[k]))); 965 } else { 966 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_ceil2((double)SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k])); 967 } 968 req_per_swath_ub_l[k] = swath_width_luma_ub[k] / Read256BytesBlockHeightY[k]; 969 if (BytePerPixC[k] > 0) { 970 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) { 971 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start + SwathWidthC[k] + Read256BytesBlockHeightC[k] - 1, Read256BytesBlockHeightC[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start, Read256BytesBlockHeightC[k]))); 972 } else { 973 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_ceil2((double)SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k])); 974 } 975 req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / Read256BytesBlockHeightC[k]; 976 } else { 977 swath_width_chroma_ub[k] = 0; 978 } 979 } 980 981 DML_LOG_VERBOSE("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]); 982 DML_LOG_VERBOSE("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]); 983 DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, MaximumSwathHeightY[k]); 984 DML_LOG_VERBOSE("DML::%s: k=%u 
/*
 * Decide whether unbounded request is enabled.
 *
 * It is permitted only when exactly one DPP is active and NoChromaOrLinear is
 * set. Without a force, the result equals that permission; with
 * unb_req_force_en, the forced value is used but still gated by it.
 */
static bool UnboundedRequest(bool unb_req_force_en, bool unb_req_force_val, unsigned int TotalNumberOfActiveDPP, bool NoChromaOrLinear)
{
	const bool permitted = (TotalNumberOfActiveDPP == 1) && NoChromaOrLinear;
	const bool enabled = unb_req_force_en ? (unb_req_force_val && permitted) : permitted;

	DML_LOG_VERBOSE("DML::%s: unb_req_force_en = %u\n", __func__, unb_req_force_en);
	DML_LOG_VERBOSE("DML::%s: unb_req_force_val = %u\n", __func__, unb_req_force_val);
	DML_LOG_VERBOSE("DML::%s: unb_req_ok = %u\n", __func__, permitted);
	DML_LOG_VERBOSE("DML::%s: unb_req_en = %u\n", __func__, enabled);

	return enabled;
}
/*
 * Assign a DET buffer size (in KB) to each active surface and derive the
 * compressed buffer size.
 *
 * l is scratch storage and is zeroed on entry. DETBufferSizeInKByte[] and
 * *CompressedBufferSizeInkByte are the outputs.
 *
 * Two top-level paths:
 *  - UnboundedRequestEnabled: only surface 0 is sized (override, or at least
 *    128 and enough for two full swaths); the compressed buffer receives what
 *    is left of ConfigReturnBufferSizeInKByte.
 *  - Otherwise: every surface first gets a minimum allocation taken from a
 *    pool of MaxTotalDETInKByte, then the remainder is distributed either per
 *    stream (minimize_det_reallocation) or proportionally to read bandwidth.
 *    The compressed buffer starts from MinCompressedBufferSizeInKByte.
 *
 * In both paths the compressed buffer size is finally rescaled by
 * CompressedBufferSegmentSizeInkByte / ConfigReturnBufferSegmentSizeInkByte.
 *
 * Wherever a surface's allocation is charged against a pool or budget it is
 * multiplied by DPPPerSurface[k], or by 1 when ForceSingleDPP is set.
 */
static void CalculateDETBufferSize(
	struct dml2_core_shared_CalculateDETBufferSize_locals *l,
	const struct dml2_display_cfg *display_cfg,
	bool ForceSingleDPP,
	unsigned int NumberOfActiveSurfaces,
	bool UnboundedRequestEnabled,
	unsigned int nomDETInKByte,
	unsigned int MaxTotalDETInKByte,
	unsigned int ConfigReturnBufferSizeInKByte,
	unsigned int MinCompressedBufferSizeInKByte,
	unsigned int ConfigReturnBufferSegmentSizeInkByte,
	unsigned int CompressedBufferSegmentSizeInkByte,
	double ReadBandwidthLuma[],
	double ReadBandwidthChroma[],
	unsigned int full_swath_bytes_l[],
	unsigned int full_swath_bytes_c[],
	unsigned int DPPPerSurface[],
	// Output
	unsigned int DETBufferSizeInKByte[],
	unsigned int *CompressedBufferSizeInkByte)
{
	memset(l, 0, sizeof(struct dml2_core_shared_CalculateDETBufferSize_locals));

	// Only entries [0, NumberOfActiveSurfaces) are written before being read
	bool DETPieceAssignedToThisSurfaceAlready[DML2_MAX_PLANES];
	bool NextPotentialSurfaceToAssignDETPieceFound;
	bool MinimizeReallocationSuccess = false;

	DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
	DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
	DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
	DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, UnboundedRequestEnabled);
	DML_LOG_VERBOSE("DML::%s: MaxTotalDETInKByte = %u\n", __func__, MaxTotalDETInKByte);
	DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
	DML_LOG_VERBOSE("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, MinCompressedBufferSizeInKByte);
	DML_LOG_VERBOSE("DML::%s: CompressedBufferSegmentSizeInkByte = %u\n", __func__, CompressedBufferSegmentSizeInkByte);

	// Note: Will use default det size if that fits 2 swaths
	if (UnboundedRequestEnabled) {
		// Only plane 0 is considered on this path
		if (display_cfg->plane_descriptors[0].overrides.det_size_override_kb > 0) {
			DETBufferSizeInKByte[0] = display_cfg->plane_descriptors[0].overrides.det_size_override_kb;
		} else {
			// At least 128, otherwise two full swaths expressed in KB
			// NOTE(review): math_ceil2 presumably rounds up to a multiple of the segment size - confirm
			DETBufferSizeInKByte[0] = (unsigned int)math_max2(128.0, math_ceil2(2.0 * ((double)full_swath_bytes_l[0] + (double)full_swath_bytes_c[0]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte));
		}
		// Unsigned subtraction: assumes the DET size does not exceed the return buffer size - TODO confirm
		*CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
	} else {
		l->DETBufferSizePoolInKByte = MaxTotalDETInKByte;
		// Pass 1: give each surface its minimum allocation out of the pool
		for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
			DETBufferSizeInKByte[k] = 0;
			// 4:2:0 surfaces search up to one segment less than the nominal size
			if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) {
				l->max_minDET = nomDETInKByte - ConfigReturnBufferSegmentSizeInkByte;
			} else {
				l->max_minDET = nomDETInKByte;
			}
			l->minDET = 128;
			l->minDET_pipe = 0;

			// add DET resource until can hold 2 full swaths
			while (l->minDET <= l->max_minDET && l->minDET_pipe == 0) {
				if (2.0 * ((double)full_swath_bytes_l[k] + (double)full_swath_bytes_c[k]) / 1024.0 <= l->minDET)
					l->minDET_pipe = l->minDET;
				l->minDET = l->minDET + ConfigReturnBufferSegmentSizeInkByte;
			}

			DML_LOG_VERBOSE("DML::%s: k=%u minDET = %u\n", __func__, k, l->minDET);
			DML_LOG_VERBOSE("DML::%s: k=%u max_minDET = %u\n", __func__, k, l->max_minDET);
			DML_LOG_VERBOSE("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, l->minDET_pipe);
			DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, full_swath_bytes_l[k]);
			DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, full_swath_bytes_c[k]);

			// Two swaths did not fit within max_minDET: fall back to one full swath (at least 128)
			if (l->minDET_pipe == 0) {
				l->minDET_pipe = (unsigned int)(math_max2(128, math_ceil2(((double)full_swath_bytes_l[k] + (double)full_swath_bytes_c[k]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte)));
				DML_LOG_VERBOSE("DML::%s: k=%u minDET_pipe = %u (assume each plane take half DET)\n", __func__, k, l->minDET_pipe);
			}

			// Phantom pipes get nothing; an override is taken as-is and charged to the
			// pool; otherwise the minimum is granted only if the pool can cover it for
			// every DPP of the surface (if not, the surface stays at 0 here).
			if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
				DETBufferSizeInKByte[k] = 0;
			} else if (display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0) {
				DETBufferSizeInKByte[k] = display_cfg->plane_descriptors[k].overrides.det_size_override_kb;
				l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * display_cfg->plane_descriptors[k].overrides.det_size_override_kb;
			} else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * l->minDET_pipe <= l->DETBufferSizePoolInKByte) {
				DETBufferSizeInKByte[k] = l->minDET_pipe;
				l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * l->minDET_pipe;
			}

			DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]);
			DML_LOG_VERBOSE("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.det_size_override_kb);
			DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
			DML_LOG_VERBOSE("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, l->DETBufferSizePoolInKByte);
		}

		if (display_cfg->minimize_det_reallocation) {
			MinimizeReallocationSuccess = true;
			// To minimize DET reallocation, we don't distribute based on each surface's bandwidth proportional to the global
			// total, but rather distribute DET across streams proportionally based on pixel rate, and only distribute based on
			// bandwidth between the planes on the same stream. This ensures that large-scale redistribution happens only on a
			// stream count and/or pixel rate change, which is much less likely than general bandwidth changes per plane.

			// Calculate total pixel rate
			for (unsigned int k = 0; k < display_cfg->num_streams; ++k) {
				l->TotalPixelRate += display_cfg->stream_descriptors[k].timing.pixel_clock_khz;
			}

			// Calculate per stream DET budget
			for (unsigned int k = 0; k < display_cfg->num_streams; ++k) {
				l->DETBudgetPerStream[k] = (unsigned int)((double) display_cfg->stream_descriptors[k].timing.pixel_clock_khz * MaxTotalDETInKByte / l->TotalPixelRate);
				l->RemainingDETBudgetPerStream[k] = l->DETBudgetPerStream[k];
			}

			// Calculate the per stream total bandwidth
			for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
				if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
					l->TotalBandwidthPerStream[display_cfg->plane_descriptors[k].stream_index] += (unsigned int)(ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);

					// Check the minimum can be satisfied by budget
					if (l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] >= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
						l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] -= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k]);
					} else {
						// Budget too small for this plane's minimum: give up on this scheme;
						// the bandwidth-proportional path below takes over.
						MinimizeReallocationSuccess = false;
						break;
					}
				}
			}

			if (MinimizeReallocationSuccess) {
				// Since a fixed budget per stream is sufficient to satisfy the minimums, just re-distribute each stream's
				// budget proportionally across its planes
				l->ResidualDETAfterRounding = MaxTotalDETInKByte;

				for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
					if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
						// Plane's share of its stream budget, by bandwidth fraction within the stream
						l->IdealDETBudget = (unsigned int)(((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / l->TotalBandwidthPerStream[display_cfg->plane_descriptors[k].stream_index])
							* l->DETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index]);

						if (l->IdealDETBudget > DETBufferSizeInKByte[k]) {
							// Grow towards the ideal, limited by what the stream still has left
							l->DeltaDETBudget = l->IdealDETBudget - DETBufferSizeInKByte[k];
							if (l->DeltaDETBudget > l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index])
								l->DeltaDETBudget = l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index];

							/* split the additional budgeted DET among the pipes per plane */
							DETBufferSizeInKByte[k] += (unsigned int)((double)l->DeltaDETBudget / (ForceSingleDPP ? 1 : DPPPerSurface[k]));
							l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] -= l->DeltaDETBudget;
						}

						// Round down to segment size
						DETBufferSizeInKByte[k] = (DETBufferSizeInKByte[k] / ConfigReturnBufferSegmentSizeInkByte) * ConfigReturnBufferSegmentSizeInkByte;

						// Tracked in the scratch struct; not read again within this function
						l->ResidualDETAfterRounding -= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k]);
					}
				}
			}
		}

		// Default scheme (also the fallback when the per-stream budget failed):
		// hand out the remaining pool in proportion to each surface's read bandwidth.
		if (!MinimizeReallocationSuccess) {
			l->TotalBandwidth = 0;
			for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
				if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
					l->TotalBandwidth = l->TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
				}
			}
			DML_LOG_VERBOSE("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
			for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
				DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
			}
			DML_LOG_VERBOSE("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
			DML_LOG_VERBOSE("DML::%s: TotalBandwidth = %f\n", __func__, l->TotalBandwidth);
			l->BandwidthOfSurfacesNotAssignedDETPiece = l->TotalBandwidth;
			// Mark surfaces that need no extra piece: phantom pipes, overridden surfaces,
			// and surfaces whose minimum already covers their bandwidth share of the total DET.
			for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {

				if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
					DETPieceAssignedToThisSurfaceAlready[k] = true;
				} else if (display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0 || (((double)(ForceSingleDPP ? 1 : DPPPerSurface[k]) * (double)DETBufferSizeInKByte[k] / (double)MaxTotalDETInKByte) >= ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / l->TotalBandwidth))) {
					DETPieceAssignedToThisSurfaceAlready[k] = true;
					l->BandwidthOfSurfacesNotAssignedDETPiece = l->BandwidthOfSurfacesNotAssignedDETPiece - ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
				} else {
					DETPieceAssignedToThisSurfaceAlready[k] = false;
				}
				DML_LOG_VERBOSE("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, DETPieceAssignedToThisSurfaceAlready[k]);
				DML_LOG_VERBOSE("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, l->BandwidthOfSurfacesNotAssignedDETPiece);
			}

			// Each outer iteration serves at most one surface: the not-yet-assigned
			// surface with the smallest luma + chroma read bandwidth.
			for (unsigned int j = 0; j < NumberOfActiveSurfaces; ++j) {
				NextPotentialSurfaceToAssignDETPieceFound = false;
				l->NextSurfaceToAssignDETPiece = 0;

				for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
					DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, ReadBandwidthLuma[k]);
					DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, ReadBandwidthChroma[k]);
					DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]);
					DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]);
					DML_LOG_VERBOSE("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, l->NextSurfaceToAssignDETPiece);
					if (!DETPieceAssignedToThisSurfaceAlready[k] && (!NextPotentialSurfaceToAssignDETPieceFound ||
						ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece])) {
						l->NextSurfaceToAssignDETPiece = k;
						NextPotentialSurfaceToAssignDETPieceFound = true;
					}
					DML_LOG_VERBOSE("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
					DML_LOG_VERBOSE("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
				}

				if (NextPotentialSurfaceToAssignDETPieceFound) {
					// Piece = pool * (surface bandwidth / unassigned bandwidth), rounded in units of
					// (DPP count * segment size), and capped by the pool floored to that same unit.
					l->NextDETBufferPieceInKByte = (unsigned int)(math_min2(
						math_round((double)l->DETBufferSizePoolInKByte * (ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]) / l->BandwidthOfSurfacesNotAssignedDETPiece /
							((ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte))
						* (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte,
						math_floor2((double)l->DETBufferSizePoolInKByte, (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte)));

					DML_LOG_VERBOSE("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, l->DETBufferSizePoolInKByte);
					DML_LOG_VERBOSE("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, l->NextSurfaceToAssignDETPiece);
					DML_LOG_VERBOSE("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]);
					DML_LOG_VERBOSE("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]);
					DML_LOG_VERBOSE("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, l->BandwidthOfSurfacesNotAssignedDETPiece);
					DML_LOG_VERBOSE("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, l->NextDETBufferPieceInKByte);
					DML_LOG_VERBOSE("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, l->NextSurfaceToAssignDETPiece, DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]);

					// The piece covers all DPPs of the surface, so each pipe gets piece / DPP count
					DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] = DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] + l->NextDETBufferPieceInKByte / (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]);
					DML_LOG_VERBOSE("to %u\n", DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]);

					l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - l->NextDETBufferPieceInKByte;
					DETPieceAssignedToThisSurfaceAlready[l->NextSurfaceToAssignDETPiece] = true;
					l->BandwidthOfSurfacesNotAssignedDETPiece = l->BandwidthOfSurfacesNotAssignedDETPiece - (ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]);
				}
			}
		}
		*CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
	}
	// Rescale by the ratio of the compressed-buffer segment size to the return-buffer segment size
	*CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByte / ConfigReturnBufferSegmentSizeInkByte;

	DML_LOG_VERBOSE("DML::%s: --- After bandwidth adjustment ---\n", __func__);
	DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *CompressedBufferSizeInkByte);
	for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
		DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
	}
}
math_max2(DispClk, TMDS420MinPixClock); 1262 } 1263 1264 return DispClk; 1265 } 1266 1267 static double TruncToValidBPP( 1268 struct dml2_core_shared_TruncToValidBPP_locals *l, 1269 double LinkBitRate, 1270 unsigned int Lanes, 1271 unsigned int HTotal, 1272 unsigned int HActive, 1273 double PixelClock, 1274 double DesiredBPP, 1275 bool DSCEnable, 1276 enum dml2_output_encoder_class Output, 1277 enum dml2_output_format_class Format, 1278 unsigned int DSCInputBitPerComponent, 1279 unsigned int DSCSlices, 1280 unsigned int AudioRate, 1281 unsigned int AudioLayout, 1282 enum dml2_odm_mode ODMModeNoDSC, 1283 enum dml2_odm_mode ODMModeDSC, 1284 1285 // Output 1286 unsigned int *RequiredSlots) 1287 { 1288 (void)DSCInputBitPerComponent; 1289 (void)RequiredSlots; 1290 double MaxLinkBPP; 1291 unsigned int MinDSCBPP; 1292 double MaxDSCBPP; 1293 unsigned int NonDSCBPP0; 1294 unsigned int NonDSCBPP1; 1295 unsigned int NonDSCBPP2; 1296 enum dml2_odm_mode ODMMode; 1297 1298 enum lib_frl_cap_check_status hdmifrlresult = LIB_FRL_CAP_CHECK_OK; 1299 1300 l->hdmifrlparams.lanes = (int)Lanes; 1301 l->hdmifrlparams.f_pixel_clock_nominal = PixelClock * 1000000; 1302 l->hdmifrlparams.r_bit_nominal = LinkBitRate * 1000000; 1303 l->hdmifrlparams.layout = (int)AudioLayout; 1304 l->hdmifrlparams.f_audio = AudioRate * 1000; 1305 l->hdmifrlparams.h_active = (int)HActive; 1306 l->hdmifrlparams.h_blank = (int)(HTotal - HActive); 1307 l->hdmifrlparams.bpc = (int)(DesiredBPP / 3); 1308 l->hdmifrlparams.compressed = DSCEnable; 1309 l->hdmifrlparams.slices = (int)DSCSlices; 1310 l->hdmifrlparams.slice_width = (int)(math_ceil2((double)HActive / DSCSlices, 1.0)); 1311 l->hdmifrlparams.bpp_target = DesiredBPP; 1312 if (Format == dml2_420) { 1313 NonDSCBPP0 = 12; 1314 NonDSCBPP1 = 15; 1315 NonDSCBPP2 = 18; 1316 MinDSCBPP = 6; 1317 MaxDSCBPP = 16; 1318 l->hdmifrlparams.pixel_encoding = LIB_FRL_CAP_CHECK_PIXEL_ENCODING_420; 1319 l->hdmifrlparams.bpc = (int)(DesiredBPP / 1.5); 1320 } else if (Format == 
dml2_444) { 1321 NonDSCBPP0 = 24; 1322 NonDSCBPP1 = 30; 1323 NonDSCBPP2 = 36; 1324 MinDSCBPP = 8; 1325 MaxDSCBPP = 16; 1326 l->hdmifrlparams.pixel_encoding = LIB_FRL_CAP_CHECK_PIXEL_ENCODING_444; 1327 l->hdmifrlparams.bpc = (int)(DesiredBPP / 3.0); 1328 } else { 1329 l->hdmifrlparams.pixel_encoding = LIB_FRL_CAP_CHECK_PIXEL_ENCODING_422; 1330 l->hdmifrlparams.bpc = (int)(DesiredBPP / 2.0); 1331 1332 if (Output == dml2_hdmi || Output == dml2_hdmifrl) { 1333 NonDSCBPP0 = 24; 1334 NonDSCBPP1 = 24; 1335 NonDSCBPP2 = 24; 1336 } else { 1337 NonDSCBPP0 = 16; 1338 NonDSCBPP1 = 20; 1339 NonDSCBPP2 = 24; 1340 } 1341 if (Format == dml2_n422 || Output == dml2_hdmifrl) { 1342 MinDSCBPP = 7; 1343 MaxDSCBPP = 16; 1344 } else { 1345 MinDSCBPP = 8; 1346 MaxDSCBPP = 16; 1347 } 1348 } 1349 1350 if (Output == dml2_hdmifrl) { 1351 hdmifrlresult = frl_cap_check_intermediates(&l->hdmifrlparams, &l->hdmifrlinter); 1352 MaxLinkBPP = (1 - l->hdmifrlinter.overhead_max) * math_min2(l->hdmifrlinter.r_frl_char_min * 16.0 * (double)Lanes / l->hdmifrlinter.f_pixel_clock_max + 24.0 * (double)DML2_FRL_CHK_TB_BORROWED_MAX / (double)HActive, 1353 (l->hdmifrlinter.r_frl_char_min * 16.0 * (double)Lanes / l->hdmifrlinter.f_pixel_clock_max * (double)HTotal - 16.0 * (double)l->hdmifrlinter.blank_audio_min) / (double)HActive); 1354 } else if (Output == dml2_dp2p0) { 1355 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128.0 / 132.0 * 383.0 / 384.0 * 65536.0 / 65540.0; 1356 } else if (DSCEnable && Output == dml2_dp) { 1357 MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock * (1 - 2.4 / 100); 1358 } else { 1359 MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock; 1360 } 1361 1362 ODMMode = DSCEnable ? 
ODMModeDSC : ODMModeNoDSC; 1363 1364 if (ODMMode == dml2_odm_mode_split_1to2) { 1365 MaxLinkBPP = 2 * MaxLinkBPP; 1366 } 1367 1368 if (DesiredBPP == 0) { 1369 if (DSCEnable) { 1370 if (MaxLinkBPP < MinDSCBPP) { 1371 return __DML2_CALCS_DPP_INVALID__; 1372 } else if (MaxLinkBPP >= MaxDSCBPP) { 1373 return MaxDSCBPP; 1374 } else { 1375 return math_floor2(16.0 * MaxLinkBPP, 1.0) / 16.0; 1376 } 1377 } else { 1378 if (MaxLinkBPP >= NonDSCBPP2) { 1379 return NonDSCBPP2; 1380 } else if (MaxLinkBPP >= NonDSCBPP1) { 1381 return NonDSCBPP1; 1382 } else if (MaxLinkBPP >= NonDSCBPP0) { 1383 return NonDSCBPP0; 1384 } else { 1385 return __DML2_CALCS_DPP_INVALID__; 1386 } 1387 } 1388 } else { 1389 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0)) || 1390 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) { 1391 return __DML2_CALCS_DPP_INVALID__; 1392 } else if ((Output == dml2_hdmifrl && hdmifrlresult != LIB_FRL_CAP_CHECK_OK) || (Output != dml2_hdmifrl && MaxLinkBPP < DesiredBPP)) { 1393 return __DML2_CALCS_DPP_INVALID__; 1394 } else { 1395 return DesiredBPP; 1396 } 1397 } 1398 } 1399 1400 // updated for dcn4 1401 static unsigned int dscceComputeDelay( 1402 unsigned int bpc, 1403 double BPP, 1404 unsigned int sliceWidth, 1405 unsigned int numSlices, 1406 enum dml2_output_format_class pixelFormat, 1407 enum dml2_output_encoder_class Output) 1408 { 1409 // valid bpc = source bits per component in the set of {8, 10, 12} 1410 // valid bpp = increments of 1/16 of a bit 1411 // min = 6/7/8 in N420/N422/444, respectively 1412 // max = such that compression is 1:1 1413 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) 1414 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} 1415 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} 1416 1417 
// fixed value 1418 unsigned int rcModelSize = 8192; 1419 1420 // N422/N420 operate at 2 pixels per clock 1421 unsigned int pixelsPerClock, padding_pixels, ssm_group_priming_delay, ssm_pipeline_delay, obsm_pipeline_delay, slice_padded_pixels, ixd_plus_padding, ixd_plus_padding_groups, cycles_per_group, group_delay, pipeline_delay, pixels, additional_group_delay, lines_to_reach_ixd, groups_to_reach_ixd, slice_width_groups, initial_xmit_delay, number_of_lines_to_reach_ixd, slice_width_modified; 1422 1423 if (pixelFormat == dml2_420) 1424 pixelsPerClock = 2; 1425 // #all other modes operate at 1 pixel per clock 1426 else if (pixelFormat == dml2_444) 1427 pixelsPerClock = 1; 1428 else if (pixelFormat == dml2_n422 || Output == dml2_hdmifrl) 1429 pixelsPerClock = 2; 1430 else 1431 pixelsPerClock = 1; 1432 1433 //initial transmit delay as per PPS 1434 initial_xmit_delay = (unsigned int)(math_round(rcModelSize / 2.0 / BPP / pixelsPerClock)); 1435 1436 //slice width as seen by dscc_bcl in pixels or pixels pairs (depending on number of pixels per pixel container based on pixel format) 1437 slice_width_modified = (pixelFormat == dml2_444 || pixelFormat == dml2_420 || Output == dml2_hdmifrl) ? sliceWidth / 2 : sliceWidth; 1438 1439 padding_pixels = ((slice_width_modified % 3) != 0) ? 
(3 - (slice_width_modified % 3)) * (initial_xmit_delay / slice_width_modified) : 0; 1440 1441 if ((3.0 * pixelsPerClock * BPP) >= ((double)((initial_xmit_delay + 2) / 3) * (double)(3 + (pixelFormat == dml2_n422)))) { 1442 if ((initial_xmit_delay + padding_pixels) % 3 == 1) { 1443 initial_xmit_delay++; 1444 } 1445 } 1446 1447 //sub-stream multiplexer balance fifo priming delay in groups as per dsc standard 1448 if (bpc == 8) 1449 ssm_group_priming_delay = 83; 1450 else if (bpc == 10) 1451 ssm_group_priming_delay = 91; 1452 else if (bpc == 12) 1453 ssm_group_priming_delay = 115; 1454 else if (bpc == 14) 1455 ssm_group_priming_delay = 123; 1456 else 1457 ssm_group_priming_delay = 128; 1458 1459 //slice width in groups is rounded up to the nearest group as DSC adds padded pixels such that there are an integer number of groups per slice 1460 slice_width_groups = (slice_width_modified + 2) / 3; 1461 1462 //determine number of padded pixels in the last group of a slice line, computed as 1463 slice_padded_pixels = 3 * slice_width_groups - slice_width_modified; 1464 1465 //determine integer number of complete slice lines required to reach initial transmit delay without ssm delay considered 1466 number_of_lines_to_reach_ixd = initial_xmit_delay / slice_width_modified; 1467 1468 //increase initial transmit delay by the number of padded pixels added to a slice line multipled by the integer number of complete lines to reach initial transmit delay 1469 //this step is necessary as each padded pixel added takes up a clock cycle and, therefore, adds to the overall delay 1470 ixd_plus_padding = initial_xmit_delay + slice_padded_pixels * number_of_lines_to_reach_ixd; 1471 1472 //convert the padded initial transmit delay from pixels to groups by rounding up to the nearest group as DSC processes in groups of pixels 1473 ixd_plus_padding_groups = (ixd_plus_padding + 2) / 3; 1474 1475 //number of groups required for a slice to reach initial transmit delay is the sum of the padded initial 
transmit delay plus the ssm group priming delay 1476 groups_to_reach_ixd = ixd_plus_padding_groups + ssm_group_priming_delay; 1477 1478 //number of lines required to reach padded initial transmit delay in groups in slices to the left of the last horizontal slice 1479 //needs to be rounded up as a complete slice lines are buffered prior to initial transmit delay being reached in the last horizontal slice 1480 lines_to_reach_ixd = (groups_to_reach_ixd + slice_width_groups - 1) / slice_width_groups; //round up lines to reach ixd to next 1481 1482 //determine if there are non-zero number of pixels reached in the group where initial transmit delay is reached 1483 //an additional group time (i.e., 3 pixel times) is required before the first output if there are no additional pixels beyond initial transmit delay 1484 additional_group_delay = ((initial_xmit_delay - number_of_lines_to_reach_ixd * slice_width_modified) % 3) == 0 ? 1 : 0; 1485 1486 //number of pipeline delay cycles in the ssm block (can be determined empirically or analytically by inspecting the ssm block) 1487 ssm_pipeline_delay = 2; 1488 1489 //number of pipe delay cycles in the obsm block (can be determined empirically or analytically by inspecting the obsm block) 1490 obsm_pipeline_delay = 1; 1491 1492 //a group of pixels is worth 6 pixels in N422/N420 mode or 3 pixels in all other modes 1493 if (pixelFormat == dml2_420 || pixelFormat == dml2_444 || pixelFormat == dml2_n422 || Output == dml2_hdmifrl) 1494 cycles_per_group = 6; 1495 else 1496 cycles_per_group = 3; 1497 //delay of the bit stream contruction layer in pixels is the sum of: 1498 //1. number of pixel containers in a slice line multipled by the number of lines required to reach initial transmit delay multipled by number of slices to the left of the last horizontal slice 1499 //2. number of pixel containers required to reach initial transmit delay (specifically, in the last horizontal slice) 1500 //3. 
additional group of delay if initial transmit delay is reached exactly in a group 1501 //4. ssm and obsm pipeline delay (i.e., clock cycles of delay) 1502 group_delay = (lines_to_reach_ixd * slice_width_groups * (numSlices - 1)) + groups_to_reach_ixd + additional_group_delay; 1503 pipeline_delay = ssm_pipeline_delay + obsm_pipeline_delay; 1504 1505 //pixel delay is group_delay (converted to pixels) + pipeline, however, first group is a special case since it is processed as soon as it arrives (i.e., in 3 cycles regardless of pixel format) 1506 pixels = (group_delay - 1) * cycles_per_group + 3 + pipeline_delay; 1507 1508 DML_LOG_VERBOSE("DML::%s: bpc: %u\n", __func__, bpc); 1509 DML_LOG_VERBOSE("DML::%s: BPP: %f\n", __func__, BPP); 1510 DML_LOG_VERBOSE("DML::%s: sliceWidth: %u\n", __func__, sliceWidth); 1511 DML_LOG_VERBOSE("DML::%s: numSlices: %u\n", __func__, numSlices); 1512 DML_LOG_VERBOSE("DML::%s: pixelFormat: %u\n", __func__, pixelFormat); 1513 DML_LOG_VERBOSE("DML::%s: Output: %u\n", __func__, Output); 1514 DML_LOG_VERBOSE("DML::%s: pixels: %u\n", __func__, pixels); 1515 return pixels; 1516 } 1517 1518 //updated in dcn4 1519 static unsigned int dscComputeDelay(enum dml2_output_format_class pixelFormat, enum dml2_output_encoder_class Output) 1520 { 1521 unsigned int Delay = 0; 1522 unsigned int dispclk_per_dscclk = 3; 1523 1524 // sfr 1525 Delay = Delay + 2; 1526 1527 if (pixelFormat == dml2_420 || pixelFormat == dml2_n422 || (Output == dml2_hdmifrl && pixelFormat != dml2_444)) { 1528 dispclk_per_dscclk = 3 * 2; 1529 } 1530 1531 if (pixelFormat == dml2_420) { 1532 //dscc top delay for pixel compression layer 1533 Delay = Delay + 16 * dispclk_per_dscclk; 1534 1535 // dscc - input deserializer 1536 Delay = Delay + 5; 1537 1538 // dscc - input cdc fifo 1539 Delay = Delay + 1 + 4 * dispclk_per_dscclk; 1540 1541 // dscc - output cdc fifo 1542 Delay = Delay + 3 + 1 * dispclk_per_dscclk; 1543 1544 // dscc - cdc uncertainty 1545 Delay = Delay + 3 + 3 * 
// Returns the number of host-VM page table levels that are counted as dynamic.
//
// GPUVMEnable, HostVMEnable         - both must be set, otherwise the result is 0.
// HostVMMinPageSize                 - minimum host VM page size (unit not visible
//                                     here; presumably KB - confirm against caller).
// HostVMMaxNonCachedPageTableLevels - upper bound on the result.
//
// Below 2048 every non-cached level counts; from 2048 up to (but excluding)
// 1048576 one level is dropped; from 1048576 on two levels are dropped. The
// result saturates at 0 instead of wrapping.
static unsigned int CalculateHostVMDynamicLevels(
	bool GPUVMEnable,
	bool HostVMEnable,
	unsigned int HostVMMinPageSize,
	unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int levels_dropped;

	if (!GPUVMEnable || !HostVMEnable)
		return 0;

	if (HostVMMinPageSize < 2048)
		levels_dropped = 0;
	else if (HostVMMinPageSize < 1048576)
		levels_dropped = 1;
	else
		levels_dropped = 2;

	// saturating subtraction in integer arithmetic (no float round trip needed)
	if (HostVMMaxNonCachedPageTableLevels <= levels_dropped)
		return 0;

	return HostVMMaxNonCachedPageTableLevels - levels_dropped;
}
// Computes the DCC meta row geometry, the PTE request geometry and the dpte
// row sizes for one surface, writing each result through the output pointers
// in *p.
//
// Returns vm_bytes = meta PTE bytes per frame + extra mpde bytes + dpde0 bytes
// per frame + extra dpde bytes. When p->GPUVMEnable is false, all PTE-related
// outputs are zeroed and 0 is returned (the meta row outputs are still written).
static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_and_row_bytes_params *p)
{
	unsigned int extra_dpde_bytes;
	unsigned int extra_mpde_bytes;
	unsigned int MacroTileSizeBytes;
	unsigned int vp_height_dpte_ub;

	unsigned int meta_surface_bytes;
	unsigned int vm_bytes;
	unsigned int vp_height_meta_ub;
	unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this

	// --- meta request and meta row geometry ---
	// A meta request covers 8x8 of the 256-byte blocks.
	*p->MetaRequestHeight = 8 * p->BlockHeight256Bytes;
	*p->MetaRequestWidth = 8 * p->BlockWidth256Bytes;
	if (p->SurfaceTiling == dml2_sw_linear) {
		*p->meta_row_height = 32;
		*p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth));
		*p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0); // FIXME_DCN4SW missing in old code but no dcc for linear anyways?
	} else if (!dml_is_vertical_rotation(p->RotationAngle)) {
		*p->meta_row_height = *p->MetaRequestHeight;
		// A stationary viewport on a single DPP uses the exact start position;
		// otherwise one extra request width is added as an upper bound.
		if (p->ViewportStationary && p->NumberOfDPPs == 1) {
			*p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth));
		} else {
			*p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestWidth) + *p->MetaRequestWidth);
		}
		*p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0);
	} else {
		// Vertical rotation: the roles of request width and height are swapped.
		*p->meta_row_height = *p->MetaRequestWidth;
		if (p->ViewportStationary && p->NumberOfDPPs == 1) {
			*p->meta_row_width = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->MetaRequestHeight - 1, *p->MetaRequestHeight) - math_floor2(p->ViewportYStart, *p->MetaRequestHeight));
		} else {
			*p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestHeight) + *p->MetaRequestHeight);
		}
		*p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestWidth * p->BytePerPixel / 256.0);
	}

	// --- upper bound of the viewport height in units of 64 x BlockHeight256Bytes ---
	if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) {
		vp_height_meta_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + 64 * p->BlockHeight256Bytes - 1, 64 * p->BlockHeight256Bytes) - math_floor2(p->ViewportYStart, 64 * p->BlockHeight256Bytes));
	} else if (!dml_is_vertical_rotation(p->RotationAngle)) {
		vp_height_meta_ub = (unsigned int)(math_ceil2(p->ViewportHeight - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes);
	} else {
		vp_height_meta_ub = (unsigned int)(math_ceil2(p->SwathWidth - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes);
	}

	// --- meta surface size and the page table bytes needed to map it ---
	meta_surface_bytes = (unsigned int)(p->DCCMetaPitch * vp_height_meta_ub * p->BytePerPixel / 256.0);
	DML_LOG_VERBOSE("DML::%s: DCCMetaPitch = %u\n", __func__, p->DCCMetaPitch);
	DML_LOG_VERBOSE("DML::%s: meta_surface_bytes = %u\n", __func__, meta_surface_bytes);
	if (p->GPUVMEnable == true) {
		double meta_vmpg_bytes = 4.0 * 1024.0;
		// The subtraction is carried out in double, so it may go negative without wrapping.
		*p->meta_pte_bytes_per_frame_ub = (unsigned int)((math_ceil2((double) (meta_surface_bytes - meta_vmpg_bytes) / (8 * meta_vmpg_bytes), 1) + 1) * 64);
		extra_mpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 1);
	} else {
		*p->meta_pte_bytes_per_frame_ub = 0;
		extra_mpde_bytes = 0;
	}

	// Without DCC or without an MRQ no meta data is accounted for.
	if (!p->DCCEnable || !p->mrq_present) {
		*p->meta_pte_bytes_per_frame_ub = 0;
		extra_mpde_bytes = 0;
		*p->meta_row_bytes = 0;
	}

	// GPUVM disabled: zero every PTE-related output and stop here.
	if (!p->GPUVMEnable) {
		*p->PixelPTEBytesPerRow = 0;
		*p->PixelPTEBytesPerRowStorage = 0;
		*p->dpte_row_width_ub = 0;
		*p->dpte_row_height = 0;
		*p->dpte_row_height_linear = 0;
		*p->PixelPTEBytesPerRow_one_row_per_frame = 0;
		*p->dpte_row_width_ub_one_row_per_frame = 0;
		*p->dpte_row_height_one_row_per_frame = 0;
		*p->vmpg_width = 0;
		*p->vmpg_height = 0;
		*p->PixelPTEReqWidth = 0;
		*p->PixelPTEReqHeight = 0;
		*p->PTERequestSize = 0;
		*p->dpde0_bytes_per_frame_ub = 0;
		return 0;
	}

	// From here on p->GPUVMEnable is known to be true.
	MacroTileSizeBytes = p->MacroTileWidth * p->BytePerPixel * p->MacroTileHeight;

	// --- upper bound of the viewport height in macro tile rows ---
	if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) {
		vp_height_dpte_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + p->MacroTileHeight - 1, p->MacroTileHeight) - math_floor2(p->ViewportYStart, p->MacroTileHeight));
	} else if (!dml_is_vertical_rotation(p->RotationAngle)) {
		vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->ViewportHeight - 1, p->MacroTileHeight) + p->MacroTileHeight);
	} else {
		vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->SwathWidth - 1, p->MacroTileHeight) + p->MacroTileHeight);
	}

	// --- page directory bytes; only with more than one page table level ---
	if (p->GPUVMEnable == true && p->GPUVMMaxPageTableLevels > 1) {
		// NOTE(review): the product and the subtraction are evaluated in unsigned int before the
		// cast to double; if the product is smaller than MacroTileSizeBytes it wraps - confirm
		// callers guarantee Pitch * vp_height_dpte_ub * BytePerPixel >= MacroTileSizeBytes.
		*p->dpde0_bytes_per_frame_ub = (unsigned int)(64 * (math_ceil2((double)(p->Pitch * vp_height_dpte_ub * p->BytePerPixel - MacroTileSizeBytes) / (double)(8 * 2097152), 1) + 1));
		extra_dpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 2);
	} else {
		*p->dpde0_bytes_per_frame_ub = 0;
		extra_dpde_bytes = 0;
	}

	// This sum is the function's return value.
	vm_bytes = *p->meta_pte_bytes_per_frame_ub + extra_mpde_bytes + *p->dpde0_bytes_per_frame_ub + extra_dpde_bytes;

	DML_LOG_VERBOSE("DML::%s: DCCEnable = %u\n", __func__, p->DCCEnable);
	DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable);
	DML_LOG_VERBOSE("DML::%s: SwModeLinear = %u\n", __func__, p->SurfaceTiling == dml2_sw_linear);
	DML_LOG_VERBOSE("DML::%s: BytePerPixel = %u\n", __func__, p->BytePerPixel);
	DML_LOG_VERBOSE("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, p->GPUVMMaxPageTableLevels);
	DML_LOG_VERBOSE("DML::%s: BlockHeight256Bytes = %u\n", __func__, p->BlockHeight256Bytes);
	DML_LOG_VERBOSE("DML::%s: BlockWidth256Bytes = %u\n", __func__, p->BlockWidth256Bytes);
	DML_LOG_VERBOSE("DML::%s: MacroTileHeight = %u\n", __func__, p->MacroTileHeight);
	DML_LOG_VERBOSE("DML::%s: MacroTileWidth = %u\n", __func__, p->MacroTileWidth);
	DML_LOG_VERBOSE("DML::%s: meta_pte_bytes_per_frame_ub = %u\n", __func__, *p->meta_pte_bytes_per_frame_ub);
	DML_LOG_VERBOSE("DML::%s: dpde0_bytes_per_frame_ub = %u\n", __func__, *p->dpde0_bytes_per_frame_ub);
	DML_LOG_VERBOSE("DML::%s: extra_mpde_bytes = %u\n", __func__, extra_mpde_bytes);
	DML_LOG_VERBOSE("DML::%s: extra_dpde_bytes = %u\n", __func__, extra_dpde_bytes);
	DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, vm_bytes);
	DML_LOG_VERBOSE("DML::%s: ViewportHeight = %u\n", __func__, p->ViewportHeight);
	DML_LOG_VERBOSE("DML::%s: SwathWidth = %u\n", __func__, p->SwathWidth);
	DML_LOG_VERBOSE("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub);

	// --- PTE request geometry, chosen from tiling mode and minimum page size ---
	if (p->SurfaceTiling == dml2_sw_linear) {
		*p->PixelPTEReqHeight = 1;
		*p->PixelPTEReqWidth = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel;
		PixelPTEReqWidth_linear = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel;
		*p->PTERequestSize = 64;

		*p->vmpg_height = 1;
		*p->vmpg_width = p->GPUVMMinPageSizeKBytes * 1024 / p->BytePerPixel;
	} else if (p->GPUVMMinPageSizeKBytes * 1024 >= dml_get_tile_block_size_bytes(p->SurfaceTiling)) { // 1 64B 8x1 PTE
		*p->PixelPTEReqHeight = p->MacroTileHeight;
		*p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
		*p->PTERequestSize = 64;

		*p->vmpg_height = p->MacroTileHeight;
		*p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);

	} else if (p->GPUVMMinPageSizeKBytes == 4 && dml_get_tile_block_size_bytes(p->SurfaceTiling) == 65536) { // 2 64B PTE requests to get 16 PTEs to cover the 64K tile
		// one 64KB tile is 16x16 requests of 256B
		*p->PixelPTEReqHeight = 16 * p->BlockHeight256Bytes;
		*p->PixelPTEReqWidth = 16 * p->BlockWidth256Bytes;
		*p->PTERequestSize = 128;

		*p->vmpg_height = *p->PixelPTEReqHeight;
		*p->vmpg_width = *p->PixelPTEReqWidth;
	} else {
		// default values so the rest of the calculation can go through; when vm is disabled, the calculated pte related values shouldn't be used anyway
		*p->PixelPTEReqHeight = p->MacroTileHeight;
		*p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
		*p->PTERequestSize = 64;

		*p->vmpg_height = p->MacroTileHeight;
		*p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);

		// Unsupported page size / tile size combination: log it and assert.
		if (p->GPUVMEnable == true) {
			DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes=%u and sw_mode=%u (tile_size=%d) not supported!\n",
				__func__, p->GPUVMMinPageSizeKBytes, p->SurfaceTiling, dml_get_tile_block_size_bytes(p->SurfaceTiling));
			DML_ASSERT(0);
		}
	}

	DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes);
	DML_LOG_VERBOSE("DML::%s: PixelPTEReqHeight = %u\n", __func__, *p->PixelPTEReqHeight);
	DML_LOG_VERBOSE("DML::%s: PixelPTEReqWidth = %u\n", __func__, *p->PixelPTEReqWidth);
	DML_LOG_VERBOSE("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear);
	DML_LOG_VERBOSE("DML::%s: PTERequestSize = %u\n", __func__, *p->PTERequestSize);
	DML_LOG_VERBOSE("DML::%s: Pitch = %u\n", __func__, p->Pitch);
	DML_LOG_VERBOSE("DML::%s: vmpg_width = %u\n", __func__, *p->vmpg_width);
	DML_LOG_VERBOSE("DML::%s: vmpg_height = %u\n", __func__, *p->vmpg_height);

	// --- "one row per frame" variant: a single dpte row spans the whole viewport height ---
	*p->dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
	*p->dpte_row_width_ub_one_row_per_frame = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height_one_row_per_frame / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * (double)*p->PixelPTEReqWidth);
	*p->PixelPTEBytesPerRow_one_row_per_frame = (unsigned int)((double)*p->dpte_row_width_ub_one_row_per_frame / (double)*p->PixelPTEReqWidth * *p->PTERequestSize);
	*p->dpte_row_height_linear = 0;

	// --- regular dpte row height / width / bytes per tiling and rotation ---
	if (p->SurfaceTiling == dml2_sw_linear) {
		// Row height is a power of two derived from the PTE buffer size, capped at 128.
		// NOTE(review): divides by p->Pitch and takes a log of the quotient - assumes Pitch is
		// non-zero and the quotient is at least 1; confirm against callers.
		*p->dpte_row_height = (unsigned int)(math_min2(128, (double)(1ULL << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * *p->PixelPTEReqWidth / p->Pitch), 2.0), 1))));
		*p->dpte_row_width_ub = (unsigned int)(math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height - 1), (double)*p->PixelPTEReqWidth) + *p->PixelPTEReqWidth);
		*p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqWidth * *p->PTERequestSize);

		// VBA_DELTA, VBA doesn't have programming value for pte row height linear.
		*p->dpte_row_height_linear = 1U << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * PixelPTEReqWidth_linear / p->Pitch), 2.0), 1);
		if (*p->dpte_row_height_linear > 128)
			*p->dpte_row_height_linear = 128;

#ifdef __DML_VBA_DEBUG__
		DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *p->dpte_row_width_ub);
#endif

	} else if (!dml_is_vertical_rotation(p->RotationAngle)) {
		*p->dpte_row_height = *p->PixelPTEReqHeight;

		if (p->GPUVMMinPageSizeKBytes > 64) {
			*p->dpte_row_width_ub = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * *p->PixelPTEReqWidth);
		} else if (p->ViewportStationary && (p->NumberOfDPPs == 1)) {
			*p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->PixelPTEReqWidth - 1, *p->PixelPTEReqWidth) - math_floor2(p->ViewportXStart, *p->PixelPTEReqWidth));
		} else {
			*p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqWidth, 1) + 1.0) * *p->PixelPTEReqWidth);
		}
#ifdef __DML_VBA_DEBUG__
		DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *p->dpte_row_width_ub);
#endif

		// integer division here, unlike the double division used in the other two branches
		*p->PixelPTEBytesPerRow = *p->dpte_row_width_ub / *p->PixelPTEReqWidth * *p->PTERequestSize;
	} else {
		*p->dpte_row_height = (unsigned int)(math_min2(*p->PixelPTEReqWidth, p->MacroTileWidth));

		if (p->ViewportStationary && (p->NumberOfDPPs == 1)) {
			*p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->PixelPTEReqHeight - 1, *p->PixelPTEReqHeight) - math_floor2(p->ViewportYStart, *p->PixelPTEReqHeight));
		} else {
			*p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqHeight, 1) + 1) * *p->PixelPTEReqHeight);
		}

		*p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqHeight * *p->PTERequestSize);
#ifdef __DML_VBA_DEBUG__
		DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *p->dpte_row_width_ub);
#endif
	}

	// NOTE(review): unreachable - the function already returned above when GPUVMEnable is false.
	if (p->GPUVMEnable != true) {
		*p->PixelPTEBytesPerRow = 0;
		*p->PixelPTEBytesPerRow_one_row_per_frame = 0;
	}

	*p->PixelPTEBytesPerRowStorage = *p->PixelPTEBytesPerRow;

#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes);
	DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable);
	DML_LOG_VERBOSE("DML::%s: meta_row_height = %u\n", __func__, *p->meta_row_height);
	DML_LOG_VERBOSE("DML::%s: dpte_row_height = %u\n", __func__, *p->dpte_row_height);
	DML_LOG_VERBOSE("DML::%s: dpte_row_height_linear = %u\n", __func__, *p->dpte_row_height_linear);
	DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u\n", __func__, *p->dpte_row_width_ub);
	DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *p->PixelPTEBytesPerRow);
	DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *p->PixelPTEBytesPerRowStorage);
	DML_LOG_VERBOSE("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, p->PTEBufferSizeInRequests);
	DML_LOG_VERBOSE("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *p->dpte_row_height_one_row_per_frame);
	DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *p->dpte_row_width_ub_one_row_per_frame);
	DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, *p->PixelPTEBytesPerRow_one_row_per_frame);
#endif

	return vm_bytes;
} // CalculateVMAndRowBytes
1824 } 1825 1826 *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqHeight * *p->PTERequestSize); 1827 #ifdef __DML_VBA_DEBUG__ 1828 DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *p->dpte_row_width_ub); 1829 #endif 1830 } 1831 1832 if (p->GPUVMEnable != true) { 1833 *p->PixelPTEBytesPerRow = 0; 1834 *p->PixelPTEBytesPerRow_one_row_per_frame = 0; 1835 } 1836 1837 *p->PixelPTEBytesPerRowStorage = *p->PixelPTEBytesPerRow; 1838 1839 #ifdef __DML_VBA_DEBUG__ 1840 DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes); 1841 DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable); 1842 DML_LOG_VERBOSE("DML::%s: meta_row_height = %u\n", __func__, *p->meta_row_height); 1843 DML_LOG_VERBOSE("DML::%s: dpte_row_height = %u\n", __func__, *p->dpte_row_height); 1844 DML_LOG_VERBOSE("DML::%s: dpte_row_height_linear = %u\n", __func__, *p->dpte_row_height_linear); 1845 DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u\n", __func__, *p->dpte_row_width_ub); 1846 DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *p->PixelPTEBytesPerRow); 1847 DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *p->PixelPTEBytesPerRowStorage); 1848 DML_LOG_VERBOSE("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, p->PTEBufferSizeInRequests); 1849 DML_LOG_VERBOSE("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *p->dpte_row_height_one_row_per_frame); 1850 DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *p->dpte_row_width_ub_one_row_per_frame); 1851 DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, *p->PixelPTEBytesPerRow_one_row_per_frame); 1852 #endif 1853 1854 return vm_bytes; 1855 } // CalculateVMAndRowBytes 1856 1857 static unsigned int CalculatePrefetchSourceLines( 1858 double VRatio, 1859 unsigned int VTaps, 1860 bool Interlace, 1861 bool 
ProgressiveToInterlaceUnitInOPP, 1862 unsigned int SwathHeight, 1863 enum dml2_rotation_angle RotationAngle, 1864 bool mirrored, 1865 bool ViewportStationary, 1866 unsigned int SwathWidth, 1867 unsigned int ViewportHeight, 1868 unsigned int ViewportXStart, 1869 unsigned int ViewportYStart, 1870 1871 // Output 1872 unsigned int *VInitPreFill, 1873 unsigned int *MaxNumSwath) 1874 { 1875 1876 unsigned int vp_start_rot = 0; 1877 unsigned int sw0_tmp = 0; 1878 unsigned int MaxPartialSwath = 0; 1879 double numLines = 0; 1880 1881 #ifdef __DML_VBA_DEBUG__ 1882 DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio); 1883 DML_LOG_VERBOSE("DML::%s: VTaps = %u\n", __func__, VTaps); 1884 DML_LOG_VERBOSE("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart); 1885 DML_LOG_VERBOSE("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart); 1886 DML_LOG_VERBOSE("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary); 1887 DML_LOG_VERBOSE("DML::%s: SwathHeight = %u\n", __func__, SwathHeight); 1888 #endif 1889 if (ProgressiveToInterlaceUnitInOPP) 1890 *VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1) / 2.0, 1)); 1891 else 1892 *VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1 + (Interlace ? 
1 : 0) * 0.5 * VRatio) / 2.0, 1)); 1893 1894 if (ViewportStationary) { 1895 if (RotationAngle == dml2_rotation_180) { 1896 vp_start_rot = SwathHeight - (((unsigned int)(ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1); 1897 } else if ((RotationAngle == dml2_rotation_270 && !mirrored) || (RotationAngle == dml2_rotation_90 && mirrored)) { 1898 vp_start_rot = ViewportXStart; 1899 } else if ((RotationAngle == dml2_rotation_90 && !mirrored) || (RotationAngle == dml2_rotation_270 && mirrored)) { 1900 vp_start_rot = SwathHeight - (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1); 1901 } else { 1902 vp_start_rot = ViewportYStart; 1903 } 1904 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight); 1905 if (sw0_tmp < *VInitPreFill) { 1906 *MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - sw0_tmp) / (double)SwathHeight, 1) + 1); 1907 } else { 1908 *MaxNumSwath = 1; 1909 } 1910 MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(vp_start_rot + *VInitPreFill - 1) % SwathHeight)); 1911 } else { 1912 *MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - 1.0) / (double)SwathHeight, 1) + 1); 1913 if (*VInitPreFill > 1) { 1914 MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(*VInitPreFill - 2) % SwathHeight)); 1915 } else { 1916 MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(*VInitPreFill + SwathHeight - 2) % SwathHeight)); 1917 } 1918 } 1919 numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath; 1920 1921 #ifdef __DML_VBA_DEBUG__ 1922 DML_LOG_VERBOSE("DML::%s: vp_start_rot = %u\n", __func__, vp_start_rot); 1923 DML_LOG_VERBOSE("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill); 1924 DML_LOG_VERBOSE("DML::%s: MaxPartialSwath = %u\n", __func__, MaxPartialSwath); 1925 DML_LOG_VERBOSE("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath); 1926 DML_LOG_VERBOSE("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines); 1927 #endif 1928 return (unsigned int)(numLines); 1929 1930 } 1931 1932 static void 
CalculateRowBandwidth( 1933 bool GPUVMEnable, 1934 bool use_one_row_for_frame, 1935 enum dml2_source_format_class SourcePixelFormat, 1936 double VRatio, 1937 double VRatioChroma, 1938 bool DCCEnable, 1939 double LineTime, 1940 unsigned int PixelPTEBytesPerRowLuma, 1941 unsigned int PixelPTEBytesPerRowChroma, 1942 unsigned int dpte_row_height_luma, 1943 unsigned int dpte_row_height_chroma, 1944 1945 bool mrq_present, 1946 unsigned int meta_row_bytes_per_row_ub_l, 1947 unsigned int meta_row_bytes_per_row_ub_c, 1948 unsigned int meta_row_height_luma, 1949 unsigned int meta_row_height_chroma, 1950 1951 // Output 1952 double *dpte_row_bw, 1953 double *meta_row_bw) 1954 { 1955 (void)use_one_row_for_frame; 1956 if (!DCCEnable || !mrq_present) { 1957 *meta_row_bw = 0; 1958 } else if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) { 1959 *meta_row_bw = VRatio * meta_row_bytes_per_row_ub_l / (meta_row_height_luma * LineTime) 1960 + VRatioChroma * meta_row_bytes_per_row_ub_c / (meta_row_height_chroma * LineTime); 1961 } else { 1962 *meta_row_bw = VRatio * meta_row_bytes_per_row_ub_l / (meta_row_height_luma * LineTime); 1963 } 1964 1965 if (GPUVMEnable != true) { 1966 *dpte_row_bw = 0; 1967 } else if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) { 1968 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) 1969 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime); 1970 } else { 1971 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); 1972 } 1973 } 1974 1975 static void CalculateMALLUseForStaticScreen( 1976 const struct dml2_display_cfg *display_cfg, 1977 unsigned int NumberOfActiveSurfaces, 1978 unsigned int MALLAllocatedForDCN, 1979 unsigned int SurfaceSizeInMALL[], 1980 bool one_row_per_frame_fits_in_buffer[], 1981 1982 // Output 1983 bool is_using_mall_for_ss[]) 1984 { 1985 1986 unsigned int SurfaceToAddToMALL; 1987 bool 
CanAddAnotherSurfaceToMALL; 1988 unsigned int TotalSurfaceSizeInMALL; 1989 1990 TotalSurfaceSizeInMALL = 0; 1991 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { 1992 is_using_mall_for_ss[k] = (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable); 1993 if (is_using_mall_for_ss[k]) 1994 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; 1995 #ifdef __DML_VBA_DEBUG__ 1996 DML_LOG_VERBOSE("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, is_using_mall_for_ss[k]); 1997 DML_LOG_VERBOSE("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, TotalSurfaceSizeInMALL); 1998 #endif 1999 } 2000 2001 SurfaceToAddToMALL = 0; 2002 CanAddAnotherSurfaceToMALL = true; 2003 while (CanAddAnotherSurfaceToMALL) { 2004 CanAddAnotherSurfaceToMALL = false; 2005 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { 2006 if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCN * 1024 * 1024 && 2007 !is_using_mall_for_ss[k] && display_cfg->plane_descriptors[k].overrides.refresh_from_mall != dml2_refresh_from_mall_mode_override_force_disable && one_row_per_frame_fits_in_buffer[k] && 2008 (!CanAddAnotherSurfaceToMALL || SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) { 2009 CanAddAnotherSurfaceToMALL = true; 2010 SurfaceToAddToMALL = k; 2011 DML_LOG_VERBOSE("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, display_cfg->plane_descriptors[k].overrides.refresh_from_mall); 2012 } 2013 } 2014 if (CanAddAnotherSurfaceToMALL) { 2015 is_using_mall_for_ss[SurfaceToAddToMALL] = true; 2016 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL]; 2017 2018 #ifdef __DML_VBA_DEBUG__ 2019 DML_LOG_VERBOSE("DML::%s: SurfaceToAddToMALL = %u\n", __func__, SurfaceToAddToMALL); 2020 DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, TotalSurfaceSizeInMALL); 2021 #endif 2022 } 2023 } 2024 } 
2025 2026 static void CalculateDCCConfiguration( 2027 bool DCCEnabled, 2028 bool DCCProgrammingAssumesScanDirectionUnknown, 2029 enum dml2_source_format_class SourcePixelFormat, 2030 unsigned int SurfaceWidthLuma, 2031 unsigned int SurfaceWidthChroma, 2032 unsigned int SurfaceHeightLuma, 2033 unsigned int SurfaceHeightChroma, 2034 unsigned int nomDETInKByte, 2035 unsigned int RequestHeight256ByteLuma, 2036 unsigned int RequestHeight256ByteChroma, 2037 enum dml2_swizzle_mode TilingFormat, 2038 unsigned int BytePerPixelY, 2039 unsigned int BytePerPixelC, 2040 double BytePerPixelDETY, 2041 double BytePerPixelDETC, 2042 enum dml2_rotation_angle RotationAngle, 2043 2044 // Output 2045 enum dml2_core_internal_request_type *RequestLuma, 2046 enum dml2_core_internal_request_type *RequestChroma, 2047 unsigned int *MaxUncompressedBlockLuma, 2048 unsigned int *MaxUncompressedBlockChroma, 2049 unsigned int *MaxCompressedBlockLuma, 2050 unsigned int *MaxCompressedBlockChroma, 2051 unsigned int *IndependentBlockLuma, 2052 unsigned int *IndependentBlockChroma) 2053 { 2054 (void)SurfaceWidthChroma; 2055 (void)SurfaceHeightChroma; 2056 (void)TilingFormat; 2057 (void)BytePerPixelDETY; 2058 (void)BytePerPixelDETC; 2059 unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024; 2060 2061 unsigned int segment_order_horz_contiguous_luma; 2062 unsigned int segment_order_horz_contiguous_chroma; 2063 unsigned int segment_order_vert_contiguous_luma; 2064 unsigned int segment_order_vert_contiguous_chroma; 2065 2066 unsigned int req128_horz_wc_l; 2067 unsigned int req128_horz_wc_c; 2068 unsigned int req128_vert_wc_l; 2069 unsigned int req128_vert_wc_c; 2070 2071 unsigned int yuv420; 2072 unsigned int horz_div_l; 2073 unsigned int horz_div_c; 2074 unsigned int vert_div_l; 2075 unsigned int vert_div_c; 2076 2077 unsigned int swath_buf_size; 2078 double detile_buf_vp_horz_limit; 2079 double detile_buf_vp_vert_limit; 2080 2081 unsigned int MAS_vp_horz_limit; 2082 unsigned int MAS_vp_vert_limit; 
2083 unsigned int max_vp_horz_width; 2084 unsigned int max_vp_vert_height; 2085 unsigned int eff_surf_width_l; 2086 unsigned int eff_surf_width_c; 2087 unsigned int eff_surf_height_l; 2088 unsigned int eff_surf_height_c; 2089 2090 unsigned int full_swath_bytes_horz_wc_l; 2091 unsigned int full_swath_bytes_horz_wc_c; 2092 unsigned int full_swath_bytes_vert_wc_l; 2093 unsigned int full_swath_bytes_vert_wc_c; 2094 2095 if (dml_is_420(SourcePixelFormat)) 2096 yuv420 = 1; 2097 else 2098 yuv420 = 0; 2099 horz_div_l = 1; 2100 horz_div_c = 1; 2101 vert_div_l = 1; 2102 vert_div_c = 1; 2103 2104 if (BytePerPixelY == 1) 2105 vert_div_l = 0; 2106 if (BytePerPixelC == 1) 2107 vert_div_c = 0; 2108 2109 if (BytePerPixelC == 0) { 2110 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256; 2111 detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)); 2112 detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)); 2113 } else { 2114 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256; 2115 detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) + (double)RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420)); 2116 detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420)); 2117 } 2118 2119 if (SourcePixelFormat == dml2_420_10) { 2120 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit; 2121 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit; 2122 } 2123 2124 detile_buf_vp_horz_limit = math_floor2(detile_buf_vp_horz_limit - 1, 16); 2125 detile_buf_vp_vert_limit = math_floor2(detile_buf_vp_vert_limit - 1, 16); 2126 2127 MAS_vp_horz_limit = SourcePixelFormat == dml2_rgbe_alpha ? 3840 : 6144; 2128 MAS_vp_vert_limit = SourcePixelFormat == dml2_rgbe_alpha ? 
3840 : (BytePerPixelY == 8 ? 3072 : 6144); 2129 max_vp_horz_width = (unsigned int)(math_min2((double)MAS_vp_horz_limit, detile_buf_vp_horz_limit)); 2130 max_vp_vert_height = (unsigned int)(math_min2((double)MAS_vp_vert_limit, detile_buf_vp_vert_limit)); 2131 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma); 2132 eff_surf_width_c = eff_surf_width_l / (1 + yuv420); 2133 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma); 2134 eff_surf_height_c = eff_surf_height_l / (1 + yuv420); 2135 2136 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY; 2137 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma; 2138 if (BytePerPixelC > 0) { 2139 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC; 2140 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma; 2141 } else { 2142 full_swath_bytes_horz_wc_c = 0; 2143 full_swath_bytes_vert_wc_c = 0; 2144 } 2145 2146 if (SourcePixelFormat == dml2_420_10) { 2147 full_swath_bytes_horz_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0)); 2148 full_swath_bytes_horz_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0)); 2149 full_swath_bytes_vert_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0)); 2150 full_swath_bytes_vert_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0)); 2151 } 2152 2153 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { 2154 req128_horz_wc_l = 0; 2155 req128_horz_wc_c = 0; 2156 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { 2157 req128_horz_wc_l = 0; 2158 req128_horz_wc_c = 1; 2159 } else if 
(full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { 2160 req128_horz_wc_l = 1; 2161 req128_horz_wc_c = 0; 2162 } else { 2163 req128_horz_wc_l = 1; 2164 req128_horz_wc_c = 1; 2165 } 2166 2167 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { 2168 req128_vert_wc_l = 0; 2169 req128_vert_wc_c = 0; 2170 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { 2171 req128_vert_wc_l = 0; 2172 req128_vert_wc_c = 1; 2173 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { 2174 req128_vert_wc_l = 1; 2175 req128_vert_wc_c = 0; 2176 } else { 2177 req128_vert_wc_l = 1; 2178 req128_vert_wc_c = 1; 2179 } 2180 2181 if (BytePerPixelY == 2) { 2182 segment_order_horz_contiguous_luma = 0; 2183 segment_order_vert_contiguous_luma = 1; 2184 } else { 2185 segment_order_horz_contiguous_luma = 1; 2186 segment_order_vert_contiguous_luma = 0; 2187 } 2188 2189 if (BytePerPixelC == 2) { 2190 segment_order_horz_contiguous_chroma = 0; 2191 segment_order_vert_contiguous_chroma = 1; 2192 } else { 2193 segment_order_horz_contiguous_chroma = 1; 2194 segment_order_vert_contiguous_chroma = 0; 2195 } 2196 #ifdef __DML_VBA_DEBUG__ 2197 DML_LOG_VERBOSE("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled); 2198 DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte); 2199 DML_LOG_VERBOSE("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC); 2200 DML_LOG_VERBOSE("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l); 2201 DML_LOG_VERBOSE("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c); 2202 DML_LOG_VERBOSE("DML::%s: full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l); 2203 
DML_LOG_VERBOSE("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c); 2204 DML_LOG_VERBOSE("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma); 2205 DML_LOG_VERBOSE("DML::%s: segment_order_horz_contiguous_chroma = %u\n", __func__, segment_order_horz_contiguous_chroma); 2206 #endif 2207 if (DCCProgrammingAssumesScanDirectionUnknown == true) { 2208 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) { 2209 *RequestLuma = dml2_core_internal_request_type_256_bytes; 2210 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) { 2211 *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous; 2212 } else { 2213 *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous; 2214 } 2215 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) { 2216 *RequestChroma = dml2_core_internal_request_type_256_bytes; 2217 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) { 2218 *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous; 2219 } else { 2220 *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous; 2221 } 2222 } else if (!dml_is_vertical_rotation(RotationAngle)) { 2223 if (req128_horz_wc_l == 0) { 2224 *RequestLuma = dml2_core_internal_request_type_256_bytes; 2225 } else if (segment_order_horz_contiguous_luma == 0) { 2226 *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous; 2227 } else { 2228 *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous; 2229 } 2230 if (req128_horz_wc_c == 0) { 2231 *RequestChroma = dml2_core_internal_request_type_256_bytes; 2232 } else if (segment_order_horz_contiguous_chroma == 0) { 2233 *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous; 2234 } else { 2235 *RequestChroma = 
dml2_core_internal_request_type_128_bytes_contiguous; 2236 } 2237 } else { 2238 if (req128_vert_wc_l == 0) { 2239 *RequestLuma = dml2_core_internal_request_type_256_bytes; 2240 } else if (segment_order_vert_contiguous_luma == 0) { 2241 *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous; 2242 } else { 2243 *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous; 2244 } 2245 if (req128_vert_wc_c == 0) { 2246 *RequestChroma = dml2_core_internal_request_type_256_bytes; 2247 } else if (segment_order_vert_contiguous_chroma == 0) { 2248 *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous; 2249 } else { 2250 *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous; 2251 } 2252 } 2253 2254 if (*RequestLuma == dml2_core_internal_request_type_256_bytes) { 2255 *MaxUncompressedBlockLuma = 256; 2256 *MaxCompressedBlockLuma = 256; 2257 *IndependentBlockLuma = 0; 2258 } else if (*RequestLuma == dml2_core_internal_request_type_128_bytes_contiguous) { 2259 *MaxUncompressedBlockLuma = 256; 2260 *MaxCompressedBlockLuma = 128; 2261 *IndependentBlockLuma = 128; 2262 } else { 2263 *MaxUncompressedBlockLuma = 256; 2264 *MaxCompressedBlockLuma = 64; 2265 *IndependentBlockLuma = 64; 2266 } 2267 2268 if (*RequestChroma == dml2_core_internal_request_type_256_bytes) { 2269 *MaxUncompressedBlockChroma = 256; 2270 *MaxCompressedBlockChroma = 256; 2271 *IndependentBlockChroma = 0; 2272 } else if (*RequestChroma == dml2_core_internal_request_type_128_bytes_contiguous) { 2273 *MaxUncompressedBlockChroma = 256; 2274 *MaxCompressedBlockChroma = 128; 2275 *IndependentBlockChroma = 128; 2276 } else { 2277 *MaxUncompressedBlockChroma = 256; 2278 *MaxCompressedBlockChroma = 64; 2279 *IndependentBlockChroma = 64; 2280 } 2281 2282 if (DCCEnabled != true || BytePerPixelC == 0) { 2283 *MaxUncompressedBlockChroma = 0; 2284 *MaxCompressedBlockChroma = 0; 2285 *IndependentBlockChroma = 0; 2286 } 2287 2288 if (DCCEnabled != true) { 2289 
*MaxUncompressedBlockLuma = 0; 2290 *MaxCompressedBlockLuma = 0; 2291 *IndependentBlockLuma = 0; 2292 } 2293 2294 #ifdef __DML_VBA_DEBUG__ 2295 DML_LOG_VERBOSE("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma); 2296 DML_LOG_VERBOSE("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma); 2297 DML_LOG_VERBOSE("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma); 2298 DML_LOG_VERBOSE("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma); 2299 DML_LOG_VERBOSE("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma); 2300 DML_LOG_VERBOSE("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma); 2301 #endif 2302 2303 } 2304 2305 static void calculate_mcache_row_bytes( 2306 struct dml2_core_internal_scratch *scratch, 2307 struct dml2_core_calcs_calculate_mcache_row_bytes_params *p) 2308 { 2309 (void)scratch; 2310 unsigned int vmpg_bytes = 0; 2311 unsigned int blk_bytes = 0; 2312 float meta_per_mvmpg_per_channel = 0; 2313 unsigned int est_blk_per_vmpg = 2; 2314 unsigned int mvmpg_per_row_ub = 0; 2315 unsigned int full_vp_width_mvmpg_aligned = 0; 2316 unsigned int full_vp_height_mvmpg_aligned = 0; 2317 unsigned int meta_per_mvmpg_per_channel_ub = 0; 2318 unsigned int mvmpg_per_mcache; 2319 2320 #ifdef __DML_VBA_DEBUG__ 2321 DML_LOG_VERBOSE("DML::%s: num_chans = %u\n", __func__, p->num_chans); 2322 DML_LOG_VERBOSE("DML::%s: mem_word_bytes = %u\n", __func__, p->mem_word_bytes); 2323 DML_LOG_VERBOSE("DML::%s: mcache_line_size_bytes = %u\n", __func__, p->mcache_line_size_bytes); 2324 DML_LOG_VERBOSE("DML::%s: mcache_size_bytes = %u\n", __func__, p->mcache_size_bytes); 2325 DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable); 2326 DML_LOG_VERBOSE("DML::%s: gpuvm_page_size_kbytes = %u\n", __func__, p->gpuvm_page_size_kbytes); 2327 DML_LOG_VERBOSE("DML::%s: vp_stationary = %u\n", __func__, p->vp_stationary); 2328 
DML_LOG_VERBOSE("DML::%s: tiling_mode = %u\n", __func__, p->tiling_mode); 2329 DML_LOG_VERBOSE("DML::%s: vp_start_x = %u\n", __func__, p->vp_start_x); 2330 DML_LOG_VERBOSE("DML::%s: vp_start_y = %u\n", __func__, p->vp_start_y); 2331 DML_LOG_VERBOSE("DML::%s: full_vp_width = %u\n", __func__, p->full_vp_width); 2332 DML_LOG_VERBOSE("DML::%s: full_vp_height = %u\n", __func__, p->full_vp_height); 2333 DML_LOG_VERBOSE("DML::%s: blk_width = %u\n", __func__, p->blk_width); 2334 DML_LOG_VERBOSE("DML::%s: blk_height = %u\n", __func__, p->blk_height); 2335 DML_LOG_VERBOSE("DML::%s: vmpg_width = %u\n", __func__, p->vmpg_width); 2336 DML_LOG_VERBOSE("DML::%s: vmpg_height = %u\n", __func__, p->vmpg_height); 2337 DML_LOG_VERBOSE("DML::%s: full_swath_bytes = %u\n", __func__, p->full_swath_bytes); 2338 #endif 2339 DML_ASSERT(p->mcache_line_size_bytes != 0); 2340 DML_ASSERT(p->mcache_size_bytes != 0); 2341 2342 *p->mvmpg_width = 0; 2343 *p->mvmpg_height = 0; 2344 2345 if (p->full_vp_height == 0 && p->full_vp_width == 0) { 2346 *p->num_mcaches = 0; 2347 *p->mcache_row_bytes = 0; 2348 *p->mcache_row_bytes_per_channel = 0; 2349 } else { 2350 blk_bytes = dml_get_tile_block_size_bytes(p->tiling_mode); 2351 2352 // if gpuvm is not enable, the alignment boundary should be in terms of tiling block size 2353 vmpg_bytes = p->gpuvm_page_size_kbytes * 1024; 2354 2355 //With vmpg_bytes >= tile blk_bytes, the meta_row_width alignment equations are relative to the vmpg_width/height. 2356 // But for 4KB page with 64KB tile block, we need the meta for all pages in the tile block. 2357 // Therefore, the alignment is relative to the blk_width/height. The factor of 16 vmpg per 64KB tile block is applied at the end. 
2358 *p->mvmpg_width = p->blk_width; 2359 *p->mvmpg_height = p->blk_height; 2360 if (p->gpuvm_enable) { 2361 if (vmpg_bytes >= blk_bytes) { 2362 *p->mvmpg_width = p->vmpg_width; 2363 *p->mvmpg_height = p->vmpg_height; 2364 } else if (!((blk_bytes == 65536) && (vmpg_bytes == 4096))) { 2365 DML_LOG_VERBOSE("ERROR: DML::%s: Tiling size and vm page size combination not supported\n", __func__); 2366 DML_ASSERT(0); 2367 } 2368 } 2369 2370 //For plane0 & 1, first calculate full_vp_width/height_l/c aligned to vmpg_width/height_l/c 2371 full_vp_width_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_x + p->full_vp_width) + *p->mvmpg_width - 1, *p->mvmpg_width) - math_floor2(p->vp_start_x, *p->mvmpg_width)); 2372 full_vp_height_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_y + p->full_vp_height) + *p->mvmpg_height - 1, *p->mvmpg_height) - math_floor2(p->vp_start_y, *p->mvmpg_height)); 2373 2374 *p->full_vp_access_width_mvmpg_aligned = p->surf_vert ? full_vp_height_mvmpg_aligned : full_vp_width_mvmpg_aligned; 2375 2376 //Use the equation for the exact alignment when possible. Note that the exact alignment cannot be used for horizontal access if vmpg_bytes > blk_bytes. 
2377 if (!p->surf_vert) { //horizontal access 2378 if (p->vp_stationary == 1 && vmpg_bytes <= blk_bytes) 2379 *p->meta_row_width_ub = full_vp_width_mvmpg_aligned; 2380 else 2381 *p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_width - 1, *p->mvmpg_width) + *p->mvmpg_width; 2382 mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_width; 2383 } else { //vertical access 2384 if (p->vp_stationary == 1) 2385 *p->meta_row_width_ub = full_vp_height_mvmpg_aligned; 2386 else 2387 *p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_height - 1, *p->mvmpg_height) + *p->mvmpg_height; 2388 mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_height; 2389 } 2390 2391 if (p->gpuvm_enable) { 2392 meta_per_mvmpg_per_channel = (float)vmpg_bytes / (float)256 / p->num_chans; 2393 2394 //but using the est_blk_per_vmpg between 2 and 4, to be not as pessimestic 2395 if (p->surf_vert && vmpg_bytes > blk_bytes) { 2396 meta_per_mvmpg_per_channel = (float)est_blk_per_vmpg * blk_bytes / (float)256 / p->num_chans; 2397 } 2398 2399 *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel)); // dcc_dr_oh_nom 2400 } else { 2401 meta_per_mvmpg_per_channel = (float) blk_bytes / (float)256 / p->num_chans; 2402 2403 if (!p->surf_vert) 2404 *p->dcc_dram_bw_nom_overhead_factor = 1 + 1.0 / 256.0; 2405 else 2406 *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel)); 2407 } 2408 2409 meta_per_mvmpg_per_channel_ub = (unsigned int)math_ceil2((double)meta_per_mvmpg_per_channel, p->mcache_line_size_bytes); 2410 2411 //but for 4KB vmpg with 64KB tile blk 2412 if (p->gpuvm_enable && (blk_bytes == 65536) && (vmpg_bytes == 4096)) 2413 meta_per_mvmpg_per_channel_ub = 16 * meta_per_mvmpg_per_channel_ub; 2414 2415 // If this mcache_row_bytes for the full viewport of the 
surface is less than or equal to mcache_bytes, 2416 // then one mcache can be used for this request stream. If not, it is useful to know the width of the viewport that can be supported in the mcache_bytes. 2417 if (p->gpuvm_enable || p->surf_vert) { 2418 *p->mcache_row_bytes_per_channel = mvmpg_per_row_ub * meta_per_mvmpg_per_channel_ub; 2419 *p->mcache_row_bytes = *p->mcache_row_bytes_per_channel * p->num_chans; 2420 } else { // horizontal and gpuvm disable 2421 *p->mcache_row_bytes = *p->meta_row_width_ub * p->blk_height * p->bytes_per_pixel / 256; 2422 if (p->mcache_line_size_bytes != 0) 2423 *p->mcache_row_bytes_per_channel = (unsigned int)math_ceil2((double)*p->mcache_row_bytes / p->num_chans, p->mcache_line_size_bytes); 2424 } 2425 2426 *p->dcc_dram_bw_pref_overhead_factor = 1 + math_max2(1.0 / 256.0, *p->mcache_row_bytes / p->full_swath_bytes); // dcc_dr_oh_pref 2427 if (p->mcache_size_bytes != 0) 2428 *p->num_mcaches = (unsigned int)math_ceil2((double)*p->mcache_row_bytes_per_channel / p->mcache_size_bytes, 1); 2429 2430 mvmpg_per_mcache = p->mcache_size_bytes / meta_per_mvmpg_per_channel_ub; 2431 *p->mvmpg_per_mcache_lb = (unsigned int)math_floor2(mvmpg_per_mcache, 1); 2432 2433 #ifdef __DML_VBA_DEBUG__ 2434 DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable); 2435 DML_LOG_VERBOSE("DML::%s: vmpg_bytes = %u\n", __func__, vmpg_bytes); 2436 DML_LOG_VERBOSE("DML::%s: blk_bytes = %u\n", __func__, blk_bytes); 2437 DML_LOG_VERBOSE("DML::%s: meta_per_mvmpg_per_channel = %f\n", __func__, meta_per_mvmpg_per_channel); 2438 DML_LOG_VERBOSE("DML::%s: mvmpg_per_row_ub = %u\n", __func__, mvmpg_per_row_ub); 2439 DML_LOG_VERBOSE("DML::%s: meta_row_width_ub = %u\n", __func__, *p->meta_row_width_ub); 2440 DML_LOG_VERBOSE("DML::%s: mvmpg_width = %u\n", __func__, *p->mvmpg_width); 2441 DML_LOG_VERBOSE("DML::%s: mvmpg_height = %u\n", __func__, *p->mvmpg_height); 2442 DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor = %f\n", __func__, 
*p->dcc_dram_bw_nom_overhead_factor); 2443 DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_pref_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_pref_overhead_factor); 2444 #endif 2445 } 2446 2447 #ifdef __DML_VBA_DEBUG__ 2448 DML_LOG_VERBOSE("DML::%s: mcache_row_bytes = %u\n", __func__, *p->mcache_row_bytes); 2449 DML_LOG_VERBOSE("DML::%s: mcache_row_bytes_per_channel = %u\n", __func__, *p->mcache_row_bytes_per_channel); 2450 DML_LOG_VERBOSE("DML::%s: num_mcaches = %u\n", __func__, *p->num_mcaches); 2451 #endif 2452 DML_ASSERT(*p->num_mcaches > 0); 2453 } 2454 2455 static void calculate_mcache_setting( 2456 struct dml2_core_internal_scratch *scratch, 2457 struct dml2_core_calcs_calculate_mcache_setting_params *p) 2458 { 2459 unsigned int n; 2460 2461 struct dml2_core_shared_calculate_mcache_setting_locals *l = &scratch->calculate_mcache_setting_locals; 2462 memset(l, 0, sizeof(struct dml2_core_shared_calculate_mcache_setting_locals)); 2463 2464 *p->num_mcaches_l = 0; 2465 *p->mcache_row_bytes_l = 0; 2466 *p->mcache_row_bytes_per_channel_l = 0; 2467 *p->dcc_dram_bw_nom_overhead_factor_l = 1.0; 2468 *p->dcc_dram_bw_pref_overhead_factor_l = 1.0; 2469 2470 *p->num_mcaches_c = 0; 2471 *p->mcache_row_bytes_c = 0; 2472 *p->mcache_row_bytes_per_channel_c = 0; 2473 *p->dcc_dram_bw_nom_overhead_factor_c = 1.0; 2474 *p->dcc_dram_bw_pref_overhead_factor_c = 1.0; 2475 2476 *p->mall_comb_mcache_l = 0; 2477 *p->mall_comb_mcache_c = 0; 2478 *p->lc_comb_mcache = 0; 2479 2480 if (!p->dcc_enable) 2481 return; 2482 2483 l->is_dual_plane = dml_is_420(p->source_format) || p->source_format == dml2_rgbe_alpha; 2484 2485 l->l_p.num_chans = p->num_chans; 2486 l->l_p.mem_word_bytes = p->mem_word_bytes; 2487 l->l_p.mcache_size_bytes = p->mcache_size_bytes; 2488 l->l_p.mcache_line_size_bytes = p->mcache_line_size_bytes; 2489 l->l_p.gpuvm_enable = p->gpuvm_enable; 2490 l->l_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes; 2491 l->l_p.surf_vert = p->surf_vert; 2492 l->l_p.vp_stationary = 
p->vp_stationary; 2493 l->l_p.tiling_mode = p->tiling_mode; 2494 l->l_p.vp_start_x = p->vp_start_x_l; 2495 l->l_p.vp_start_y = p->vp_start_y_l; 2496 l->l_p.full_vp_width = p->full_vp_width_l; 2497 l->l_p.full_vp_height = p->full_vp_height_l; 2498 l->l_p.blk_width = p->blk_width_l; 2499 l->l_p.blk_height = p->blk_height_l; 2500 l->l_p.vmpg_width = p->vmpg_width_l; 2501 l->l_p.vmpg_height = p->vmpg_height_l; 2502 l->l_p.full_swath_bytes = p->full_swath_bytes_l; 2503 l->l_p.bytes_per_pixel = p->bytes_per_pixel_l; 2504 2505 // output 2506 l->l_p.num_mcaches = p->num_mcaches_l; 2507 l->l_p.mcache_row_bytes = p->mcache_row_bytes_l; 2508 l->l_p.mcache_row_bytes_per_channel = p->mcache_row_bytes_per_channel_l; 2509 l->l_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_l; 2510 l->l_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_l; 2511 l->l_p.mvmpg_width = &l->mvmpg_width_l; 2512 l->l_p.mvmpg_height = &l->mvmpg_height_l; 2513 l->l_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_l; 2514 l->l_p.meta_row_width_ub = &l->meta_row_width_l; 2515 l->l_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_l; 2516 2517 calculate_mcache_row_bytes(scratch, &l->l_p); 2518 DML_ASSERT(*p->num_mcaches_l > 0); 2519 2520 if (l->is_dual_plane) { 2521 l->c_p.num_chans = p->num_chans; 2522 l->c_p.mem_word_bytes = p->mem_word_bytes; 2523 l->c_p.mcache_size_bytes = p->mcache_size_bytes; 2524 l->c_p.mcache_line_size_bytes = p->mcache_line_size_bytes; 2525 l->c_p.gpuvm_enable = p->gpuvm_enable; 2526 l->c_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes; 2527 l->c_p.surf_vert = p->surf_vert; 2528 l->c_p.vp_stationary = p->vp_stationary; 2529 l->c_p.tiling_mode = p->tiling_mode; 2530 l->c_p.vp_start_x = p->vp_start_x_c; 2531 l->c_p.vp_start_y = p->vp_start_y_c; 2532 l->c_p.full_vp_width = p->full_vp_width_c; 2533 l->c_p.full_vp_height = p->full_vp_height_c; 2534 l->c_p.blk_width = p->blk_width_c; 2535 l->c_p.blk_height = 
p->blk_height_c; 2536 l->c_p.vmpg_width = p->vmpg_width_c; 2537 l->c_p.vmpg_height = p->vmpg_height_c; 2538 l->c_p.full_swath_bytes = p->full_swath_bytes_c; 2539 l->c_p.bytes_per_pixel = p->bytes_per_pixel_c; 2540 2541 // output 2542 l->c_p.num_mcaches = p->num_mcaches_c; 2543 l->c_p.mcache_row_bytes = p->mcache_row_bytes_c; 2544 l->c_p.mcache_row_bytes_per_channel = p->mcache_row_bytes_per_channel_c; 2545 l->c_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_c; 2546 l->c_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_c; 2547 l->c_p.mvmpg_width = &l->mvmpg_width_c; 2548 l->c_p.mvmpg_height = &l->mvmpg_height_c; 2549 l->c_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_c; 2550 l->c_p.meta_row_width_ub = &l->meta_row_width_c; 2551 l->c_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_c; 2552 2553 calculate_mcache_row_bytes(scratch, &l->c_p); 2554 DML_ASSERT(*p->num_mcaches_c > 0); 2555 } 2556 2557 // Sharing for iMALL access 2558 l->mcache_remainder_l = *p->mcache_row_bytes_per_channel_l % p->mcache_size_bytes; 2559 l->mcache_remainder_c = *p->mcache_row_bytes_per_channel_c % p->mcache_size_bytes; 2560 l->mvmpg_access_width_l = p->surf_vert ? l->mvmpg_height_l : l->mvmpg_width_l; 2561 l->mvmpg_access_width_c = p->surf_vert ? 
l->mvmpg_height_c : l->mvmpg_width_c; 2562 2563 if (p->imall_enable) { 2564 *p->mall_comb_mcache_l = (2 * l->mcache_remainder_l <= p->mcache_size_bytes); 2565 2566 if (l->is_dual_plane) 2567 *p->mall_comb_mcache_c = (2 * l->mcache_remainder_c <= p->mcache_size_bytes); 2568 } 2569 2570 if (!p->surf_vert) // horizonatal access 2571 l->luma_time_factor = (double)l->mvmpg_height_c / l->mvmpg_height_l * 2; 2572 else // vertical access 2573 l->luma_time_factor = (double)l->mvmpg_width_c / l->mvmpg_width_l * 2; 2574 2575 // The algorithm starts with computing a non-integer, avg_mcache_element_size_l/c: 2576 if (*p->num_mcaches_l) { 2577 l->avg_mcache_element_size_l = l->meta_row_width_l / *p->num_mcaches_l; 2578 } 2579 if (l->is_dual_plane) { 2580 l->avg_mcache_element_size_c = l->meta_row_width_c / *p->num_mcaches_c; 2581 2582 /* if either remainder is 0, then mcache sharing is not needed or not possible due to full utilization */ 2583 if (l->mcache_remainder_l && l->mcache_remainder_c) { 2584 if (!p->imall_enable || (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c)) { 2585 l->lc_comb_last_mcache_size = (unsigned int)((l->mcache_remainder_l * (*p->mall_comb_mcache_l ? 2 : 1) * l->luma_time_factor) + 2586 (l->mcache_remainder_c * (*p->mall_comb_mcache_c ? 
2 : 1))); 2587 } 2588 *p->lc_comb_mcache = (l->lc_comb_last_mcache_size <= p->mcache_size_bytes) && (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c); 2589 } 2590 } 2591 2592 #ifdef __DML_VBA_DEBUG__ 2593 DML_LOG_VERBOSE("DML::%s: imall_enable = %u\n", __func__, p->imall_enable); 2594 DML_LOG_VERBOSE("DML::%s: is_dual_plane = %u\n", __func__, l->is_dual_plane); 2595 DML_LOG_VERBOSE("DML::%s: surf_vert = %u\n", __func__, p->surf_vert); 2596 DML_LOG_VERBOSE("DML::%s: mvmpg_width_l = %u\n", __func__, l->mvmpg_width_l); 2597 DML_LOG_VERBOSE("DML::%s: mvmpg_height_l = %u\n", __func__, l->mvmpg_height_l); 2598 DML_LOG_VERBOSE("DML::%s: mcache_remainder_l = %f\n", __func__, l->mcache_remainder_l); 2599 DML_LOG_VERBOSE("DML::%s: num_mcaches_l = %u\n", __func__, *p->num_mcaches_l); 2600 DML_LOG_VERBOSE("DML::%s: avg_mcache_element_size_l = %u\n", __func__, l->avg_mcache_element_size_l); 2601 DML_LOG_VERBOSE("DML::%s: mvmpg_access_width_l = %u\n", __func__, l->mvmpg_access_width_l); 2602 DML_LOG_VERBOSE("DML::%s: mall_comb_mcache_l = %u\n", __func__, *p->mall_comb_mcache_l); 2603 2604 if (l->is_dual_plane) { 2605 DML_LOG_VERBOSE("DML::%s: mvmpg_width_c = %u\n", __func__, l->mvmpg_width_c); 2606 DML_LOG_VERBOSE("DML::%s: mvmpg_height_c = %u\n", __func__, l->mvmpg_height_c); 2607 DML_LOG_VERBOSE("DML::%s: mcache_remainder_c = %f\n", __func__, l->mcache_remainder_c); 2608 DML_LOG_VERBOSE("DML::%s: luma_time_factor = %f\n", __func__, l->luma_time_factor); 2609 DML_LOG_VERBOSE("DML::%s: num_mcaches_c = %u\n", __func__, *p->num_mcaches_c); 2610 DML_LOG_VERBOSE("DML::%s: avg_mcache_element_size_c = %u\n", __func__, l->avg_mcache_element_size_c); 2611 DML_LOG_VERBOSE("DML::%s: mvmpg_access_width_c = %u\n", __func__, l->mvmpg_access_width_c); 2612 DML_LOG_VERBOSE("DML::%s: mall_comb_mcache_c = %u\n", __func__, *p->mall_comb_mcache_c); 2613 DML_LOG_VERBOSE("DML::%s: lc_comb_last_mcache_size = %u\n", __func__, l->lc_comb_last_mcache_size); 2614 DML_LOG_VERBOSE("DML::%s: lc_comb_mcache 
= %u\n", __func__, *p->lc_comb_mcache); 2615 } 2616 #endif 2617 // calculate split_coordinate 2618 l->full_vp_access_width_l = p->surf_vert ? p->full_vp_height_l : p->full_vp_width_l; 2619 l->full_vp_access_width_c = p->surf_vert ? p->full_vp_height_c : p->full_vp_width_c; 2620 2621 for (n = 0; n < *p->num_mcaches_l - 1; n++) { 2622 p->mcache_offsets_l[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_l / l->mvmpg_access_width_l, 1)) * l->mvmpg_access_width_l; 2623 } 2624 p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l; 2625 2626 if (l->is_dual_plane) { 2627 for (n = 0; n < *p->num_mcaches_c - 1; n++) { 2628 p->mcache_offsets_c[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_c / l->mvmpg_access_width_c, 1)) * l->mvmpg_access_width_c; 2629 } 2630 p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c; 2631 } 2632 #ifdef __DML_VBA_DEBUG__ 2633 for (n = 0; n < *p->num_mcaches_l; n++) 2634 DML_LOG_VERBOSE("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]); 2635 2636 if (l->is_dual_plane) { 2637 for (n = 0; n < *p->num_mcaches_c; n++) 2638 DML_LOG_VERBOSE("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]); 2639 } 2640 #endif 2641 2642 // Luma/Chroma combine in the last mcache 2643 // In the case of Luma/Chroma combine-mCache (with lc_comb_mcache==1), all mCaches except the last segment are filled as much as possible, when stay aligned to mvmpg boundary 2644 if (*p->lc_comb_mcache && l->is_dual_plane) { 2645 for (n = 0; n < *p->num_mcaches_l - 1; n++) 2646 p->mcache_offsets_l[n] = (n + 1) * l->mvmpg_per_mcache_lb_l * l->mvmpg_access_width_l; 2647 p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l; 2648 2649 for (n = 0; n < *p->num_mcaches_c - 1; n++) 2650 p->mcache_offsets_c[n] = (n + 1) * l->mvmpg_per_mcache_lb_c * l->mvmpg_access_width_c; 2651 p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c; 2652 
2653 #ifdef __DML_VBA_DEBUG__ 2654 for (n = 0; n < *p->num_mcaches_l; n++) 2655 DML_LOG_VERBOSE("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]); 2656 2657 for (n = 0; n < *p->num_mcaches_c; n++) 2658 DML_LOG_VERBOSE("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]); 2659 #endif 2660 } 2661 2662 *p->mcache_shift_granularity_l = l->mvmpg_access_width_l; 2663 *p->mcache_shift_granularity_c = l->mvmpg_access_width_c; 2664 } 2665 2666 static void calculate_mall_bw_overhead_factor( 2667 double mall_prefetch_sdp_overhead_factor[], //mall_sdp_oh_nom/pref 2668 double mall_prefetch_dram_overhead_factor[], //mall_dram_oh_nom/pref 2669 2670 // input 2671 const struct dml2_display_cfg *display_cfg, 2672 unsigned int num_active_planes) 2673 { 2674 for (unsigned int k = 0; k < num_active_planes; ++k) { 2675 mall_prefetch_sdp_overhead_factor[k] = 1.0; 2676 mall_prefetch_dram_overhead_factor[k] = 1.0; 2677 2678 // SDP - on the return side 2679 if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall) // always no data return 2680 mall_prefetch_sdp_overhead_factor[k] = 1.25; 2681 else if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) 2682 mall_prefetch_sdp_overhead_factor[k] = 0.25; 2683 2684 // DRAM 2685 if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall) 2686 mall_prefetch_dram_overhead_factor[k] = 2.0; 2687 2688 #ifdef __DML_VBA_DEBUG__ 2689 DML_LOG_VERBOSE("DML::%s: k=%u, mall_prefetch_sdp_overhead_factor = %f\n", __func__, k, mall_prefetch_sdp_overhead_factor[k]); 2690 DML_LOG_VERBOSE("DML::%s: k=%u, mall_prefetch_dram_overhead_factor = %f\n", __func__, k, mall_prefetch_dram_overhead_factor[k]); 2691 #endif 2692 } 2693 } 2694 2695 static double dml_get_return_bandwidth_available( 2696 const struct dml2_soc_bb *soc, 2697 enum dml2_core_internal_soc_state_type 
state_type, 2698 enum dml2_core_internal_bw_type bw_type, 2699 bool is_avg_bw, 2700 bool is_hvm_en, 2701 bool is_hvm_only, 2702 double dcfclk_mhz, 2703 double fclk_mhz, 2704 double dram_bw_mbps) 2705 { 2706 double return_bw_mbps = 0.; 2707 double ideal_sdp_bandwidth = (double)soc->return_bus_width_bytes * dcfclk_mhz; 2708 double ideal_fabric_bandwidth = fclk_mhz * (double)soc->fabric_datapath_to_dcn_data_return_bytes; 2709 double ideal_dram_bandwidth = dram_bw_mbps; //dram_speed_mts * soc->clk_table.dram_config.channel_count * soc->clk_table.dram_config.channel_width_bytes; 2710 2711 double derate_sdp_factor; 2712 double derate_fabric_factor; 2713 double derate_dram_factor; 2714 2715 double derate_sdp_bandwidth; 2716 double derate_fabric_bandwidth; 2717 double derate_dram_bandwidth; 2718 2719 if (is_avg_bw) { 2720 if (state_type == dml2_core_internal_soc_state_svp_prefetch) { 2721 derate_sdp_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dcfclk_derate_percent / 100.0; 2722 derate_fabric_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.fclk_derate_percent / 100.0; 2723 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dram_derate_percent_pixel / 100.0; 2724 } else { // just assume sys_active 2725 derate_sdp_factor = soc->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100.0; 2726 derate_fabric_factor = soc->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100.0; 2727 derate_dram_factor = soc->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100.0; 2728 } 2729 } else { // urgent bw 2730 if (state_type == dml2_core_internal_soc_state_svp_prefetch) { 2731 derate_sdp_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dcfclk_derate_percent / 100.0; 2732 derate_fabric_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.fclk_derate_percent / 100.0; 2733 derate_dram_factor = 
soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100.0; 2734 2735 if (is_hvm_en) { 2736 if (is_hvm_only) 2737 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_vm / 100.0; 2738 else 2739 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel_and_vm / 100.0; 2740 } else { 2741 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100.0; 2742 } 2743 } else { // just assume sys_active 2744 derate_sdp_factor = soc->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0; 2745 derate_fabric_factor = soc->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100.0; 2746 2747 if (is_hvm_en) { 2748 if (is_hvm_only) 2749 derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_vm / 100.0; 2750 else 2751 derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel_and_vm / 100.0; 2752 } else { 2753 derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100.0; 2754 } 2755 } 2756 } 2757 2758 derate_sdp_bandwidth = ideal_sdp_bandwidth * derate_sdp_factor; 2759 derate_fabric_bandwidth = ideal_fabric_bandwidth * derate_fabric_factor; 2760 derate_dram_bandwidth = ideal_dram_bandwidth * derate_dram_factor; 2761 2762 if (bw_type == dml2_core_internal_bw_sdp) 2763 return_bw_mbps = math_min2(derate_sdp_bandwidth, derate_fabric_bandwidth); 2764 else // dml2_core_internal_bw_dram 2765 return_bw_mbps = derate_dram_bandwidth; 2766 2767 DML_LOG_VERBOSE("DML::%s: is_avg_bw = %u\n", __func__, is_avg_bw); 2768 DML_LOG_VERBOSE("DML::%s: is_hvm_en = %u\n", __func__, is_hvm_en); 2769 DML_LOG_VERBOSE("DML::%s: is_hvm_only = %u\n", __func__, is_hvm_only); 2770 DML_LOG_VERBOSE("DML::%s: state_type = %s\n", __func__, 
dml2_core_internal_soc_state_type_str(state_type)); 2771 DML_LOG_VERBOSE("DML::%s: bw_type = %s\n", __func__, dml2_core_internal_bw_type_str(bw_type)); 2772 DML_LOG_VERBOSE("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz); 2773 DML_LOG_VERBOSE("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz); 2774 DML_LOG_VERBOSE("DML::%s: ideal_sdp_bandwidth = %f\n", __func__, ideal_sdp_bandwidth); 2775 DML_LOG_VERBOSE("DML::%s: ideal_fabric_bandwidth = %f\n", __func__, ideal_fabric_bandwidth); 2776 DML_LOG_VERBOSE("DML::%s: ideal_dram_bandwidth = %f\n", __func__, ideal_dram_bandwidth); 2777 DML_LOG_VERBOSE("DML::%s: derate_sdp_bandwidth = %f (derate %f)\n", __func__, derate_sdp_bandwidth, derate_sdp_factor); 2778 DML_LOG_VERBOSE("DML::%s: derate_fabric_bandwidth = %f (derate %f)\n", __func__, derate_fabric_bandwidth, derate_fabric_factor); 2779 DML_LOG_VERBOSE("DML::%s: derate_dram_bandwidth = %f (derate %f)\n", __func__, derate_dram_bandwidth, derate_dram_factor); 2780 DML_LOG_VERBOSE("DML::%s: return_bw_mbps = %f\n", __func__, return_bw_mbps); 2781 return return_bw_mbps; 2782 } 2783 2784 static noinline_for_stack void calculate_bandwidth_available( 2785 double avg_bandwidth_available_min[dml2_core_internal_soc_state_max], 2786 double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], 2787 double urg_bandwidth_available_min[dml2_core_internal_soc_state_max], // min between SDP and DRAM 2788 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], 2789 double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max], 2790 double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max], 2791 2792 const struct dml2_soc_bb *soc, 2793 bool HostVMEnable, 2794 double dcfclk_mhz, 2795 double fclk_mhz, 2796 double dram_bw_mbps) 2797 { 2798 unsigned int n, m; 2799 2800 DML_LOG_VERBOSE("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz); 2801 DML_LOG_VERBOSE("DML::%s: fclk_mhz = %f\n", __func__, 
fclk_mhz); 2802 DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, dram_bw_mbps); 2803 2804 // Calculate all the bandwidth availabe 2805 for (m = 0; m < dml2_core_internal_soc_state_max; m++) { 2806 for (n = 0; n < dml2_core_internal_bw_max; n++) { 2807 avg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc, 2808 m, // soc_state 2809 n, // bw_type 2810 1, // avg_bw 2811 HostVMEnable, 2812 0, // hvm_only 2813 dcfclk_mhz, 2814 fclk_mhz, 2815 dram_bw_mbps); 2816 2817 urg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps); 2818 2819 2820 #ifdef __DML_VBA_DEBUG__ 2821 DML_LOG_VERBOSE("DML::%s: avg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), avg_bandwidth_available[m][n]); 2822 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_available[m][n]); 2823 #endif 2824 2825 // urg_bandwidth_available_vm_only is indexed by soc_state 2826 if (n == dml2_core_internal_bw_dram) { 2827 urg_bandwidth_available_vm_only[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 1, dcfclk_mhz, fclk_mhz, dram_bw_mbps); 2828 urg_bandwidth_available_pixel_and_vm[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps); 2829 } 2830 } 2831 2832 avg_bandwidth_available_min[m] = math_min2(avg_bandwidth_available[m][dml2_core_internal_bw_dram], avg_bandwidth_available[m][dml2_core_internal_bw_sdp]); 2833 urg_bandwidth_available_min[m] = math_min2(urg_bandwidth_available[m][dml2_core_internal_bw_dram], urg_bandwidth_available[m][dml2_core_internal_bw_sdp]); 2834 2835 #ifdef __DML_VBA_DEBUG__ 2836 DML_LOG_VERBOSE("DML::%s: avg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), avg_bandwidth_available_min[m]); 2837 
DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_min[m]); 2838 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_vm_only[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_vm_only[n]); 2839 #endif 2840 } 2841 } 2842 2843 static void calculate_avg_bandwidth_required( 2844 double avg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], 2845 2846 // input 2847 const struct dml2_display_cfg *display_cfg, 2848 unsigned int num_active_planes, 2849 double ReadBandwidthLuma[], 2850 double ReadBandwidthChroma[], 2851 double cursor_bw[], 2852 double dcc_dram_bw_nom_overhead_factor_p0[], 2853 double dcc_dram_bw_nom_overhead_factor_p1[], 2854 double mall_prefetch_dram_overhead_factor[], 2855 double mall_prefetch_sdp_overhead_factor[]) 2856 { 2857 unsigned int n, m, k; 2858 double sdp_overhead_factor; 2859 double dram_overhead_factor_p0; 2860 double dram_overhead_factor_p1; 2861 2862 // Average BW support check 2863 for (m = 0; m < dml2_core_internal_soc_state_max; m++) { 2864 for (n = 0; n < dml2_core_internal_bw_max; n++) { // sdp, dram 2865 avg_bandwidth_required[m][n] = 0; 2866 } 2867 } 2868 2869 // SysActive and SVP Prefetch AVG bandwidth Check 2870 for (k = 0; k < num_active_planes; ++k) { 2871 #ifdef __DML_VBA_DEBUG__ 2872 DML_LOG_VERBOSE("DML::%s: plane %0d\n", __func__, k); 2873 DML_LOG_VERBOSE("DML::%s: ReadBandwidthLuma=%f\n", __func__, ReadBandwidthLuma[k]); 2874 DML_LOG_VERBOSE("DML::%s: ReadBandwidthChroma=%f\n", __func__, ReadBandwidthChroma[k]); 2875 DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor_p0=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p0[k]); 2876 DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor_p1=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p1[k]); 2877 DML_LOG_VERBOSE("DML::%s: mall_prefetch_dram_overhead_factor=%f\n", __func__, mall_prefetch_dram_overhead_factor[k]); 
2878 DML_LOG_VERBOSE("DML::%s: mall_prefetch_sdp_overhead_factor=%f\n", __func__, mall_prefetch_sdp_overhead_factor[k]); 2879 #endif 2880 2881 sdp_overhead_factor = mall_prefetch_sdp_overhead_factor[k]; 2882 dram_overhead_factor_p0 = dcc_dram_bw_nom_overhead_factor_p0[k] * mall_prefetch_dram_overhead_factor[k]; 2883 dram_overhead_factor_p1 = dcc_dram_bw_nom_overhead_factor_p1[k] * mall_prefetch_dram_overhead_factor[k]; 2884 2885 // FIXME_DCN4, was missing cursor_bw in here, but do I actually need that and tdlut bw for average bandwidth calculation? 2886 // active avg bw not include phantom, but svp_prefetch avg bw should include phantom pipes 2887 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) { 2888 avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] += sdp_overhead_factor * (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) + cursor_bw[k]; 2889 avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] += dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k]; 2890 } 2891 avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] += sdp_overhead_factor * (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) + cursor_bw[k]; 2892 avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] += dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k]; 2893 2894 #ifdef __DML_VBA_DEBUG__ 2895 DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); 2896 DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, 
dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); 2897 DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); 2898 DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); 2899 #endif 2900 } 2901 } 2902 2903 static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, 2904 struct dml2_core_calcs_CalculateVMRowAndSwath_params *p) 2905 { 2906 struct dml2_core_calcs_CalculateVMRowAndSwath_locals *s = &scratch->CalculateVMRowAndSwath_locals; 2907 2908 s->HostVMDynamicLevels = CalculateHostVMDynamicLevels(p->display_cfg->gpuvm_enable, p->display_cfg->hostvm_enable, p->HostVMMinPageSize, p->display_cfg->hostvm_max_non_cached_page_table_levels); 2909 2910 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { 2911 if (p->display_cfg->gpuvm_enable == true) { 2912 p->vm_group_bytes[k] = 512; 2913 p->dpte_group_bytes[k] = 512; 2914 } else { 2915 p->vm_group_bytes[k] = 0; 2916 p->dpte_group_bytes[k] = 0; 2917 } 2918 2919 if (dml_is_420(p->myPipe[k].SourcePixelFormat) || p->myPipe[k].SourcePixelFormat == dml2_rgbe_alpha) { 2920 if ((p->myPipe[k].SourcePixelFormat == dml2_420_10 || p->myPipe[k].SourcePixelFormat == dml2_420_12) && !dml_is_vertical_rotation(p->myPipe[k].RotationAngle)) { 2921 s->PTEBufferSizeInRequestsForLuma[k] = (p->PTEBufferSizeInRequestsLuma + 
p->PTEBufferSizeInRequestsChroma) / 2; 2922 s->PTEBufferSizeInRequestsForChroma[k] = s->PTEBufferSizeInRequestsForLuma[k]; 2923 } else { 2924 s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma; 2925 s->PTEBufferSizeInRequestsForChroma[k] = p->PTEBufferSizeInRequestsChroma; 2926 } 2927 2928 scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary; 2929 scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable; 2930 scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface; 2931 scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesC; 2932 scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesC; 2933 scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat; 2934 scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling; 2935 scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelC; 2936 scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle; 2937 scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthC[k]; 2938 scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeightC; 2939 scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStartC; 2940 scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStartC; 2941 scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable; 2942 scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels; 2943 scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; 2944 scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = 
s->PTEBufferSizeInRequestsForChroma[k]; 2945 scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchC; 2946 scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = p->myPipe[k].BlockWidthC; 2947 scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightC; 2948 scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]); 2949 scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchC; 2950 scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present; 2951 2952 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowC[k]; 2953 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageC[k]; 2954 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_chroma_ub[k]; 2955 scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_chroma[k]; 2956 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_chroma[k]; 2957 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowC_one_row_per_frame[k]; 2958 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_chroma_ub_one_row_per_frame[k]; 2959 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_chroma_one_row_per_frame[k]; 2960 scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_c[k]; 2961 scratch->calculate_vm_and_row_bytes_params.vmpg_height = &p->vmpg_height_c[k]; 2962 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthC[k]; 2963 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightC[k]; 2964 scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeC[k]; 2965 
scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_c[k]; 2966 2967 scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_c[k]; 2968 scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_chroma[k]; 2969 scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_chroma[k]; 2970 scratch->calculate_vm_and_row_bytes_params.meta_row_width = &p->meta_row_width_chroma[k]; 2971 scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_chroma[k]; 2972 scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_c[k]; 2973 2974 s->vm_bytes_c = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params); 2975 2976 p->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( 2977 p->myPipe[k].VRatioChroma, 2978 p->myPipe[k].VTapsChroma, 2979 p->myPipe[k].InterlaceEnable, 2980 p->myPipe[k].ProgressiveToInterlaceUnitInOPP, 2981 p->myPipe[k].SwathHeightC, 2982 p->myPipe[k].RotationAngle, 2983 p->myPipe[k].mirrored, 2984 p->myPipe[k].ViewportStationary, 2985 p->SwathWidthC[k], 2986 p->myPipe[k].ViewportHeightC, 2987 p->myPipe[k].ViewportXStartC, 2988 p->myPipe[k].ViewportYStartC, 2989 2990 // Output 2991 &p->VInitPreFillC[k], 2992 &p->MaxNumSwathC[k]); 2993 } else { 2994 s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma; 2995 s->PTEBufferSizeInRequestsForChroma[k] = 0; 2996 s->PixelPTEBytesPerRowC[k] = 0; 2997 s->PixelPTEBytesPerRowStorageC[k] = 0; 2998 s->vm_bytes_c = 0; 2999 p->MaxNumSwathC[k] = 0; 3000 p->PrefetchSourceLinesC[k] = 0; 3001 s->dpte_row_height_chroma_one_row_per_frame[k] = 0; 3002 s->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; 3003 s->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; 3004 } 3005 3006 scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary; 3007 
scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable; 3008 scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface; 3009 scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesY; 3010 scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesY; 3011 scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat; 3012 scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling; 3013 scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelY; 3014 scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle; 3015 scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthY[k]; 3016 scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeight; 3017 scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStart; 3018 scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStart; 3019 scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable; 3020 scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels; 3021 scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; 3022 scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = s->PTEBufferSizeInRequestsForLuma[k]; 3023 scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchY; 3024 scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = p->myPipe[k].BlockWidthY; 3025 scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightY; 3026 scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]); 3027 
scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchY; 3028 scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present; 3029 3030 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowY[k]; 3031 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageY[k]; 3032 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_luma_ub[k]; 3033 scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_luma[k]; 3034 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_luma[k]; 3035 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowY_one_row_per_frame[k]; 3036 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_luma_ub_one_row_per_frame[k]; 3037 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_luma_one_row_per_frame[k]; 3038 scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_y[k]; 3039 scratch->calculate_vm_and_row_bytes_params.vmpg_height = &p->vmpg_height_y[k]; 3040 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthY[k]; 3041 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightY[k]; 3042 scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeY[k]; 3043 scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_l[k]; 3044 3045 scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_l[k]; 3046 scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_luma[k]; 3047 scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_luma[k]; 3048 scratch->calculate_vm_and_row_bytes_params.meta_row_width 
= &p->meta_row_width_luma[k]; 3049 scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_luma[k]; 3050 scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_l[k]; 3051 3052 s->vm_bytes_l = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params); 3053 3054 p->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( 3055 p->myPipe[k].VRatio, 3056 p->myPipe[k].VTaps, 3057 p->myPipe[k].InterlaceEnable, 3058 p->myPipe[k].ProgressiveToInterlaceUnitInOPP, 3059 p->myPipe[k].SwathHeightY, 3060 p->myPipe[k].RotationAngle, 3061 p->myPipe[k].mirrored, 3062 p->myPipe[k].ViewportStationary, 3063 p->SwathWidthY[k], 3064 p->myPipe[k].ViewportHeight, 3065 p->myPipe[k].ViewportXStart, 3066 p->myPipe[k].ViewportYStart, 3067 3068 // Output 3069 &p->VInitPreFillY[k], 3070 &p->MaxNumSwathY[k]); 3071 3072 #ifdef __DML_VBA_DEBUG__ 3073 DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes_l = %u (before hvm level)\n", __func__, k, s->vm_bytes_l); 3074 DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes_c = %u (before hvm level)\n", __func__, k, s->vm_bytes_c); 3075 DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes_per_row_ub_l = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_l[k]); 3076 DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes_per_row_ub_c = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_c[k]); 3077 #endif 3078 p->vm_bytes[k] = (s->vm_bytes_l + s->vm_bytes_c) * (1 + 8 * s->HostVMDynamicLevels); 3079 p->meta_row_bytes[k] = s->meta_row_bytes_per_row_ub_l[k] + s->meta_row_bytes_per_row_ub_c[k]; 3080 p->meta_row_bytes_per_row_ub_l[k] = s->meta_row_bytes_per_row_ub_l[k]; 3081 p->meta_row_bytes_per_row_ub_c[k] = s->meta_row_bytes_per_row_ub_c[k]; 3082 3083 #ifdef __DML_VBA_DEBUG__ 3084 DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes = %u\n", __func__, k, p->meta_row_bytes[k]); 3085 DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes = %u (after hvm level)\n", __func__, k, p->vm_bytes[k]); 3086 #endif 3087 if 
(s->PixelPTEBytesPerRowStorageY[k] <= 64 * s->PTEBufferSizeInRequestsForLuma[k] && s->PixelPTEBytesPerRowStorageC[k] <= 64 * s->PTEBufferSizeInRequestsForChroma[k]) { 3088 p->PTEBufferSizeNotExceeded[k] = true; 3089 } else { 3090 p->PTEBufferSizeNotExceeded[k] = false; 3091 } 3092 3093 s->one_row_per_frame_fits_in_buffer[k] = (s->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForLuma[k] && 3094 s->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForChroma[k]); 3095 #ifdef __DML_VBA_DEBUG__ 3096 if (p->PTEBufferSizeNotExceeded[k] == 0 || s->one_row_per_frame_fits_in_buffer[k] == 0) { 3097 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]); 3098 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]); 3099 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]); 3100 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]); 3101 DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]); 3102 DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]); 3103 DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded (not one_row_per_frame) = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); 3104 3105 DML_LOG_VERBOSE("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels); 3106 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]); 3107 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]); 3108 DML_LOG_VERBOSE("DML::%s: k=%u, 
one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]); 3109 } 3110 #endif 3111 } 3112 3113 CalculateMALLUseForStaticScreen( 3114 p->display_cfg, 3115 p->NumberOfActiveSurfaces, 3116 p->MALLAllocatedForDCN, 3117 p->SurfaceSizeInMALL, 3118 s->one_row_per_frame_fits_in_buffer, 3119 // Output 3120 p->is_using_mall_for_ss); 3121 3122 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { 3123 if (p->display_cfg->gpuvm_enable) { 3124 if (p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.enable == 1) { 3125 p->PTE_BUFFER_MODE[k] = p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.value; 3126 } 3127 p->PTE_BUFFER_MODE[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) || 3128 dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64); 3129 p->BIGK_FRAGMENT_SIZE[k] = (unsigned int)(math_log((float)p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes * 1024, 2) - 12); 3130 } else { 3131 p->PTE_BUFFER_MODE[k] = 0; 3132 p->BIGK_FRAGMENT_SIZE[k] = 0; 3133 } 3134 } 3135 3136 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { 3137 p->DCCMetaBufferSizeNotExceeded[k] = true; 3138 #ifdef __DML_VBA_DEBUG__ 3139 DML_LOG_VERBOSE("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]); 3140 DML_LOG_VERBOSE("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, p->is_using_mall_for_ss[k]); 3141 #endif 3142 p->use_one_row_for_frame[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) || 3143 (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) || 
(p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64 && dml_is_vertical_rotation(p->myPipe[k].RotationAngle)); 3144 3145 p->use_one_row_for_frame_flip[k] = p->use_one_row_for_frame[k] && !(p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame); 3146 3147 if (p->use_one_row_for_frame[k]) { 3148 p->dpte_row_height_luma[k] = s->dpte_row_height_luma_one_row_per_frame[k]; 3149 p->dpte_row_width_luma_ub[k] = s->dpte_row_width_luma_ub_one_row_per_frame[k]; 3150 s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY_one_row_per_frame[k]; 3151 p->dpte_row_height_chroma[k] = s->dpte_row_height_chroma_one_row_per_frame[k]; 3152 p->dpte_row_width_chroma_ub[k] = s->dpte_row_width_chroma_ub_one_row_per_frame[k]; 3153 s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC_one_row_per_frame[k]; 3154 p->PTEBufferSizeNotExceeded[k] = s->one_row_per_frame_fits_in_buffer[k]; 3155 } 3156 3157 if (p->meta_row_bytes[k] <= p->DCCMetaBufferSizeBytes) { 3158 p->DCCMetaBufferSizeNotExceeded[k] = true; 3159 } else { 3160 p->DCCMetaBufferSizeNotExceeded[k] = false; 3161 3162 #ifdef __DML_VBA_DEBUG__ 3163 DML_LOG_VERBOSE("DML::%s: k=%d, meta_row_bytes = %d\n", __func__, k, p->meta_row_bytes[k]); 3164 DML_LOG_VERBOSE("DML::%s: k=%d, DCCMetaBufferSizeBytes = %d\n", __func__, k, p->DCCMetaBufferSizeBytes); 3165 DML_LOG_VERBOSE("DML::%s: k=%d, DCCMetaBufferSizeNotExceeded = %d\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]); 3166 #endif 3167 } 3168 3169 s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY[k] * (1 + 8 * s->HostVMDynamicLevels); 3170 s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC[k] * (1 + 8 * s->HostVMDynamicLevels); 3171 p->PixelPTEBytesPerRow[k] = s->PixelPTEBytesPerRowY[k] + s->PixelPTEBytesPerRowC[k]; 3172 p->dpte_row_bytes_per_row_l[k] = s->PixelPTEBytesPerRowY[k]; 3173 p->dpte_row_bytes_per_row_c[k] = s->PixelPTEBytesPerRowC[k]; 3174 3175 // if one row of dPTEs is 
meant to span the entire frame, then for these calculations, we will pretend like that one big row is fetched in two halfs 3176 if (p->use_one_row_for_frame[k]) 3177 p->PixelPTEBytesPerRow[k] = p->PixelPTEBytesPerRow[k] / 2; 3178 3179 CalculateRowBandwidth( 3180 p->display_cfg->gpuvm_enable, 3181 p->use_one_row_for_frame[k], 3182 p->myPipe[k].SourcePixelFormat, 3183 p->myPipe[k].VRatio, 3184 p->myPipe[k].VRatioChroma, 3185 p->myPipe[k].DCCEnable, 3186 p->myPipe[k].HTotal / p->myPipe[k].PixelClock, 3187 s->PixelPTEBytesPerRowY[k], 3188 s->PixelPTEBytesPerRowC[k], 3189 p->dpte_row_height_luma[k], 3190 p->dpte_row_height_chroma[k], 3191 3192 p->mrq_present, 3193 p->meta_row_bytes_per_row_ub_l[k], 3194 p->meta_row_bytes_per_row_ub_c[k], 3195 p->meta_row_height_luma[k], 3196 p->meta_row_height_chroma[k], 3197 3198 // Output 3199 &p->dpte_row_bw[k], 3200 &p->meta_row_bw[k]); 3201 #ifdef __DML_VBA_DEBUG__ 3202 DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); 3203 DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]); 3204 DML_LOG_VERBOSE("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config); 3205 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]); 3206 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]); 3207 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]); 3208 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]); 3209 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); 3210 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]); 
3211 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]); 3212 DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); 3213 DML_LOG_VERBOSE("DML::%s: k=%u, gpuvm_enable = %u\n", __func__, k, p->display_cfg->gpuvm_enable); 3214 DML_LOG_VERBOSE("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]); 3215 DML_LOG_VERBOSE("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]); 3216 #endif 3217 } 3218 } 3219 3220 static double CalculateUrgentLatency( 3221 double UrgentLatencyPixelDataOnly, 3222 double UrgentLatencyPixelMixedWithVMData, 3223 double UrgentLatencyVMDataOnly, 3224 bool DoUrgentLatencyAdjustment, 3225 double UrgentLatencyAdjustmentFabricClockComponent, 3226 double UrgentLatencyAdjustmentFabricClockReference, 3227 double FabricClock, 3228 double uclk_freq_mhz, 3229 enum dml2_qos_param_type qos_type, 3230 unsigned int urgent_ramp_uclk_cycles, 3231 unsigned int df_qos_response_time_fclk_cycles, 3232 unsigned int max_round_trip_to_furthest_cs_fclk_cycles, 3233 unsigned int mall_overhead_fclk_cycles, 3234 double umc_urgent_ramp_latency_margin, 3235 double fabric_max_transport_latency_margin) 3236 { 3237 double urgent_latency = 0; 3238 if (qos_type == dml2_qos_param_type_dcn4x) { 3239 urgent_latency = (df_qos_response_time_fclk_cycles + mall_overhead_fclk_cycles) / FabricClock 3240 + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1 + fabric_max_transport_latency_margin / 100.0) 3241 + urgent_ramp_uclk_cycles / uclk_freq_mhz * (1 + umc_urgent_ramp_latency_margin / 100.0); 3242 } else { 3243 urgent_latency = math_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); 3244 if (DoUrgentLatencyAdjustment == true) { 3245 urgent_latency = urgent_latency + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); 3246 
} 3247 } 3248 #ifdef __DML_VBA_DEBUG__ 3249 if (qos_type == dml2_qos_param_type_dcn4x) { 3250 DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type); 3251 DML_LOG_VERBOSE("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles); 3252 DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); 3253 DML_LOG_VERBOSE("DML::%s: umc_urgent_ramp_latency_margin = %f\n", __func__, umc_urgent_ramp_latency_margin); 3254 } else { 3255 DML_LOG_VERBOSE("DML::%s: UrgentLatencyPixelDataOnly = %f\n", __func__, UrgentLatencyPixelDataOnly); 3256 DML_LOG_VERBOSE("DML::%s: UrgentLatencyPixelMixedWithVMData = %f\n", __func__, UrgentLatencyPixelMixedWithVMData); 3257 DML_LOG_VERBOSE("DML::%s: UrgentLatencyVMDataOnly = %f\n", __func__, UrgentLatencyVMDataOnly); 3258 DML_LOG_VERBOSE("DML::%s: UrgentLatencyAdjustmentFabricClockComponent = %f\n", __func__, UrgentLatencyAdjustmentFabricClockComponent); 3259 DML_LOG_VERBOSE("DML::%s: UrgentLatencyAdjustmentFabricClockReference = %f\n", __func__, UrgentLatencyAdjustmentFabricClockReference); 3260 } 3261 DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, FabricClock); 3262 DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, urgent_latency); 3263 #endif 3264 return urgent_latency; 3265 } 3266 3267 static double CalculateTripToMemory( 3268 double UrgLatency, 3269 double FabricClock, 3270 double uclk_freq_mhz, 3271 enum dml2_qos_param_type qos_type, 3272 unsigned int trip_to_memory_uclk_cycles, 3273 unsigned int max_round_trip_to_furthest_cs_fclk_cycles, 3274 unsigned int mall_overhead_fclk_cycles, 3275 double umc_max_latency_margin, 3276 double fabric_max_transport_latency_margin) 3277 { 3278 double trip_to_memory_us; 3279 if (qos_type == dml2_qos_param_type_dcn4x) { 3280 trip_to_memory_us = mall_overhead_fclk_cycles / FabricClock 3281 + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0) 3282 + trip_to_memory_uclk_cycles / 
uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0); 3283 } else { 3284 trip_to_memory_us = UrgLatency; 3285 } 3286 3287 #ifdef __DML_VBA_DEBUG__ 3288 if (qos_type == dml2_qos_param_type_dcn4x) { 3289 DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type); 3290 DML_LOG_VERBOSE("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles); 3291 DML_LOG_VERBOSE("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles); 3292 DML_LOG_VERBOSE("DML::%s: trip_to_memory_uclk_cycles = %d\n", __func__, trip_to_memory_uclk_cycles); 3293 DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); 3294 DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, FabricClock); 3295 DML_LOG_VERBOSE("DML::%s: fabric_max_transport_latency_margin = %f\n", __func__, fabric_max_transport_latency_margin); 3296 DML_LOG_VERBOSE("DML::%s: umc_max_latency_margin = %f\n", __func__, umc_max_latency_margin); 3297 } else { 3298 DML_LOG_VERBOSE("DML::%s: UrgLatency = %f\n", __func__, UrgLatency); 3299 } 3300 DML_LOG_VERBOSE("DML::%s: trip_to_memory_us = %f\n", __func__, trip_to_memory_us); 3301 #endif 3302 3303 3304 return trip_to_memory_us; 3305 } 3306 3307 static double CalculateMetaTripToMemory( 3308 double UrgLatency, 3309 double FabricClock, 3310 double uclk_freq_mhz, 3311 enum dml2_qos_param_type qos_type, 3312 unsigned int meta_trip_to_memory_uclk_cycles, 3313 unsigned int meta_trip_to_memory_fclk_cycles, 3314 double umc_max_latency_margin, 3315 double fabric_max_transport_latency_margin) 3316 { 3317 double meta_trip_to_memory_us; 3318 if (qos_type == dml2_qos_param_type_dcn4x) { 3319 meta_trip_to_memory_us = meta_trip_to_memory_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0) 3320 + meta_trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0); 3321 } else { 3322 meta_trip_to_memory_us = UrgLatency; 3323 } 3324 3325 #ifdef 
__DML_VBA_DEBUG__ 3326 if (qos_type == dml2_qos_param_type_dcn4x) { 3327 DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type); 3328 DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles); 3329 DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles); 3330 DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); 3331 } else { 3332 DML_LOG_VERBOSE("DML::%s: UrgLatency = %f\n", __func__, UrgLatency); 3333 } 3334 DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_us = %f\n", __func__, meta_trip_to_memory_us); 3335 #endif 3336 3337 3338 return meta_trip_to_memory_us; 3339 } 3340 3341 static void calculate_cursor_req_attributes( 3342 unsigned int cursor_width, 3343 unsigned int cursor_bpp, 3344 3345 // output 3346 unsigned int *cursor_lines_per_chunk, 3347 unsigned int *cursor_bytes_per_line, 3348 unsigned int *cursor_bytes_per_chunk, 3349 unsigned int *cursor_bytes) 3350 { 3351 unsigned int cursor_bytes_per_req = 0; 3352 unsigned int cursor_width_bytes = 0; 3353 unsigned int cursor_height = 0; 3354 3355 //SW determines the cursor pitch to support the maximum cursor_width that will be used but the following restrictions apply. 3356 //- For 2bpp, cursor_pitch = 256 pixels due to min cursor request size of 64B 3357 //- For 32 or 64 bpp, cursor_pitch = 64, 128 or 256 pixels depending on the cursor width 3358 3359 //The cursor requestor uses a cursor request size of 64B, 128B, or 256B depending on the cursor_width and cursor_bpp as follows. 3360 3361 cursor_width_bytes = (unsigned int)math_ceil2((double)cursor_width * cursor_bpp / 8, 1); 3362 if (cursor_width_bytes <= 64) 3363 cursor_bytes_per_req = 64; 3364 else if (cursor_width_bytes <= 128) 3365 cursor_bytes_per_req = 128; 3366 else 3367 cursor_bytes_per_req = 256; 3368 3369 //If cursor_width_bytes is greater than 256B, then multiple 256B requests are issued to fetch the entire cursor line. 
3370 *cursor_bytes_per_line = (unsigned int)math_ceil2((double)cursor_width_bytes, cursor_bytes_per_req); 3371 3372 //Nominally, the cursor chunk is 1KB or 2KB but it is restricted to a power of 2 number of lines with a maximum of 16 lines. 3373 if (cursor_bpp == 2) { 3374 *cursor_lines_per_chunk = 16; 3375 } else if (cursor_bpp == 32) { 3376 if (cursor_width <= 32) 3377 *cursor_lines_per_chunk = 16; 3378 else if (cursor_width <= 64) 3379 *cursor_lines_per_chunk = 8; 3380 else if (cursor_width <= 128) 3381 *cursor_lines_per_chunk = 4; 3382 else 3383 *cursor_lines_per_chunk = 2; 3384 } else if (cursor_bpp == 64) { 3385 if (cursor_width <= 16) 3386 *cursor_lines_per_chunk = 16; 3387 else if (cursor_width <= 32) 3388 *cursor_lines_per_chunk = 8; 3389 else if (cursor_width <= 64) 3390 *cursor_lines_per_chunk = 4; 3391 else if (cursor_width <= 128) 3392 *cursor_lines_per_chunk = 2; 3393 else 3394 *cursor_lines_per_chunk = 1; 3395 } else { 3396 if (cursor_width > 0) { 3397 DML_LOG_VERBOSE("DML::%s: Invalid cursor_bpp = %d\n", __func__, cursor_bpp); 3398 DML_ASSERT(0); 3399 } 3400 } 3401 3402 *cursor_bytes_per_chunk = *cursor_bytes_per_line * *cursor_lines_per_chunk; 3403 3404 // For the cursor implementation, all requested data is stored in the return buffer. Given this fact, the cursor_bytes can be directly compared with the CursorBufferSize. 
3405 // Only cursor_width is provided for worst case sizing so assume that the cursor is square 3406 cursor_height = cursor_width; 3407 *cursor_bytes = *cursor_bytes_per_line * cursor_height; 3408 #ifdef __DML_VBA_DEBUG__ 3409 DML_LOG_VERBOSE("DML::%s: cursor_bpp = %d\n", __func__, cursor_bpp); 3410 DML_LOG_VERBOSE("DML::%s: cursor_width = %d\n", __func__, cursor_width); 3411 DML_LOG_VERBOSE("DML::%s: cursor_width_bytes = %d\n", __func__, cursor_width_bytes); 3412 DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_req = %d\n", __func__, cursor_bytes_per_req); 3413 DML_LOG_VERBOSE("DML::%s: cursor_lines_per_chunk = %d\n", __func__, *cursor_lines_per_chunk); 3414 DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_line = %d\n", __func__, *cursor_bytes_per_line); 3415 DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, *cursor_bytes_per_chunk); 3416 DML_LOG_VERBOSE("DML::%s: cursor_bytes = %d\n", __func__, *cursor_bytes); 3417 DML_LOG_VERBOSE("DML::%s: cursor_pitch = %d\n", __func__, cursor_bpp == 2 ? 
256 : 1U << (unsigned int)math_ceil2(math_log((float)cursor_width, 2), 1)); 3418 #endif 3419 } 3420 3421 static void calculate_cursor_urgent_burst_factor( 3422 unsigned int CursorBufferSize, 3423 unsigned int CursorWidth, 3424 unsigned int cursor_bytes_per_chunk, 3425 unsigned int cursor_lines_per_chunk, 3426 double LineTime, 3427 double UrgentLatency, 3428 3429 double *UrgentBurstFactorCursor, 3430 bool *NotEnoughUrgentLatencyHiding) 3431 { 3432 unsigned int LinesInCursorBuffer = 0; 3433 double CursorBufferSizeInTime = 0; 3434 3435 if (CursorWidth > 0) { 3436 LinesInCursorBuffer = (unsigned int)math_floor2(CursorBufferSize * 1024.0 / (double)cursor_bytes_per_chunk, 1) * cursor_lines_per_chunk; 3437 3438 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime; 3439 if (CursorBufferSizeInTime - UrgentLatency <= 0) { 3440 *NotEnoughUrgentLatencyHiding = 1; 3441 *UrgentBurstFactorCursor = 1; 3442 } else { 3443 *NotEnoughUrgentLatencyHiding = 0; 3444 *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency); 3445 } 3446 3447 #ifdef __DML_VBA_DEBUG__ 3448 DML_LOG_VERBOSE("DML::%s: LinesInCursorBuffer = %u\n", __func__, LinesInCursorBuffer); 3449 DML_LOG_VERBOSE("DML::%s: CursorBufferSizeInTime = %f\n", __func__, CursorBufferSizeInTime); 3450 DML_LOG_VERBOSE("DML::%s: CursorBufferSize = %u (kbytes)\n", __func__, CursorBufferSize); 3451 DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %u\n", __func__, cursor_bytes_per_chunk); 3452 DML_LOG_VERBOSE("DML::%s: cursor_lines_per_chunk = %u\n", __func__, cursor_lines_per_chunk); 3453 DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, *UrgentBurstFactorCursor); 3454 DML_LOG_VERBOSE("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding); 3455 #endif 3456 3457 } 3458 } 3459 3460 static void CalculateUrgentBurstFactor( 3461 const struct dml2_plane_parameters *plane_cfg, 3462 unsigned int swath_width_luma_ub, 3463 unsigned int 
swath_width_chroma_ub, 3464 unsigned int SwathHeightY, 3465 unsigned int SwathHeightC, 3466 double LineTime, 3467 double UrgentLatency, 3468 double VRatio, 3469 double VRatioC, 3470 double BytePerPixelInDETY, 3471 double BytePerPixelInDETC, 3472 unsigned int DETBufferSizeY, 3473 unsigned int DETBufferSizeC, 3474 // Output 3475 double *UrgentBurstFactorLuma, 3476 double *UrgentBurstFactorChroma, 3477 bool *NotEnoughUrgentLatencyHiding) 3478 { 3479 double LinesInDETLuma; 3480 double LinesInDETChroma; 3481 double DETBufferSizeInTimeLuma; 3482 double DETBufferSizeInTimeChroma; 3483 3484 *NotEnoughUrgentLatencyHiding = 0; 3485 *UrgentBurstFactorLuma = 0; 3486 *UrgentBurstFactorChroma = 0; 3487 3488 #ifdef __DML_VBA_DEBUG__ 3489 DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio); 3490 DML_LOG_VERBOSE("DML::%s: VRatioC = %f\n", __func__, VRatioC); 3491 DML_LOG_VERBOSE("DML::%s: DETBufferSizeY = %d\n", __func__, DETBufferSizeY); 3492 DML_LOG_VERBOSE("DML::%s: DETBufferSizeC = %d\n", __func__, DETBufferSizeC); 3493 DML_LOG_VERBOSE("DML::%s: BytePerPixelInDETY = %f\n", __func__, BytePerPixelInDETY); 3494 DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 3495 DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, LineTime); 3496 #endif 3497 DML_ASSERT(VRatio > 0); 3498 3499 LinesInDETLuma = (dml_is_phantom_pipe(plane_cfg) ? 1024 * 1024 : DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub; 3500 3501 DETBufferSizeInTimeLuma = math_floor2(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; 3502 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { 3503 *NotEnoughUrgentLatencyHiding = 1; 3504 *UrgentBurstFactorLuma = 1; 3505 } else { 3506 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); 3507 } 3508 3509 if (BytePerPixelInDETC > 0) { 3510 LinesInDETChroma = (dml_is_phantom_pipe(plane_cfg) ? 
1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC / swath_width_chroma_ub; 3511 3512 DETBufferSizeInTimeChroma = math_floor2(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC; 3513 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { 3514 *NotEnoughUrgentLatencyHiding = 1; 3515 *UrgentBurstFactorChroma = 1; 3516 } else { 3517 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency); 3518 } 3519 } 3520 3521 #ifdef __DML_VBA_DEBUG__ 3522 DML_LOG_VERBOSE("DML::%s: LinesInDETLuma = %f\n", __func__, LinesInDETLuma); 3523 DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); 3524 DML_LOG_VERBOSE("DML::%s: DETBufferSizeInTimeLuma = %f\n", __func__, DETBufferSizeInTimeLuma); 3525 DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, *UrgentBurstFactorLuma); 3526 DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, *UrgentBurstFactorChroma); 3527 DML_LOG_VERBOSE("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding); 3528 #endif 3529 } 3530 3531 static void CalculateDCFCLKDeepSleepTdlut( 3532 const struct dml2_display_cfg *display_cfg, 3533 unsigned int NumberOfActiveSurfaces, 3534 unsigned int BytePerPixelY[], 3535 unsigned int BytePerPixelC[], 3536 unsigned int SwathWidthY[], 3537 unsigned int SwathWidthC[], 3538 unsigned int DPPPerSurface[], 3539 double PSCL_THROUGHPUT[], 3540 double PSCL_THROUGHPUT_CHROMA[], 3541 double Dppclk[], 3542 double ReadBandwidthLuma[], 3543 double ReadBandwidthChroma[], 3544 unsigned int ReturnBusWidth, 3545 3546 double dispclk, 3547 unsigned int tdlut_bytes_to_deliver[], 3548 double prefetch_swath_time_us[], 3549 3550 // Output 3551 double *DCFClkDeepSleep) 3552 { 3553 double DisplayPipeLineDeliveryTimeLuma; 3554 double DisplayPipeLineDeliveryTimeChroma; 3555 double DCFClkDeepSleepPerSurface[DML2_MAX_PLANES]; 3556 double ReadBandwidth = 0.0; 3557 3558 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { 
3559 double pixel_rate_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); 3560 3561 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) { 3562 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_rate_mhz; 3563 } else { 3564 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; 3565 } 3566 if (BytePerPixelC[k] == 0) { 3567 DisplayPipeLineDeliveryTimeChroma = 0; 3568 } else { 3569 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio <= 1) { 3570 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerSurface[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_rate_mhz; 3571 } else { 3572 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; 3573 } 3574 } 3575 3576 if (BytePerPixelC[k] > 0) { 3577 DCFClkDeepSleepPerSurface[k] = math_max2(__DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 3578 __DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma); 3579 } else { 3580 DCFClkDeepSleepPerSurface[k] = __DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma; 3581 } 3582 DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], pixel_rate_mhz / 16); 3583 3584 // adjust for 3dlut delivery time 3585 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && tdlut_bytes_to_deliver[k] > 0) { 3586 double tdlut_required_deepsleep_dcfclk = (double) tdlut_bytes_to_deliver[k] / 64.0 / prefetch_swath_time_us[k]; 3587 3588 DML_LOG_VERBOSE("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); 3589 DML_LOG_VERBOSE("DML::%s: k=%d, 
tdlut_bytes_to_deliver = %d\n", __func__, k, tdlut_bytes_to_deliver[k]); 3590 DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_swath_time_us = %f\n", __func__, k, prefetch_swath_time_us[k]); 3591 DML_LOG_VERBOSE("DML::%s: k=%d, tdlut_required_deepsleep_dcfclk = %f\n", __func__, k, tdlut_required_deepsleep_dcfclk); 3592 3593 // increase the deepsleep dcfclk to match the original dispclk throughput rate 3594 if (tdlut_required_deepsleep_dcfclk > DCFClkDeepSleepPerSurface[k]) { 3595 DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], tdlut_required_deepsleep_dcfclk); 3596 DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], dispclk / 4.0); 3597 } 3598 } 3599 3600 #ifdef __DML_VBA_DEBUG__ 3601 DML_LOG_VERBOSE("DML::%s: k=%u, PixelClock = %f\n", __func__, k, pixel_rate_mhz); 3602 DML_LOG_VERBOSE("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); 3603 #endif 3604 } 3605 3606 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { 3607 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; 3608 } 3609 3610 *DCFClkDeepSleep = math_max2(8.0, __DML2_CALCS_DCFCLK_FACTOR__ * ReadBandwidth / (double)ReturnBusWidth); 3611 3612 #ifdef __DML_VBA_DEBUG__ 3613 DML_LOG_VERBOSE("DML::%s: __DML2_CALCS_DCFCLK_FACTOR__ = %f\n", __func__, __DML2_CALCS_DCFCLK_FACTOR__); 3614 DML_LOG_VERBOSE("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth); 3615 DML_LOG_VERBOSE("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth); 3616 DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep); 3617 #endif 3618 3619 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { 3620 *DCFClkDeepSleep = math_max2(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]); 3621 } 3622 3623 DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); 3624 } 3625 3626 static noinline_for_stack void CalculateDCFCLKDeepSleep( 3627 const struct dml2_display_cfg 
*display_cfg, 3628 unsigned int NumberOfActiveSurfaces, 3629 unsigned int BytePerPixelY[], 3630 unsigned int BytePerPixelC[], 3631 unsigned int SwathWidthY[], 3632 unsigned int SwathWidthC[], 3633 unsigned int DPPPerSurface[], 3634 double PSCL_THROUGHPUT[], 3635 double PSCL_THROUGHPUT_CHROMA[], 3636 double Dppclk[], 3637 double ReadBandwidthLuma[], 3638 double ReadBandwidthChroma[], 3639 unsigned int ReturnBusWidth, 3640 3641 // Output 3642 double *DCFClkDeepSleep) 3643 { 3644 double zero_double[DML2_MAX_PLANES]; 3645 unsigned int zero_integer[DML2_MAX_PLANES]; 3646 3647 memset(zero_double, 0, DML2_MAX_PLANES * sizeof(double)); 3648 memset(zero_integer, 0, DML2_MAX_PLANES * sizeof(unsigned int)); 3649 3650 CalculateDCFCLKDeepSleepTdlut( 3651 display_cfg, 3652 NumberOfActiveSurfaces, 3653 BytePerPixelY, 3654 BytePerPixelC, 3655 SwathWidthY, 3656 SwathWidthC, 3657 DPPPerSurface, 3658 PSCL_THROUGHPUT, 3659 PSCL_THROUGHPUT_CHROMA, 3660 Dppclk, 3661 ReadBandwidthLuma, 3662 ReadBandwidthChroma, 3663 ReturnBusWidth, 3664 0, 3665 zero_integer, //tdlut_bytes_to_deliver, 3666 zero_double, //prefetch_swath_time_us, 3667 3668 // Output 3669 DCFClkDeepSleep); 3670 } 3671 3672 static double CalculateWriteBackDelay( 3673 enum dml2_source_format_class WritebackPixelFormat, 3674 double WritebackHRatio, 3675 double WritebackVRatio, 3676 unsigned int WritebackVTaps, 3677 unsigned int WritebackDestinationWidth, 3678 unsigned int WritebackDestinationHeight, 3679 unsigned int WritebackSourceHeight, 3680 unsigned int HTotal) 3681 { 3682 (void)WritebackPixelFormat; 3683 (void)WritebackHRatio; 3684 double CalculateWriteBackDelay; 3685 double Line_length; 3686 double Output_lines_last_notclamped; 3687 double WritebackVInit; 3688 3689 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; 3690 Line_length = math_max2((double)WritebackDestinationWidth, math_ceil2((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps); 3691 Output_lines_last_notclamped = 
WritebackDestinationHeight - 1 - math_ceil2(((double)WritebackSourceHeight - (double)WritebackVInit) / (double)WritebackVRatio, 1.0); 3692 if (Output_lines_last_notclamped < 0) { 3693 CalculateWriteBackDelay = 0; 3694 } else { 3695 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80; 3696 } 3697 return CalculateWriteBackDelay; 3698 } 3699 3700 static unsigned int CalculateMaxVStartup( 3701 bool ptoi_supported, 3702 unsigned int vblank_nom_default_us, 3703 const struct dml2_timing_cfg *timing, 3704 double write_back_delay_us) 3705 { 3706 unsigned int vblank_size = 0; 3707 unsigned int max_vstartup_lines = 0; 3708 3709 double line_time_us = (double)timing->h_total / ((double)timing->pixel_clock_khz / 1000); 3710 unsigned int vblank_actual = timing->v_total - timing->v_active; 3711 unsigned int vblank_nom_default_in_line = (unsigned int)math_floor2((double)vblank_nom_default_us / line_time_us, 1.0); 3712 unsigned int vblank_avail = (timing->vblank_nom == 0) ? 
vblank_nom_default_in_line : (unsigned int)timing->vblank_nom; 3713 3714 vblank_size = (unsigned int)math_min2(vblank_actual, vblank_avail); 3715 3716 if (timing->interlaced && !ptoi_supported) 3717 max_vstartup_lines = (unsigned int)(math_floor2((vblank_size - 1) / 2.0, 1.0)); 3718 else 3719 max_vstartup_lines = vblank_size - (unsigned int)math_max2(1.0, math_ceil2(write_back_delay_us / line_time_us, 1.0)); 3720 #ifdef __DML_VBA_DEBUG__ 3721 DML_LOG_VERBOSE("DML::%s: VBlankNom = %lu\n", __func__, timing->vblank_nom); 3722 DML_LOG_VERBOSE("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us); 3723 DML_LOG_VERBOSE("DML::%s: line_time_us = %f\n", __func__, line_time_us); 3724 DML_LOG_VERBOSE("DML::%s: vblank_actual = %u\n", __func__, vblank_actual); 3725 DML_LOG_VERBOSE("DML::%s: vblank_avail = %u\n", __func__, vblank_avail); 3726 DML_LOG_VERBOSE("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines); 3727 #endif 3728 max_vstartup_lines = (unsigned int)math_min2(max_vstartup_lines, DML_MAX_VSTARTUP_START); 3729 return max_vstartup_lines; 3730 } 3731 3732 static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch *scratch, 3733 struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *p) 3734 { 3735 unsigned int MaximumSwathHeightY[DML2_MAX_PLANES] = { 0 }; 3736 unsigned int MaximumSwathHeightC[DML2_MAX_PLANES] = { 0 }; 3737 unsigned int RoundedUpSwathSizeBytesY[DML2_MAX_PLANES] = { 0 }; 3738 unsigned int RoundedUpSwathSizeBytesC[DML2_MAX_PLANES] = { 0 }; 3739 unsigned int SwathWidthSingleDPP[DML2_MAX_PLANES] = { 0 }; 3740 unsigned int SwathWidthSingleDPPChroma[DML2_MAX_PLANES] = { 0 }; 3741 3742 unsigned int TotalActiveDPP = 0; 3743 bool NoChromaOrLinear = true; 3744 unsigned int SurfaceDoingUnboundedRequest = 0; 3745 unsigned int DETBufferSizeInKByteForSwathCalculation; 3746 3747 const long TTUFIFODEPTH = 8; 3748 const long MAXIMUMCOMPRESSION = 4; 3749 3750 #ifdef __DML_VBA_DEBUG__ 3751 
DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP); 3752 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { 3753 DML_LOG_VERBOSE("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]); 3754 } 3755 #endif 3756 CalculateSwathWidth( 3757 p->display_cfg, 3758 p->ForceSingleDPP, 3759 p->NumberOfActiveSurfaces, 3760 p->ODMMode, 3761 p->BytePerPixY, 3762 p->BytePerPixC, 3763 p->Read256BytesBlockHeightY, 3764 p->Read256BytesBlockHeightC, 3765 p->Read256BytesBlockWidthY, 3766 p->Read256BytesBlockWidthC, 3767 p->surf_linear128_l, 3768 p->surf_linear128_c, 3769 p->DPPPerSurface, 3770 3771 // Output 3772 p->req_per_swath_ub_l, 3773 p->req_per_swath_ub_c, 3774 SwathWidthSingleDPP, 3775 SwathWidthSingleDPPChroma, 3776 p->SwathWidth, 3777 p->SwathWidthChroma, 3778 MaximumSwathHeightY, 3779 MaximumSwathHeightC, 3780 p->swath_width_luma_ub, 3781 p->swath_width_chroma_ub); 3782 3783 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { 3784 p->full_swath_bytes_l[k] = (unsigned int)(p->swath_width_luma_ub[k] * p->BytePerPixDETY[k] * MaximumSwathHeightY[k]); 3785 p->full_swath_bytes_c[k] = (unsigned int)(p->swath_width_chroma_ub[k] * p->BytePerPixDETC[k] * MaximumSwathHeightC[k]); 3786 #ifdef __DML_VBA_DEBUG__ 3787 DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]); 3788 DML_LOG_VERBOSE("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]); 3789 DML_LOG_VERBOSE("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]); 3790 DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, MaximumSwathHeightY[k]); 3791 DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]); 3792 DML_LOG_VERBOSE("DML::%s: k=%u swath_width_chroma_ub = %u\n", __func__, k, p->swath_width_chroma_ub[k]); 3793 DML_LOG_VERBOSE("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, p->BytePerPixDETC[k]); 3794 
DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, MaximumSwathHeightC[k]); 3795 DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]); 3796 #endif 3797 if (p->display_cfg->plane_descriptors[k].pixel_format == dml2_420_10) { 3798 p->full_swath_bytes_l[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_l[k], 256)); 3799 p->full_swath_bytes_c[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_c[k], 256)); 3800 } 3801 } 3802 3803 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { 3804 TotalActiveDPP = TotalActiveDPP + (p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]); 3805 if (p->DPPPerSurface[k] > 0) 3806 SurfaceDoingUnboundedRequest = k; 3807 if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format) || p->display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha 3808 || p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) { 3809 NoChromaOrLinear = false; 3810 } 3811 } 3812 3813 *p->UnboundedRequestEnabled = UnboundedRequest(p->display_cfg->overrides.hw.force_unbounded_requesting.enable, p->display_cfg->overrides.hw.force_unbounded_requesting.value, TotalActiveDPP, NoChromaOrLinear); 3814 3815 CalculateDETBufferSize( 3816 &scratch->CalculateDETBufferSize_locals, 3817 p->display_cfg, 3818 p->ForceSingleDPP, 3819 p->NumberOfActiveSurfaces, 3820 *p->UnboundedRequestEnabled, 3821 p->nomDETInKByte, 3822 p->MaxTotalDETInKByte, 3823 p->ConfigReturnBufferSizeInKByte, 3824 p->MinCompressedBufferSizeInKByte, 3825 p->ConfigReturnBufferSegmentSizeInkByte, 3826 p->CompressedBufferSegmentSizeInkByte, 3827 p->ReadBandwidthLuma, 3828 p->ReadBandwidthChroma, 3829 p->full_swath_bytes_l, 3830 p->full_swath_bytes_c, 3831 p->DPPPerSurface, 3832 3833 // Output 3834 p->DETBufferSizeInKByte, // per hubp pipe 3835 p->CompressedBufferSizeInkByte); 3836 3837 #ifdef __DML_VBA_DEBUG__ 3838 DML_LOG_VERBOSE("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP); 3839 
DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte); 3840 DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte); 3841 DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled); 3842 DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte); 3843 #endif 3844 3845 *p->ViewportSizeSupport = true; 3846 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { 3847 3848 DETBufferSizeInKByteForSwathCalculation = (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 1024 : p->DETBufferSizeInKByte[k]); 3849 #ifdef __DML_VBA_DEBUG__ 3850 DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation); 3851 #endif 3852 if (p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) { 3853 p->SwathHeightY[k] = MaximumSwathHeightY[k]; 3854 p->SwathHeightC[k] = MaximumSwathHeightC[k]; 3855 RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k]; 3856 RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k]; 3857 3858 if (p->surf_linear128_l[k]) 3859 p->request_size_bytes_luma[k] = 128; 3860 else 3861 p->request_size_bytes_luma[k] = 256; 3862 3863 if (p->surf_linear128_c[k]) 3864 p->request_size_bytes_chroma[k] = 128; 3865 else 3866 p->request_size_bytes_chroma[k] = 256; 3867 3868 } else if (p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 3869 p->SwathHeightY[k] = MaximumSwathHeightY[k]; 3870 p->SwathHeightC[k] = MaximumSwathHeightC[k]; 3871 RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k]; 3872 RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k]; 3873 p->request_size_bytes_luma[k] = 256; 3874 p->request_size_bytes_chroma[k] = 256; 3875 3876 } else if (p->full_swath_bytes_l[k] >= 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] / 2 + 
p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 3877 p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2; 3878 p->SwathHeightC[k] = MaximumSwathHeightC[k]; 3879 RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2; 3880 RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k]; 3881 p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; 3882 p->request_size_bytes_chroma[k] = 256; 3883 3884 } else if (p->full_swath_bytes_l[k] < 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] / 2 <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 3885 p->SwathHeightY[k] = MaximumSwathHeightY[k]; 3886 p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2; 3887 RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k]; 3888 RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2; 3889 p->request_size_bytes_luma[k] = 256; 3890 p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; 3891 3892 } else { 3893 p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2; 3894 p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2; 3895 RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2; 3896 RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2; 3897 p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; 3898 p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 
128 : 64; 3899 } 3900 3901 if (p->SwathHeightC[k] == 0) 3902 p->request_size_bytes_chroma[k] = 0; 3903 3904 if ((p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] / 2 > DETBufferSizeInKByteForSwathCalculation * 1024 / 2) || 3905 p->SwathWidth[k] > p->MaximumSwathWidthLuma[k] || (p->SwathHeightC[k] > 0 && p->SwathWidthChroma[k] > p->MaximumSwathWidthChroma[k])) { 3906 *p->ViewportSizeSupport = false; 3907 DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l=%u\n", __func__, k, p->full_swath_bytes_l[k]); 3908 DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c=%u\n", __func__, k, p->full_swath_bytes_c[k]); 3909 DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation=%u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation); 3910 DML_LOG_VERBOSE("DML::%s: k=%u SwathWidth=%u\n", __func__, k, p->SwathWidth[k]); 3911 DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, p->MaximumSwathWidthLuma[k]); 3912 DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthChroma=%d\n", __func__, k, p->SwathWidthChroma[k]); 3913 DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, p->MaximumSwathWidthChroma[k]); 3914 p->ViewportSizeSupportPerSurface[k] = false; 3915 } else { 3916 p->ViewportSizeSupportPerSurface[k] = true; 3917 } 3918 3919 if (p->SwathHeightC[k] == 0) { 3920 #ifdef __DML_VBA_DEBUG__ 3921 DML_LOG_VERBOSE("DML::%s: k=%u, All DET will be used for plane0\n", __func__, k); 3922 #endif 3923 p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024; 3924 p->DETBufferSizeC[k] = 0; 3925 } else if (RoundedUpSwathSizeBytesY[k] <= 1.5 * RoundedUpSwathSizeBytesC[k]) { 3926 #ifdef __DML_VBA_DEBUG__ 3927 DML_LOG_VERBOSE("DML::%s: k=%u, Half DET will be used for plane0, and half for plane1\n", __func__, k); 3928 #endif 3929 p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024 / 2; 3930 p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 / 2; 3931 } else { 3932 #ifdef __DML_VBA_DEBUG__ 3933 DML_LOG_VERBOSE("DML::%s: k=%u, 2/3 DET 
will be used for plane0, and 1/3 for plane1\n", __func__, k); 3934 #endif 3935 p->DETBufferSizeY[k] = (unsigned int)(math_floor2(p->DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024)); 3936 p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 - p->DETBufferSizeY[k]; 3937 } 3938 3939 #ifdef __DML_VBA_DEBUG__ 3940 DML_LOG_VERBOSE("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]); 3941 DML_LOG_VERBOSE("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]); 3942 DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]); 3943 DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]); 3944 DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, RoundedUpSwathSizeBytesY[k]); 3945 DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); 3946 DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]); 3947 DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); 3948 DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]); 3949 DML_LOG_VERBOSE("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]); 3950 #endif 3951 3952 } 3953 3954 *p->compbuf_reserved_space_64b = 2 * p->pixel_chunk_size_kbytes * 1024 / 64; 3955 if (*p->UnboundedRequestEnabled) { 3956 *p->compbuf_reserved_space_64b = (unsigned int)math_ceil2(math_max2(*p->compbuf_reserved_space_64b, 3957 (double)(p->rob_buffer_size_kbytes * 1024 / 64) - (double)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / (p->mrq_present ? 
MAXIMUMCOMPRESSION : 1) / 64)), 1.0); 3958 #ifdef __DML_VBA_DEBUG__ 3959 DML_LOG_VERBOSE("DML::%s: RoundedUpSwathSizeBytesY[%d] = %u\n", __func__, SurfaceDoingUnboundedRequest, RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest]); 3960 DML_LOG_VERBOSE("DML::%s: rob_buffer_size_kbytes = %u\n", __func__, p->rob_buffer_size_kbytes); 3961 #endif 3962 } 3963 #ifdef __DML_VBA_DEBUG__ 3964 DML_LOG_VERBOSE("DML::%s: compbuf_reserved_space_64b = %u\n", __func__, *p->compbuf_reserved_space_64b); 3965 #endif 3966 3967 *p->hw_debug5 = false; 3968 #ifdef ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE 3969 if (p->NumberOfActiveSurfaces > 1) 3970 *p->hw_debug5 = true; 3971 #else 3972 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { 3973 if (!(p->mrq_present) && (!(*p->UnboundedRequestEnabled)) && (TotalActiveDPP == 1) 3974 && p->display_cfg->plane_descriptors[k].surface.dcc.enable 3975 && ((p->rob_buffer_size_kbytes * 1024 * (p->mrq_present ? MAXIMUMCOMPRESSION : 1) 3976 + *p->CompressedBufferSizeInkByte * MAXIMUMCOMPRESSION * 1024) > TTUFIFODEPTH * (RoundedUpSwathSizeBytesY[k] + RoundedUpSwathSizeBytesC[k]))) 3977 *p->hw_debug5 = true; 3978 #ifdef __DML_VBA_DEBUG__ 3979 DML_LOG_VERBOSE("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled); 3980 DML_LOG_VERBOSE("DML::%s: k=%u MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION); 3981 DML_LOG_VERBOSE("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH); 3982 DML_LOG_VERBOSE("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte); 3983 DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); 3984 DML_LOG_VERBOSE("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5); 3985 #endif 3986 } 3987 #endif 3988 } 3989 3990 static enum dml2_odm_mode DecideODMMode(unsigned int HActive, 3991 double MaxDispclk, 3992 unsigned int MaximumPixelsPerLinePerDSCUnit, 3993 enum 
dml2_output_format_class OutFormat, 3994 bool UseDSC, 3995 unsigned int NumberOfDSCSlices, 3996 double SurfaceRequiredDISPCLKWithoutODMCombine, 3997 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne, 3998 double SurfaceRequiredDISPCLKWithODMCombineThreeToOne, 3999 double SurfaceRequiredDISPCLKWithODMCombineFourToOne) 4000 { 4001 (void)SurfaceRequiredDISPCLKWithODMCombineFourToOne; 4002 enum dml2_odm_mode MinimumRequiredODMModeForMaxDispClock; 4003 enum dml2_odm_mode MinimumRequiredODMModeForMaxDSCHActive; 4004 enum dml2_odm_mode MinimumRequiredODMModeForMax420HActive; 4005 enum dml2_odm_mode ODMMode = dml2_odm_mode_bypass; 4006 4007 MinimumRequiredODMModeForMaxDispClock = 4008 (SurfaceRequiredDISPCLKWithoutODMCombine <= MaxDispclk) ? dml2_odm_mode_bypass : 4009 (SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= MaxDispclk) ? dml2_odm_mode_combine_2to1 : 4010 (SurfaceRequiredDISPCLKWithODMCombineThreeToOne <= MaxDispclk) ? dml2_odm_mode_combine_3to1 : dml2_odm_mode_combine_4to1; 4011 if (ODMMode < MinimumRequiredODMModeForMaxDispClock) 4012 ODMMode = MinimumRequiredODMModeForMaxDispClock; 4013 4014 if (UseDSC) { 4015 MinimumRequiredODMModeForMaxDSCHActive = 4016 (HActive <= 1 * MaximumPixelsPerLinePerDSCUnit) ? dml2_odm_mode_bypass : 4017 (HActive <= 2 * MaximumPixelsPerLinePerDSCUnit) ? dml2_odm_mode_combine_2to1 : 4018 (HActive <= 3 * MaximumPixelsPerLinePerDSCUnit) ? dml2_odm_mode_combine_3to1 : dml2_odm_mode_combine_4to1; 4019 if (ODMMode < MinimumRequiredODMModeForMaxDSCHActive) 4020 ODMMode = MinimumRequiredODMModeForMaxDSCHActive; 4021 } 4022 4023 if (OutFormat == dml2_420) { 4024 MinimumRequiredODMModeForMax420HActive = 4025 (HActive <= 1 * DML2_MAX_FMT_420_BUFFER_WIDTH) ? dml2_odm_mode_bypass : 4026 (HActive <= 2 * DML2_MAX_FMT_420_BUFFER_WIDTH) ? dml2_odm_mode_combine_2to1 : 4027 (HActive <= 3 * DML2_MAX_FMT_420_BUFFER_WIDTH) ? 
dml2_odm_mode_combine_3to1 : dml2_odm_mode_combine_4to1; 4028 if (ODMMode < MinimumRequiredODMModeForMax420HActive) 4029 ODMMode = MinimumRequiredODMModeForMax420HActive; 4030 } 4031 4032 if (UseDSC) { 4033 if (ODMMode == dml2_odm_mode_bypass && NumberOfDSCSlices > 4) 4034 ODMMode = dml2_odm_mode_combine_2to1; 4035 if (ODMMode == dml2_odm_mode_combine_2to1 && NumberOfDSCSlices > 8) 4036 ODMMode = dml2_odm_mode_combine_3to1; 4037 if (ODMMode == dml2_odm_mode_combine_3to1 && NumberOfDSCSlices != 12) 4038 ODMMode = dml2_odm_mode_combine_4to1; 4039 } 4040 4041 return ODMMode; 4042 } 4043 4044 static void CalculateODMConstraints( 4045 enum dml2_odm_mode ODMUse, 4046 double SurfaceRequiredDISPCLKWithoutODMCombine, 4047 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne, 4048 double SurfaceRequiredDISPCLKWithODMCombineThreeToOne, 4049 double SurfaceRequiredDISPCLKWithODMCombineFourToOne, 4050 unsigned int MaximumPixelsPerLinePerDSCUnit, 4051 /* Output */ 4052 double *DISPCLKRequired, 4053 unsigned int *NumberOfDPPRequired, 4054 unsigned int *MaxHActiveForDSC, 4055 unsigned int *MaxDSCSlices, 4056 unsigned int *MaxHActiveFor420) 4057 { 4058 switch (ODMUse) { 4059 case dml2_odm_mode_combine_2to1: 4060 *DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineTwoToOne; 4061 *NumberOfDPPRequired = 2; 4062 break; 4063 case dml2_odm_mode_combine_3to1: 4064 *DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineThreeToOne; 4065 *NumberOfDPPRequired = 3; 4066 break; 4067 case dml2_odm_mode_combine_4to1: 4068 *DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineFourToOne; 4069 *NumberOfDPPRequired = 4; 4070 break; 4071 case dml2_odm_mode_auto: 4072 case dml2_odm_mode_split_1to2: 4073 case dml2_odm_mode_mso_1to2: 4074 case dml2_odm_mode_mso_1to4: 4075 case dml2_odm_mode_bypass: 4076 default: 4077 *DISPCLKRequired = SurfaceRequiredDISPCLKWithoutODMCombine; 4078 *NumberOfDPPRequired = 1; 4079 break; 4080 } 4081 *MaxHActiveForDSC = *NumberOfDPPRequired * 
MaximumPixelsPerLinePerDSCUnit; 4082 *MaxDSCSlices = *NumberOfDPPRequired * DML_MAX_NUM_OF_SLICES_PER_DSC; 4083 *MaxHActiveFor420 = *NumberOfDPPRequired * DML2_MAX_FMT_420_BUFFER_WIDTH; 4084 } 4085 4086 static bool ValidateODMMode(enum dml2_odm_mode ODMMode, 4087 double MaxDispclk, 4088 unsigned int HActive, 4089 enum dml2_output_format_class OutFormat, 4090 bool UseDSC, 4091 unsigned int NumberOfDSCSlices, 4092 unsigned int TotalNumberOfActiveDPP, 4093 unsigned int TotalNumberOfActiveOPP, 4094 unsigned int MaxNumDPP, 4095 unsigned int MaxNumOPP, 4096 double DISPCLKRequired, 4097 unsigned int NumberOfDPPRequired, 4098 unsigned int MaxHActiveForDSC, 4099 unsigned int MaxDSCSlices, 4100 unsigned int MaxHActiveFor420) 4101 { 4102 bool are_odm_segments_symmetrical = (ODMMode == dml2_odm_mode_combine_3to1) ? UseDSC : true; 4103 bool is_max_dsc_slice_required = (ODMMode == dml2_odm_mode_combine_3to1); 4104 unsigned int pixels_per_clock_cycle = (OutFormat == dml2_420 || OutFormat == dml2_n422) ? 2 : 1; 4105 unsigned int h_timing_div_mode = 4106 (ODMMode == dml2_odm_mode_combine_4to1 || ODMMode == dml2_odm_mode_combine_3to1) ? 4 : 4107 (ODMMode == dml2_odm_mode_combine_2to1) ? 2 : pixels_per_clock_cycle; 4108 4109 if (DISPCLKRequired > MaxDispclk) 4110 return false; 4111 if ((TotalNumberOfActiveDPP + NumberOfDPPRequired) > MaxNumDPP || (TotalNumberOfActiveOPP + NumberOfDPPRequired) > MaxNumOPP) 4112 return false; 4113 if (are_odm_segments_symmetrical) { 4114 if (HActive % (NumberOfDPPRequired * pixels_per_clock_cycle)) 4115 return false; 4116 } 4117 if (HActive % h_timing_div_mode) 4118 /* 4119 * TODO - OTG_H_TOTAL, OTG_H_BLANK_START/END and 4120 * OTG_H_SYNC_A_START/END all need to be visible by h timing div 4121 * mode. This logic only checks H active. 
4122 */ 4123 return false; 4124 4125 if (UseDSC) { 4126 if (HActive > MaxHActiveForDSC) 4127 return false; 4128 if (NumberOfDSCSlices > MaxDSCSlices) 4129 return false; 4130 if (HActive % NumberOfDSCSlices) 4131 return false; 4132 if (NumberOfDSCSlices % NumberOfDPPRequired) 4133 return false; 4134 if (is_max_dsc_slice_required) { 4135 if (NumberOfDSCSlices != MaxDSCSlices) 4136 return false; 4137 } 4138 } 4139 4140 if (OutFormat == dml2_420) { 4141 if (HActive > MaxHActiveFor420) 4142 return false; 4143 } 4144 4145 return true; 4146 } 4147 4148 static noinline_for_stack void CalculateODMMode( 4149 unsigned int MaximumPixelsPerLinePerDSCUnit, 4150 unsigned int HActive, 4151 enum dml2_output_format_class OutFormat, 4152 enum dml2_output_encoder_class Output, 4153 enum dml2_odm_mode ODMUse, 4154 double MaxDispclk, 4155 bool DSCEnable, 4156 unsigned int TotalNumberOfActiveDPP, 4157 unsigned int TotalNumberOfActiveOPP, 4158 unsigned int MaxNumDPP, 4159 unsigned int MaxNumOPP, 4160 double PixelClock, 4161 unsigned int NumberOfDSCSlices, 4162 4163 // Output 4164 bool *TotalAvailablePipesSupport, 4165 unsigned int *NumberOfDPP, 4166 enum dml2_odm_mode *ODMMode, 4167 double *RequiredDISPCLKPerSurface) 4168 { 4169 double SurfaceRequiredDISPCLKWithoutODMCombine; 4170 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne; 4171 double SurfaceRequiredDISPCLKWithODMCombineThreeToOne; 4172 double SurfaceRequiredDISPCLKWithODMCombineFourToOne; 4173 double DISPCLKRequired; 4174 unsigned int NumberOfDPPRequired; 4175 unsigned int MaxHActiveForDSC; 4176 unsigned int MaxDSCSlices; 4177 unsigned int MaxHActiveFor420; 4178 bool success; 4179 bool UseDSC = DSCEnable && (NumberOfDSCSlices > 0); 4180 enum dml2_odm_mode DecidedODMMode; 4181 bool isTMDS420 = (OutFormat == dml2_420 && Output == dml2_hdmi); 4182 4183 SurfaceRequiredDISPCLKWithoutODMCombine = CalculateRequiredDispclk(dml2_odm_mode_bypass, PixelClock, isTMDS420); 4184 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = 
CalculateRequiredDispclk(dml2_odm_mode_combine_2to1, PixelClock, isTMDS420); 4185 SurfaceRequiredDISPCLKWithODMCombineThreeToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_3to1, PixelClock, isTMDS420); 4186 SurfaceRequiredDISPCLKWithODMCombineFourToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_4to1, PixelClock, isTMDS420); 4187 #ifdef __DML_VBA_DEBUG__ 4188 DML_LOG_VERBOSE("DML::%s: ODMUse = %d\n", __func__, ODMUse); 4189 DML_LOG_VERBOSE("DML::%s: Output = %d\n", __func__, Output); 4190 DML_LOG_VERBOSE("DML::%s: DSCEnable = %d\n", __func__, DSCEnable); 4191 DML_LOG_VERBOSE("DML::%s: MaxDispclk = %f\n", __func__, MaxDispclk); 4192 DML_LOG_VERBOSE("DML::%s: MaximumPixelsPerLinePerDSCUnit = %d\n", __func__, MaximumPixelsPerLinePerDSCUnit); 4193 DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithoutODMCombine = %f\n", __func__, SurfaceRequiredDISPCLKWithoutODMCombine); 4194 DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineTwoToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineTwoToOne); 4195 DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineThreeToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineThreeToOne); 4196 DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineFourToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineFourToOne); 4197 #endif 4198 if (ODMUse == dml2_odm_mode_auto) 4199 DecidedODMMode = DecideODMMode(HActive, 4200 MaxDispclk, 4201 MaximumPixelsPerLinePerDSCUnit, 4202 OutFormat, 4203 UseDSC, 4204 NumberOfDSCSlices, 4205 SurfaceRequiredDISPCLKWithoutODMCombine, 4206 SurfaceRequiredDISPCLKWithODMCombineTwoToOne, 4207 SurfaceRequiredDISPCLKWithODMCombineThreeToOne, 4208 SurfaceRequiredDISPCLKWithODMCombineFourToOne); 4209 else 4210 DecidedODMMode = ODMUse; 4211 CalculateODMConstraints(DecidedODMMode, 4212 SurfaceRequiredDISPCLKWithoutODMCombine, 4213 SurfaceRequiredDISPCLKWithODMCombineTwoToOne, 4214 SurfaceRequiredDISPCLKWithODMCombineThreeToOne, 4215 
SurfaceRequiredDISPCLKWithODMCombineFourToOne, 4216 MaximumPixelsPerLinePerDSCUnit, 4217 &DISPCLKRequired, 4218 &NumberOfDPPRequired, 4219 &MaxHActiveForDSC, 4220 &MaxDSCSlices, 4221 &MaxHActiveFor420); 4222 success = ValidateODMMode(DecidedODMMode, 4223 MaxDispclk, 4224 HActive, 4225 OutFormat, 4226 UseDSC, 4227 NumberOfDSCSlices, 4228 TotalNumberOfActiveDPP, 4229 TotalNumberOfActiveOPP, 4230 MaxNumDPP, 4231 MaxNumOPP, 4232 DISPCLKRequired, 4233 NumberOfDPPRequired, 4234 MaxHActiveForDSC, 4235 MaxDSCSlices, 4236 MaxHActiveFor420); 4237 4238 *ODMMode = DecidedODMMode; 4239 *TotalAvailablePipesSupport = success; 4240 *NumberOfDPP = NumberOfDPPRequired; 4241 *RequiredDISPCLKPerSurface = success ? DISPCLKRequired : 0; 4242 #ifdef __DML_VBA_DEBUG__ 4243 DML_LOG_VERBOSE("DML::%s: ODMMode = %d\n", __func__, *ODMMode); 4244 DML_LOG_VERBOSE("DML::%s: NumberOfDPP = %d\n", __func__, *NumberOfDPP); 4245 DML_LOG_VERBOSE("DML::%s: TotalAvailablePipesSupport = %d\n", __func__, *TotalAvailablePipesSupport); 4246 DML_LOG_VERBOSE("DML::%s: RequiredDISPCLKPerSurface = %f\n", __func__, *RequiredDISPCLKPerSurface); 4247 #endif 4248 } 4249 4250 static noinline_for_stack void CalculateOutputLink( 4251 struct dml2_core_internal_scratch *s, 4252 double PHYCLK, 4253 double PHYCLKD18, 4254 double PHYCLKD32, 4255 double Downspreading, 4256 enum dml2_output_encoder_class Output, 4257 enum dml2_output_format_class OutputFormat, 4258 unsigned int HTotal, 4259 unsigned int HActive, 4260 double PixelClockBackEnd, 4261 double ForcedOutputLinkBPP, 4262 unsigned int DSCInputBitPerComponent, 4263 unsigned int NumberOfDSCSlices, 4264 double AudioSampleRate, 4265 unsigned int AudioSampleLayout, 4266 enum dml2_odm_mode ODMModeNoDSC, 4267 enum dml2_odm_mode ODMModeDSC, 4268 enum dml2_dsc_enable_option DSCEnable, 4269 unsigned int OutputLinkDPLanes, 4270 enum dml2_output_link_dp_rate OutputLinkDPRate, 4271 4272 // Output 4273 bool *RequiresDSC, 4274 bool *RequiresFEC, 4275 double *OutBpp, 4276 enum 
dml2_core_internal_output_type *OutputType, 4277 enum dml2_core_internal_output_type_rate *OutputRate, 4278 unsigned int *RequiredSlots) 4279 { 4280 bool LinkDSCEnable; 4281 unsigned int dummy; 4282 *RequiresDSC = false; 4283 *RequiresFEC = false; 4284 *OutBpp = 0; 4285 4286 *OutputType = dml2_core_internal_output_type_unknown; 4287 *OutputRate = dml2_core_internal_output_rate_unknown; 4288 4289 #ifdef __DML_VBA_DEBUG__ 4290 DML_LOG_VERBOSE("DML::%s: DSCEnable = %u (dis, en, en_if_necessary)\n", __func__, DSCEnable); 4291 DML_LOG_VERBOSE("DML::%s: PHYCLK = %f\n", __func__, PHYCLK); 4292 DML_LOG_VERBOSE("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd); 4293 DML_LOG_VERBOSE("DML::%s: AudioSampleRate = %f\n", __func__, AudioSampleRate); 4294 DML_LOG_VERBOSE("DML::%s: HActive = %u\n", __func__, HActive); 4295 DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal); 4296 DML_LOG_VERBOSE("DML::%s: ODMModeNoDSC = %u\n", __func__, ODMModeNoDSC); 4297 DML_LOG_VERBOSE("DML::%s: ODMModeDSC = %u\n", __func__, ODMModeDSC); 4298 DML_LOG_VERBOSE("DML::%s: ForcedOutputLinkBPP = %f\n", __func__, ForcedOutputLinkBPP); 4299 DML_LOG_VERBOSE("DML::%s: Output (encoder) = %u\n", __func__, Output); 4300 DML_LOG_VERBOSE("DML::%s: OutputLinkDPRate = %u\n", __func__, OutputLinkDPRate); 4301 #endif 4302 { 4303 if (Output == dml2_hdmi) { 4304 *RequiresDSC = false; 4305 *RequiresFEC = false; 4306 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, math_min2(600, PHYCLK) * 10, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, 4307 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); 4308 //OutputTypeAndRate = "HDMI"; 4309 *OutputType = dml2_core_internal_output_type_hdmi; 4310 } else if (Output == dml2_dp || Output == dml2_dp2p0 || Output == dml2_edp) { 4311 if (DSCEnable == dml2_dsc_enable) { 4312 *RequiresDSC = true; 4313 LinkDSCEnable = true; 4314 if 
(Output == dml2_dp || Output == dml2_dp2p0) { 4315 *RequiresFEC = true; 4316 } else { 4317 *RequiresFEC = false; 4318 } 4319 } else { 4320 *RequiresDSC = false; 4321 LinkDSCEnable = false; 4322 if (Output == dml2_dp2p0) { 4323 *RequiresFEC = true; 4324 } else { 4325 *RequiresFEC = false; 4326 } 4327 } 4328 if (Output == dml2_dp2p0) { 4329 *OutBpp = 0; 4330 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr10) && PHYCLKD32 >= 10000.0 / 32) { 4331 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, 4332 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 4333 if (*OutBpp == 0 && PHYCLKD32 < 13500.0 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { 4334 *RequiresDSC = true; 4335 LinkDSCEnable = true; 4336 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, 4337 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 4338 } 4339 //OutputTypeAndRate = Output & " UHBR10"; 4340 *OutputType = dml2_core_internal_output_type_dp2p0; 4341 *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr10; 4342 } 4343 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr13p5) && *OutBpp == 0 && PHYCLKD32 >= 13500.0 / 32) { 4344 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, 4345 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, 
ODMModeDSC, RequiredSlots); 4346 4347 if (*OutBpp == 0 && PHYCLKD32 < 20000.0 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { 4348 *RequiresDSC = true; 4349 LinkDSCEnable = true; 4350 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, 4351 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 4352 } 4353 //OutputTypeAndRate = Output & " UHBR13p5"; 4354 *OutputType = dml2_core_internal_output_type_dp2p0; 4355 *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr13p5; 4356 } 4357 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr20) && *OutBpp == 0 && PHYCLKD32 >= 20000.0 / 32) { 4358 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, 4359 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 4360 if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { 4361 *RequiresDSC = true; 4362 LinkDSCEnable = true; 4363 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, 4364 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 4365 } 4366 //OutputTypeAndRate = Output & " UHBR20"; 4367 *OutputType = dml2_core_internal_output_type_dp2p0; 4368 *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr20; 4369 } 4370 } else { // output is dp or edp 4371 *OutBpp = 0; 4372 if ((OutputLinkDPRate 
== dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr) && PHYCLK >= 270) { 4373 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, 4374 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 4375 if (*OutBpp == 0 && PHYCLK < 540 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { 4376 *RequiresDSC = true; 4377 LinkDSCEnable = true; 4378 if (Output == dml2_dp) { 4379 *RequiresFEC = true; 4380 } 4381 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, 4382 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 4383 } 4384 //OutputTypeAndRate = Output & " HBR"; 4385 *OutputType = (Output == dml2_dp) ? 
dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp; 4386 *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr; 4387 } 4388 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr2) && *OutBpp == 0 && PHYCLK >= 540) { 4389 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, 4390 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 4391 4392 if (*OutBpp == 0 && PHYCLK < 810 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { 4393 *RequiresDSC = true; 4394 LinkDSCEnable = true; 4395 if (Output == dml2_dp) { 4396 *RequiresFEC = true; 4397 } 4398 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, 4399 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 4400 } 4401 //OutputTypeAndRate = Output & " HBR2"; 4402 *OutputType = (Output == dml2_dp) ? 
dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp; 4403 *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr2; 4404 } 4405 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr3) && *OutBpp == 0 && PHYCLK >= 810) { // VBA_ERROR, vba code doesn't have hbr3 check 4406 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, 4407 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 4408 4409 if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { 4410 *RequiresDSC = true; 4411 LinkDSCEnable = true; 4412 if (Output == dml2_dp) { 4413 *RequiresFEC = true; 4414 } 4415 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, 4416 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 4417 } 4418 //OutputTypeAndRate = Output & " HBR3"; 4419 *OutputType = (Output == dml2_dp) ? 
dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp; 4420 *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr3; 4421 } 4422 } 4423 } else if (Output == dml2_hdmifrl) { 4424 if (DSCEnable == dml2_dsc_enable) { 4425 *RequiresDSC = true; 4426 LinkDSCEnable = true; 4427 *RequiresFEC = true; 4428 } else { 4429 *RequiresDSC = false; 4430 LinkDSCEnable = false; 4431 *RequiresFEC = false; 4432 } 4433 *OutBpp = 0; 4434 if (PHYCLKD18 >= 3000.0 / 18) { 4435 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 3000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); 4436 //OutputTypeAndRate = Output & "3x3"; 4437 *OutputType = dml2_core_internal_output_type_hdmifrl; 4438 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_3x3; 4439 } 4440 if (*OutBpp == 0 && PHYCLKD18 >= 6000.0 / 18) { 4441 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); 4442 //OutputTypeAndRate = Output & "6x3"; 4443 *OutputType = dml2_core_internal_output_type_hdmifrl; 4444 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x3; 4445 } 4446 if (*OutBpp == 0 && PHYCLKD18 >= 6000.0 / 18) { 4447 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); 4448 //OutputTypeAndRate = Output & "6x4"; 4449 *OutputType = dml2_core_internal_output_type_hdmifrl; 4450 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x4; 4451 } 4452 if (*OutBpp == 0 && 
PHYCLKD18 >= 8000.0 / 18) { 4453 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 8000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); 4454 //OutputTypeAndRate = Output & "8x4"; 4455 *OutputType = dml2_core_internal_output_type_hdmifrl; 4456 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_8x4; 4457 } 4458 if (*OutBpp == 0 && PHYCLKD18 >= 10000.0 / 18) { 4459 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); 4460 if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0 && PHYCLKD18 < 12000.0 / 18) { 4461 *RequiresDSC = true; 4462 LinkDSCEnable = true; 4463 *RequiresFEC = true; 4464 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); 4465 } 4466 //OutputTypeAndRate = Output & "10x4"; 4467 *OutputType = dml2_core_internal_output_type_hdmifrl; 4468 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_10x4; 4469 } 4470 if (*OutBpp == 0 && PHYCLKD18 >= 12000.0 / 18) { 4471 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); 4472 if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { 4473 *RequiresDSC = true; 4474 
LinkDSCEnable = true; 4475 *RequiresFEC = true; 4476 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); 4477 } 4478 //OutputTypeAndRate = Output & "12x4"; 4479 *OutputType = dml2_core_internal_output_type_hdmifrl; 4480 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_12x4; 4481 } 4482 } 4483 } 4484 #ifdef __DML_VBA_DEBUG__ 4485 DML_LOG_VERBOSE("DML::%s: RequiresDSC = %u\n", __func__, *RequiresDSC); 4486 DML_LOG_VERBOSE("DML::%s: RequiresFEC = %u\n", __func__, *RequiresFEC); 4487 DML_LOG_VERBOSE("DML::%s: OutBpp = %f\n", __func__, *OutBpp); 4488 #endif 4489 } 4490 4491 static double CalculateWriteBackDISPCLK( 4492 enum dml2_source_format_class WritebackPixelFormat, 4493 double PixelClock, 4494 double WritebackHRatio, 4495 double WritebackVRatio, 4496 unsigned int WritebackHTaps, 4497 unsigned int WritebackVTaps, 4498 unsigned int WritebackSourceWidth, 4499 unsigned int WritebackDestinationWidth, 4500 unsigned int HTotal, 4501 unsigned int WritebackLineBufferSize) 4502 { 4503 (void)WritebackPixelFormat; 4504 (void)WritebackVRatio; 4505 double DISPCLK_H, DISPCLK_V, DISPCLK_HB; 4506 4507 DISPCLK_H = PixelClock * math_ceil2((double)WritebackHTaps / 8.0, 1) / WritebackHRatio; 4508 DISPCLK_V = PixelClock * (WritebackVTaps * math_ceil2((double)WritebackDestinationWidth / 6.0, 1) + 8.0) / (double)HTotal; 4509 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / (double)WritebackSourceWidth; 4510 return math_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB); 4511 } 4512 4513 static double RequiredDTBCLK( 4514 bool DSCEnable, 4515 double PixelClock, 4516 enum dml2_output_format_class OutputFormat, 4517 double OutputBpp, 4518 unsigned int DSCSlices, 4519 unsigned int HTotal, 
4520 unsigned int HActive, 4521 unsigned int AudioRate, 4522 unsigned int AudioLayout) 4523 { 4524 if (DSCEnable != true) { 4525 return math_max2(PixelClock / 4.0 * OutputBpp / 24.0, 25.0); 4526 } else { 4527 double PixelWordRate = PixelClock / (OutputFormat == dml2_444 ? 1 : 2); 4528 double HCActive = math_ceil2(DSCSlices * math_ceil2(OutputBpp * math_ceil2(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); 4529 double HCBlank = 64 + 32 * math_ceil2(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1); 4530 double AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; 4531 double HActiveTribyteRate = PixelWordRate * HCActive / HActive; 4532 return math_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002; 4533 } 4534 } 4535 4536 static unsigned int DSCDelayRequirement( 4537 bool DSCEnabled, 4538 enum dml2_odm_mode ODMMode, 4539 unsigned int DSCInputBitPerComponent, 4540 double OutputBpp, 4541 unsigned int HActive, 4542 unsigned int HTotal, 4543 unsigned int NumberOfDSCSlices, 4544 enum dml2_output_format_class OutputFormat, 4545 enum dml2_output_encoder_class Output, 4546 double PixelClock, 4547 double PixelClockBackEnd) 4548 { 4549 unsigned int DSCDelayRequirement_val = 0; 4550 unsigned int NumberOfDSCSlicesFactor = 1; 4551 4552 if (DSCEnabled == true && OutputBpp != 0) { 4553 4554 if (ODMMode == dml2_odm_mode_combine_4to1) 4555 NumberOfDSCSlicesFactor = 4; 4556 else if (ODMMode == dml2_odm_mode_combine_3to1) 4557 NumberOfDSCSlicesFactor = 3; 4558 else if (ODMMode == dml2_odm_mode_combine_2to1) 4559 NumberOfDSCSlicesFactor = 2; 4560 4561 DSCDelayRequirement_val = NumberOfDSCSlicesFactor * (dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, (unsigned int)(math_ceil2((double)HActive / (double)NumberOfDSCSlices, 1.0)), 4562 (NumberOfDSCSlices / NumberOfDSCSlicesFactor), OutputFormat, Output) + dscComputeDelay(OutputFormat, Output)); 4563 4564 DSCDelayRequirement_val = (unsigned 
int)(DSCDelayRequirement_val + (HTotal - HActive) * math_ceil2((double)DSCDelayRequirement_val / (double)HActive, 1.0)); 4565 DSCDelayRequirement_val = (unsigned int)(DSCDelayRequirement_val * PixelClock / PixelClockBackEnd); 4566 4567 } else { 4568 DSCDelayRequirement_val = 0; 4569 } 4570 #ifdef __DML_VBA_DEBUG__ 4571 DML_LOG_VERBOSE("DML::%s: DSCEnabled= %u\n", __func__, DSCEnabled); 4572 DML_LOG_VERBOSE("DML::%s: ODMMode = %u\n", __func__, ODMMode); 4573 DML_LOG_VERBOSE("DML::%s: OutputBpp = %f\n", __func__, OutputBpp); 4574 DML_LOG_VERBOSE("DML::%s: HActive = %u\n", __func__, HActive); 4575 DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal); 4576 DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, PixelClock); 4577 DML_LOG_VERBOSE("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd); 4578 DML_LOG_VERBOSE("DML::%s: OutputFormat = %u\n", __func__, OutputFormat); 4579 DML_LOG_VERBOSE("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent); 4580 DML_LOG_VERBOSE("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices); 4581 DML_LOG_VERBOSE("DML::%s: DSCDelayRequirement_val = %u\n", __func__, DSCDelayRequirement_val); 4582 #endif 4583 4584 return DSCDelayRequirement_val; 4585 } 4586 4587 static void CalculateSurfaceSizeInMall( 4588 const struct dml2_display_cfg *display_cfg, 4589 unsigned int NumberOfActiveSurfaces, 4590 unsigned int MALLAllocatedForDCN, 4591 unsigned int BytesPerPixelY[], 4592 unsigned int BytesPerPixelC[], 4593 unsigned int Read256BytesBlockWidthY[], 4594 unsigned int Read256BytesBlockWidthC[], 4595 unsigned int Read256BytesBlockHeightY[], 4596 unsigned int Read256BytesBlockHeightC[], 4597 unsigned int ReadBlockWidthY[], 4598 unsigned int ReadBlockWidthC[], 4599 unsigned int ReadBlockHeightY[], 4600 unsigned int ReadBlockHeightC[], 4601 4602 // Output 4603 unsigned int SurfaceSizeInMALL[], 4604 bool *ExceededMALLSize) 4605 { 4606 (void)Read256BytesBlockWidthY; 4607 
(void)Read256BytesBlockWidthC; 4608 (void)Read256BytesBlockHeightY; 4609 (void)Read256BytesBlockHeightC; 4610 unsigned int TotalSurfaceSizeInMALLForSS = 0; 4611 unsigned int TotalSurfaceSizeInMALLForSubVP = 0; 4612 unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024; 4613 4614 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { 4615 const struct dml2_composition_cfg *composition = &display_cfg->plane_descriptors[k].composition; 4616 const struct dml2_surface_cfg *surface = &display_cfg->plane_descriptors[k].surface; 4617 4618 if (composition->viewport.stationary) { 4619 SurfaceSizeInMALL[k] = (unsigned int)(math_min2(math_ceil2((double)surface->plane0.width, ReadBlockWidthY[k]), 4620 math_floor2(composition->viewport.plane0.x_start + composition->viewport.plane0.width + ReadBlockWidthY[k] - 1, ReadBlockWidthY[k]) - 4621 math_floor2((double)composition->viewport.plane0.x_start, ReadBlockWidthY[k])) * 4622 math_min2(math_ceil2((double)surface->plane0.height, ReadBlockHeightY[k]), 4623 math_floor2((double)composition->viewport.plane0.y_start + composition->viewport.plane0.height + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) - 4624 math_floor2((double)composition->viewport.plane0.y_start, ReadBlockHeightY[k])) * BytesPerPixelY[k]); 4625 4626 if (ReadBlockWidthC[k] > 0) { 4627 SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] + 4628 math_min2(math_ceil2((double)surface->plane1.width, ReadBlockWidthC[k]), 4629 math_floor2((double)composition->viewport.plane1.y_start + composition->viewport.plane1.width + ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) - 4630 math_floor2((double)composition->viewport.plane1.y_start, ReadBlockWidthC[k])) * 4631 math_min2(math_ceil2((double)surface->plane1.height, ReadBlockHeightC[k]), 4632 math_floor2((double)composition->viewport.plane1.y_start + composition->viewport.plane1.height + ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) - 4633 math_floor2(composition->viewport.plane1.y_start, 
ReadBlockHeightC[k])) * BytesPerPixelC[k]); 4634 } 4635 } else { 4636 SurfaceSizeInMALL[k] = (unsigned int)(math_ceil2(math_min2(surface->plane0.width, composition->viewport.plane0.width + ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) * 4637 math_ceil2(math_min2(surface->plane0.height, composition->viewport.plane0.height + ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * BytesPerPixelY[k]); 4638 if (ReadBlockWidthC[k] > 0) { 4639 SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] + 4640 math_ceil2(math_min2(surface->plane1.width, composition->viewport.plane1.width + ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) * 4641 math_ceil2(math_min2(surface->plane1.height, composition->viewport.plane1.height + ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * BytesPerPixelC[k]); 4642 } 4643 } 4644 } 4645 4646 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { 4647 /* SS and Subvp counted separate as they are never used at the same time */ 4648 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) 4649 TotalSurfaceSizeInMALLForSubVP += SurfaceSizeInMALL[k]; 4650 else if (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable) 4651 TotalSurfaceSizeInMALLForSS += SurfaceSizeInMALL[k]; 4652 } 4653 4654 *ExceededMALLSize = (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) || 4655 (TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes); 4656 4657 #ifdef __DML_VBA_DEBUG__ 4658 DML_LOG_VERBOSE("DML::%s: MALLAllocatedForDCN = %u\n", __func__, MALLAllocatedForDCN * 1024 * 1024); 4659 DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALLForSubVP = %u\n", __func__, TotalSurfaceSizeInMALLForSubVP); 4660 DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALLForSS = %u\n", __func__, TotalSurfaceSizeInMALLForSS); 4661 DML_LOG_VERBOSE("DML::%s: ExceededMALLSize = %u\n", __func__, *ExceededMALLSize); 4662 #endif 4663 } 4664 4665 static void calculate_tdlut_setting( 4666 struct dml2_core_internal_scratch 
*scratch, 4667 struct dml2_core_calcs_calculate_tdlut_setting_params *p) 4668 { 4669 (void)scratch; 4670 // locals 4671 unsigned int tdlut_bpe = 8; 4672 unsigned int tdlut_width; 4673 unsigned int tdlut_pitch_bytes; 4674 unsigned int tdlut_footprint_bytes; 4675 unsigned int vmpg_bytes; 4676 unsigned int tdlut_vmpg_per_frame; 4677 unsigned int tdlut_pte_req_per_frame; 4678 unsigned int tdlut_bytes_per_line; 4679 double tdlut_drain_rate; 4680 unsigned int tdlut_mpc_width; 4681 unsigned int tdlut_bytes_per_group_simple; 4682 4683 if (!p->setup_for_tdlut) { 4684 *p->tdlut_groups_per_2row_ub = 0; 4685 *p->tdlut_opt_time = 0; 4686 *p->tdlut_drain_time = 0; 4687 *p->tdlut_bytes_to_deliver = 0; 4688 *p->tdlut_bytes_per_group = 0; 4689 *p->tdlut_pte_bytes_per_frame = 0; 4690 *p->tdlut_bytes_per_frame = 0; 4691 return; 4692 } 4693 4694 if (p->tdlut_mpc_width_flag) { 4695 tdlut_mpc_width = 33; 4696 tdlut_bytes_per_group_simple = 39*256; 4697 } else { 4698 tdlut_mpc_width = 17; 4699 tdlut_bytes_per_group_simple = 10*256; 4700 } 4701 4702 vmpg_bytes = p->gpuvm_page_size_kbytes * 1024; 4703 4704 if (p->tdlut_addressing_mode == dml2_tdlut_simple_linear) { 4705 if (p->tdlut_width_mode == dml2_tdlut_width_17_cube) 4706 tdlut_width = 4916; 4707 else 4708 tdlut_width = 35940; 4709 } else { 4710 if (p->tdlut_width_mode == dml2_tdlut_width_17_cube) 4711 tdlut_width = 17; 4712 else // dml2_tdlut_width_33_cube 4713 tdlut_width = 33; 4714 } 4715 4716 if (p->is_gfx11) 4717 tdlut_pitch_bytes = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); //256B alignment 4718 else 4719 tdlut_pitch_bytes = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 128); //128B alignment 4720 4721 if (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) 4722 tdlut_footprint_bytes = tdlut_pitch_bytes * tdlut_width * tdlut_width; 4723 else 4724 tdlut_footprint_bytes = tdlut_pitch_bytes; 4725 4726 if (!p->gpuvm_enable) { 4727 tdlut_vmpg_per_frame = 0; 4728 tdlut_pte_req_per_frame = 0; 4729 } else { 4730 
tdlut_vmpg_per_frame = (unsigned int)math_ceil2(tdlut_footprint_bytes - 1, vmpg_bytes) / vmpg_bytes + 1; 4731 tdlut_pte_req_per_frame = (unsigned int)math_ceil2(tdlut_vmpg_per_frame - 1, 8) / 8 + 1; 4732 } 4733 tdlut_bytes_per_line = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 64); //64b request 4734 *p->tdlut_pte_bytes_per_frame = tdlut_pte_req_per_frame * 64; 4735 4736 if (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) { 4737 //the tdlut_width is either 17 or 33 but the 33x33x33 is subsampled every other line/slice 4738 *p->tdlut_bytes_per_frame = tdlut_bytes_per_line * tdlut_mpc_width * tdlut_mpc_width; 4739 *p->tdlut_bytes_per_group = tdlut_bytes_per_line * tdlut_mpc_width; 4740 //the delivery cycles is DispClk cycles per line * number of lines * number of slices 4741 //tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width; 4742 tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / math_ceil2(tdlut_mpc_width/2.0, 1); 4743 } else { 4744 //tdlut_addressing_mode = tdlut_simple_linear, 3dlut width should be 4*1229=4916 elements 4745 *p->tdlut_bytes_per_frame = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); 4746 *p->tdlut_bytes_per_group = tdlut_bytes_per_group_simple; 4747 //tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_width/2.0, 1); 4748 tdlut_drain_rate = 2 * tdlut_bpe * p->dispclk_mhz; 4749 } 4750 4751 //the tdlut is fetched during the 2 row times of prefetch. 
4752 if (p->setup_for_tdlut) { 4753 *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1); 4754 if (*p->tdlut_bytes_per_frame > p->cursor_buffer_size * 1024) 4755 *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate; 4756 else 4757 *p->tdlut_opt_time = 0; 4758 *p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate; 4759 *p->tdlut_bytes_to_deliver = (unsigned int) (p->cursor_buffer_size * 1024.0); 4760 } 4761 4762 #ifdef __DML_VBA_DEBUG__ 4763 DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %d\n", __func__, p->gpuvm_enable); 4764 DML_LOG_VERBOSE("DML::%s: vmpg_bytes = %d\n", __func__, vmpg_bytes); 4765 DML_LOG_VERBOSE("DML::%s: tdlut_vmpg_per_frame = %d\n", __func__, tdlut_vmpg_per_frame); 4766 DML_LOG_VERBOSE("DML::%s: tdlut_pte_req_per_frame = %d\n", __func__, tdlut_pte_req_per_frame); 4767 4768 DML_LOG_VERBOSE("DML::%s: dispclk_mhz = %f\n", __func__, p->dispclk_mhz); 4769 DML_LOG_VERBOSE("DML::%s: tdlut_width = %u\n", __func__, tdlut_width); 4770 DML_LOG_VERBOSE("DML::%s: tdlut_addressing_mode = %s\n", __func__, (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) ? "sw_linear" : "simple_linear"); 4771 DML_LOG_VERBOSE("DML::%s: tdlut_pitch_bytes = %u\n", __func__, tdlut_pitch_bytes); 4772 DML_LOG_VERBOSE("DML::%s: tdlut_footprint_bytes = %u\n", __func__, tdlut_footprint_bytes); 4773 DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_frame = %u\n", __func__, *p->tdlut_bytes_per_frame); 4774 DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_line = %u\n", __func__, tdlut_bytes_per_line); 4775 DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_group = %u\n", __func__, *p->tdlut_bytes_per_group); 4776 DML_LOG_VERBOSE("DML::%s: tdlut_drain_rate = %f\n", __func__, tdlut_drain_rate); 4777 DML_LOG_VERBOSE("DML::%s: tdlut_delivery_cycles = %u\n", __func__, p->tdlut_addressing_mode == dml2_tdlut_sw_linear ? 
(unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width : (unsigned int)math_ceil2(tdlut_width/2.0, 1)); 4778 DML_LOG_VERBOSE("DML::%s: tdlut_opt_time = %f\n", __func__, *p->tdlut_opt_time); 4779 DML_LOG_VERBOSE("DML::%s: tdlut_drain_time = %f\n", __func__, *p->tdlut_drain_time); 4780 DML_LOG_VERBOSE("DML::%s: tdlut_bytes_to_deliver = %d\n", __func__, *p->tdlut_bytes_to_deliver); 4781 DML_LOG_VERBOSE("DML::%s: tdlut_groups_per_2row_ub = %d\n", __func__, *p->tdlut_groups_per_2row_ub); 4782 #endif 4783 } 4784 4785 static void CalculateTarb( 4786 const struct dml2_display_cfg *display_cfg, 4787 unsigned int PixelChunkSizeInKByte, 4788 unsigned int NumberOfActiveSurfaces, 4789 unsigned int NumberOfDPP[], 4790 unsigned int dpte_group_bytes[], 4791 unsigned int tdlut_bytes_per_group[], 4792 double HostVMInefficiencyFactor, 4793 double HostVMInefficiencyFactorPrefetch, 4794 unsigned int HostVMMinPageSize, 4795 double ReturnBW, 4796 unsigned int MetaChunkSize, 4797 4798 // output 4799 double *Tarb, 4800 double *Tarb_prefetch) 4801 { 4802 double extra_bytes = 0; 4803 double extra_bytes_prefetch = 0; 4804 double HostVMDynamicLevels = CalculateHostVMDynamicLevels(display_cfg->gpuvm_enable, display_cfg->hostvm_enable, HostVMMinPageSize, display_cfg->hostvm_max_non_cached_page_table_levels); 4805 4806 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { 4807 extra_bytes = extra_bytes + (NumberOfDPP[k] * PixelChunkSizeInKByte * 1024); 4808 4809 if (display_cfg->plane_descriptors[k].surface.dcc.enable) 4810 extra_bytes = extra_bytes + (MetaChunkSize * 1024); 4811 4812 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut) 4813 extra_bytes = extra_bytes + tdlut_bytes_per_group[k]; 4814 } 4815 4816 extra_bytes_prefetch = extra_bytes; 4817 4818 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { 4819 if (display_cfg->gpuvm_enable == true) { 4820 extra_bytes = extra_bytes + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * 
HostVMDynamicLevels) * HostVMInefficiencyFactor; 4821 extra_bytes_prefetch = extra_bytes_prefetch + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactorPrefetch; 4822 } 4823 } 4824 *Tarb = extra_bytes / ReturnBW; 4825 *Tarb_prefetch = extra_bytes_prefetch / ReturnBW; 4826 #ifdef __DML_VBA_DEBUG__ 4827 DML_LOG_VERBOSE("DML::%s: PixelChunkSizeInKByte = %d\n", __func__, PixelChunkSizeInKByte); 4828 DML_LOG_VERBOSE("DML::%s: MetaChunkSize = %d\n", __func__, MetaChunkSize); 4829 DML_LOG_VERBOSE("DML::%s: extra_bytes = %f\n", __func__, extra_bytes); 4830 DML_LOG_VERBOSE("DML::%s: extra_bytes_prefetch = %f\n", __func__, extra_bytes_prefetch); 4831 #endif 4832 } 4833 4834 static double CalculateTWait( 4835 long reserved_vblank_time_ns, 4836 double UrgentLatency, 4837 double Ttrip, 4838 double g6_temp_read_blackout_us) 4839 { 4840 double TWait; 4841 double t_urg_trip = math_max2(UrgentLatency, Ttrip); 4842 TWait = math_max2(reserved_vblank_time_ns/1000.0, g6_temp_read_blackout_us) + t_urg_trip; 4843 4844 #ifdef __DML_VBA_DEBUG__ 4845 DML_LOG_VERBOSE("DML::%s: reserved_vblank_time_ns = %ld\n", __func__, reserved_vblank_time_ns); 4846 DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); 4847 DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, Ttrip); 4848 DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, TWait); 4849 #endif 4850 return TWait; 4851 } 4852 4853 4854 static void CalculateVUpdateAndDynamicMetadataParameters( 4855 unsigned int MaxInterDCNTileRepeaters, 4856 double Dppclk, 4857 double Dispclk, 4858 double DCFClkDeepSleep, 4859 double PixelClock, 4860 unsigned int HTotal, 4861 unsigned int VBlank, 4862 unsigned int DynamicMetadataTransmittedBytes, 4863 unsigned int DynamicMetadataLinesBeforeActiveRequired, 4864 unsigned int InterlaceEnable, 4865 bool ProgressiveToInterlaceUnitInOPP, 4866 4867 // Output 4868 double *TSetup, 4869 double *Tdmbf, 4870 double *Tdmec, 4871 double *Tdmsks, 4872 unsigned int 
*VUpdateOffsetPix, 4873 unsigned int *VUpdateWidthPix, 4874 unsigned int *VReadyOffsetPix) 4875 { 4876 double TotalRepeaterDelayTime; 4877 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk); 4878 *VUpdateWidthPix = (unsigned int)(math_ceil2((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0)); 4879 *VReadyOffsetPix = (unsigned int)(math_ceil2(math_max2(150.0 / Dppclk, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0)); 4880 *VUpdateOffsetPix = (unsigned int)(math_ceil2(HTotal / 4.0, 1.0)); 4881 *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; 4882 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk; 4883 *Tdmec = HTotal / PixelClock; 4884 4885 if (DynamicMetadataLinesBeforeActiveRequired == 0) { 4886 *Tdmsks = VBlank * HTotal / PixelClock / 2.0; 4887 } else { 4888 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock; 4889 } 4890 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) { 4891 *Tdmsks = *Tdmsks / 2; 4892 } 4893 #ifdef __DML_VBA_DEBUG__ 4894 DML_LOG_VERBOSE("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired); 4895 DML_LOG_VERBOSE("DML::%s: VBlank = %u\n", __func__, VBlank); 4896 DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal); 4897 DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, PixelClock); 4898 DML_LOG_VERBOSE("DML::%s: Dppclk = %f\n", __func__, Dppclk); 4899 DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep); 4900 DML_LOG_VERBOSE("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters); 4901 DML_LOG_VERBOSE("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, TotalRepeaterDelayTime); 4902 4903 DML_LOG_VERBOSE("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix); 4904 DML_LOG_VERBOSE("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix); 
4905 DML_LOG_VERBOSE("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix); 4906 4907 DML_LOG_VERBOSE("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks); 4908 #endif 4909 } 4910 4911 static double get_urgent_bandwidth_required( 4912 struct dml2_core_shared_get_urgent_bandwidth_required_locals *l, 4913 const struct dml2_display_cfg *display_cfg, 4914 enum dml2_core_internal_soc_state_type state_type, 4915 enum dml2_core_internal_bw_type bw_type, 4916 bool inc_flip_bw, // including flip bw 4917 bool use_qual_row_bw, 4918 unsigned int NumberOfActiveSurfaces, 4919 unsigned int NumberOfDPP[], 4920 double dcc_dram_bw_nom_overhead_factor_p0[], 4921 double dcc_dram_bw_nom_overhead_factor_p1[], 4922 double dcc_dram_bw_pref_overhead_factor_p0[], 4923 double dcc_dram_bw_pref_overhead_factor_p1[], 4924 double mall_prefetch_sdp_overhead_factor[], 4925 double mall_prefetch_dram_overhead_factor[], 4926 double ReadBandwidthLuma[], 4927 double ReadBandwidthChroma[], 4928 double PrefetchBandwidthLuma[], 4929 double PrefetchBandwidthChroma[], 4930 double PrefetchBandwidthMax[], 4931 double excess_vactive_fill_bw_l[], 4932 double excess_vactive_fill_bw_c[], 4933 double cursor_bw[], 4934 double dpte_row_bw[], 4935 double meta_row_bw[], 4936 double prefetch_cursor_bw[], 4937 double prefetch_vmrow_bw[], 4938 double flip_bw[], 4939 double UrgentBurstFactorLuma[], 4940 double UrgentBurstFactorChroma[], 4941 double UrgentBurstFactorCursor[], 4942 double UrgentBurstFactorLumaPre[], 4943 double UrgentBurstFactorChromaPre[], 4944 double UrgentBurstFactorCursorPre[], 4945 /* outputs */ 4946 double surface_required_bw[], 4947 double surface_peak_required_bw[]) 4948 { 4949 // set inc_flip_bw = 0 for total_dchub_urgent_read_bw_noflip calculation, 1 for total_dchub_urgent_read_bw as described in the MAS 4950 // set use_qual_row_bw = 1 to calculate using qualified row bandwidth, used for total_flip_bw calculation 4951 4952 memset(l, 0, sizeof(struct 
dml2_core_shared_get_urgent_bandwidth_required_locals)); 4953 4954 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { 4955 l->mall_svp_prefetch_factor = (state_type == dml2_core_internal_soc_state_svp_prefetch) ? (bw_type == dml2_core_internal_bw_dram ? mall_prefetch_dram_overhead_factor[k] : mall_prefetch_sdp_overhead_factor[k]) : 1.0; 4956 l->tmp_nom_adj_factor_p0 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor; 4957 l->tmp_nom_adj_factor_p1 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor; 4958 l->tmp_pref_adj_factor_p0 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_pref_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor; 4959 l->tmp_pref_adj_factor_p1 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_pref_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor; 4960 4961 l->adj_factor_p0 = UrgentBurstFactorLuma[k] * l->tmp_nom_adj_factor_p0; 4962 l->adj_factor_p1 = UrgentBurstFactorChroma[k] * l->tmp_nom_adj_factor_p1; 4963 l->adj_factor_cur = UrgentBurstFactorCursor[k]; 4964 l->adj_factor_p0_pre = UrgentBurstFactorLumaPre[k] * l->tmp_pref_adj_factor_p0; 4965 l->adj_factor_p1_pre = UrgentBurstFactorChromaPre[k] * l->tmp_pref_adj_factor_p1; 4966 l->adj_factor_cur_pre = UrgentBurstFactorCursorPre[k]; 4967 4968 bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]); 4969 bool exclude_this_plane = false; 4970 4971 // Exclude phantom pipe in bw calculation for non svp prefetch state 4972 if (state_type != dml2_core_internal_soc_state_svp_prefetch && is_phantom) 4973 exclude_this_plane = true; 4974 4975 // The qualified row bandwidth, qual_row_bw, accounts for the regular non-flip row bandwidth when there is no possible immediate flip or HostVM invalidation flip. 
4976 // The qual_row_bw is zero if HostVM is possible and only non-zero and equal to row_bw(i) if immediate flip is not allowed for that pipe. 4977 if (use_qual_row_bw) { 4978 if (display_cfg->hostvm_enable) 4979 l->per_plane_flip_bw[k] = 0; // qual_row_bw 4980 else if (!display_cfg->plane_descriptors[k].immediate_flip) 4981 l->per_plane_flip_bw[k] = NumberOfDPP[k] * (dpte_row_bw[k] + meta_row_bw[k]); 4982 } else { 4983 // the final_flip_bw includes the regular row_bw when immediate flip is disallowed (and no HostVM) 4984 if ((!display_cfg->plane_descriptors[k].immediate_flip && !display_cfg->hostvm_enable) || !inc_flip_bw) 4985 l->per_plane_flip_bw[k] = NumberOfDPP[k] * (dpte_row_bw[k] + meta_row_bw[k]); 4986 else 4987 l->per_plane_flip_bw[k] = NumberOfDPP[k] * flip_bw[k]; 4988 } 4989 4990 if (!exclude_this_plane) { 4991 l->vm_row_bw = NumberOfDPP[k] * prefetch_vmrow_bw[k]; 4992 l->flip_and_active_bw = l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur; 4993 l->flip_and_prefetch_bw = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre; 4994 l->flip_and_prefetch_bw_max = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthMax[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre; 4995 l->active_and_excess_bw = (ReadBandwidthLuma[k] + excess_vactive_fill_bw_l[k]) * l->tmp_nom_adj_factor_p0 + (ReadBandwidthChroma[k] + excess_vactive_fill_bw_c[k]) * l->tmp_nom_adj_factor_p1 + dpte_row_bw[k] + meta_row_bw[k]; 4996 surface_required_bw[k] = math_max5(l->vm_row_bw, l->flip_and_active_bw, l->flip_and_prefetch_bw, l->active_and_excess_bw, l->flip_and_prefetch_bw_max); 4997 4998 /* export peak required bandwidth for the surface */ 4999 surface_peak_required_bw[k] 
= math_max2(surface_required_bw[k], surface_peak_required_bw[k]); 5000 5001 #ifdef __DML_VBA_DEBUG__ 5002 DML_LOG_VERBOSE("DML::%s: k=%d, max1: vm_row_bw=%f\n", __func__, k, l->vm_row_bw); 5003 DML_LOG_VERBOSE("DML::%s: k=%d, max2: flip_and_active_bw=%f\n", __func__, k, l->flip_and_active_bw); 5004 DML_LOG_VERBOSE("DML::%s: k=%d, max3: flip_and_prefetch_bw=%f\n", __func__, k, l->flip_and_prefetch_bw); 5005 DML_LOG_VERBOSE("DML::%s: k=%d, max4: active_and_excess_bw=%f\n", __func__, k, l->active_and_excess_bw); 5006 DML_LOG_VERBOSE("DML::%s: k=%d, surface_required_bw=%f\n", __func__, k, surface_required_bw[k]); 5007 DML_LOG_VERBOSE("DML::%s: k=%d, surface_peak_required_bw=%f\n", __func__, k, surface_peak_required_bw[k]); 5008 #endif 5009 } else { 5010 surface_required_bw[k] = 0.0; 5011 } 5012 5013 l->required_bandwidth_mbps += surface_required_bw[k]; 5014 5015 #ifdef __DML_VBA_DEBUG__ 5016 DML_LOG_VERBOSE("DML::%s: k=%d, NumberOfDPP=%d\n", __func__, k, NumberOfDPP[k]); 5017 DML_LOG_VERBOSE("DML::%s: k=%d, use_qual_row_bw=%d\n", __func__, k, use_qual_row_bw); 5018 DML_LOG_VERBOSE("DML::%s: k=%d, immediate_flip=%d\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip); 5019 DML_LOG_VERBOSE("DML::%s: k=%d, mall_svp_prefetch_factor=%f\n", __func__, k, l->mall_svp_prefetch_factor); 5020 DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p0=%f\n", __func__, k, l->adj_factor_p0); 5021 DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p1=%f\n", __func__, k, l->adj_factor_p1); 5022 DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_cur=%f\n", __func__, k, l->adj_factor_cur); 5023 5024 DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p0_pre=%f\n", __func__, k, l->adj_factor_p0_pre); 5025 DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p1_pre=%f\n", __func__, k, l->adj_factor_p1_pre); 5026 DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_cur_pre=%f\n", __func__, k, l->adj_factor_cur_pre); 5027 5028 DML_LOG_VERBOSE("DML::%s: k=%d, per_plane_flip_bw=%f\n", __func__, k, l->per_plane_flip_bw[k]); 5029 
DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_vmrow_bw=%f\n", __func__, k, prefetch_vmrow_bw[k]); 5030 DML_LOG_VERBOSE("DML::%s: k=%d, ReadBandwidthLuma=%f\n", __func__, k, ReadBandwidthLuma[k]); 5031 DML_LOG_VERBOSE("DML::%s: k=%d, ReadBandwidthChroma=%f\n", __func__, k, ReadBandwidthChroma[k]); 5032 DML_LOG_VERBOSE("DML::%s: k=%d, excess_vactive_fill_bw_l=%f\n", __func__, k, excess_vactive_fill_bw_l[k]); 5033 DML_LOG_VERBOSE("DML::%s: k=%d, excess_vactive_fill_bw_c=%f\n", __func__, k, excess_vactive_fill_bw_c[k]); 5034 DML_LOG_VERBOSE("DML::%s: k=%d, cursor_bw=%f\n", __func__, k, cursor_bw[k]); 5035 5036 DML_LOG_VERBOSE("DML::%s: k=%d, meta_row_bw=%f\n", __func__, k, meta_row_bw[k]); 5037 DML_LOG_VERBOSE("DML::%s: k=%d, dpte_row_bw=%f\n", __func__, k, dpte_row_bw[k]); 5038 DML_LOG_VERBOSE("DML::%s: k=%d, PrefetchBandwidthLuma=%f\n", __func__, k, PrefetchBandwidthLuma[k]); 5039 DML_LOG_VERBOSE("DML::%s: k=%d, PrefetchBandwidthChroma=%f\n", __func__, k, PrefetchBandwidthChroma[k]); 5040 DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_cursor_bw=%f\n", __func__, k, prefetch_cursor_bw[k]); 5041 DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane); 5042 DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), soc_state=%s, inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, dml2_core_internal_soc_state_type_str(state_type), inc_flip_bw, is_phantom, exclude_this_plane); 5043 DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane); 5044 #endif 5045 } 5046 5047 return l->required_bandwidth_mbps; 5048 } 5049 5050 static void CalculateExtraLatency( 5051 const struct dml2_display_cfg *display_cfg, 5052 unsigned int 
ROBBufferSizeInKByte, 5053 unsigned int RoundTripPingLatencyCycles, 5054 unsigned int ReorderingBytes, 5055 double DCFCLK, 5056 double FabricClock, 5057 unsigned int PixelChunkSizeInKByte, 5058 double ReturnBW, 5059 unsigned int NumberOfActiveSurfaces, 5060 unsigned int NumberOfDPP[], 5061 unsigned int dpte_group_bytes[], 5062 unsigned int tdlut_bytes_per_group[], 5063 double HostVMInefficiencyFactor, 5064 double HostVMInefficiencyFactorPrefetch, 5065 unsigned int HostVMMinPageSize, 5066 enum dml2_qos_param_type qos_type, 5067 bool max_outstanding_when_urgent_expected, 5068 unsigned int max_outstanding_requests, 5069 unsigned int request_size_bytes_luma[], 5070 unsigned int request_size_bytes_chroma[], 5071 unsigned int MetaChunkSize, 5072 unsigned int dchub_arb_to_ret_delay, 5073 double Ttrip, 5074 unsigned int hostvm_mode, 5075 5076 // output 5077 double *ExtraLatency, // Tex 5078 double *ExtraLatency_sr, // Tex_sr 5079 double *ExtraLatencyPrefetch) 5080 5081 { 5082 double Tarb; 5083 double Tarb_prefetch; 5084 double Tex_trips; 5085 unsigned int max_request_size_bytes = 0; 5086 5087 CalculateTarb( 5088 display_cfg, 5089 PixelChunkSizeInKByte, 5090 NumberOfActiveSurfaces, 5091 NumberOfDPP, 5092 dpte_group_bytes, 5093 tdlut_bytes_per_group, 5094 HostVMInefficiencyFactor, 5095 HostVMInefficiencyFactorPrefetch, 5096 HostVMMinPageSize, 5097 ReturnBW, 5098 MetaChunkSize, 5099 // output 5100 &Tarb, 5101 &Tarb_prefetch); 5102 5103 Tex_trips = (display_cfg->hostvm_enable && hostvm_mode == 1) ? 
(2.0 * Ttrip) : 0.0; 5104 5105 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { 5106 if (request_size_bytes_luma[k] > max_request_size_bytes) 5107 max_request_size_bytes = request_size_bytes_luma[k]; 5108 if (request_size_bytes_chroma[k] > max_request_size_bytes) 5109 max_request_size_bytes = request_size_bytes_chroma[k]; 5110 } 5111 5112 if (qos_type == dml2_qos_param_type_dcn4x) { 5113 *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK; 5114 *ExtraLatency = *ExtraLatency_sr; 5115 if (max_outstanding_when_urgent_expected) 5116 *ExtraLatency = *ExtraLatency + (ROBBufferSizeInKByte * 1024 - max_outstanding_requests * max_request_size_bytes) / ReturnBW; 5117 } else { 5118 *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK + RoundTripPingLatencyCycles / FabricClock + ReorderingBytes / ReturnBW; 5119 *ExtraLatency = *ExtraLatency_sr; 5120 } 5121 *ExtraLatency = *ExtraLatency + Tex_trips; 5122 *ExtraLatencyPrefetch = *ExtraLatency + Tarb_prefetch; 5123 *ExtraLatency = *ExtraLatency + Tarb; 5124 *ExtraLatency_sr = *ExtraLatency_sr + Tarb; 5125 5126 #ifdef __DML_VBA_DEBUG__ 5127 DML_LOG_VERBOSE("DML::%s: qos_type=%u\n", __func__, qos_type); 5128 DML_LOG_VERBOSE("DML::%s: hostvm_mode=%u\n", __func__, hostvm_mode); 5129 DML_LOG_VERBOSE("DML::%s: Tex_trips=%f\n", __func__, Tex_trips); 5130 DML_LOG_VERBOSE("DML::%s: max_outstanding_when_urgent_expected=%u\n", __func__, max_outstanding_when_urgent_expected); 5131 DML_LOG_VERBOSE("DML::%s: FabricClock=%f\n", __func__, FabricClock); 5132 DML_LOG_VERBOSE("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); 5133 DML_LOG_VERBOSE("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); 5134 DML_LOG_VERBOSE("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles); 5135 DML_LOG_VERBOSE("DML::%s: ReorderingBytes=%u\n", __func__, ReorderingBytes); 5136 DML_LOG_VERBOSE("DML::%s: Tarb=%f\n", __func__, Tarb); 5137 DML_LOG_VERBOSE("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency); 5138 DML_LOG_VERBOSE("DML::%s: 
ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr); 5139 DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch=%f\n", __func__, *ExtraLatencyPrefetch); 5140 #endif 5141 } 5142 5143 static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_CalculatePrefetchSchedule_params *p) 5144 { 5145 struct dml2_core_calcs_CalculatePrefetchSchedule_locals *s = &scratch->CalculatePrefetchSchedule_locals; 5146 bool dcc_mrq_enable; 5147 5148 unsigned int vm_bytes; 5149 unsigned int extra_tdpe_bytes; 5150 unsigned int tdlut_row_bytes; 5151 unsigned int Lo; 5152 5153 s->NoTimeToPrefetch = false; 5154 s->DPPCycles = 0; 5155 s->DISPCLKCycles = 0; 5156 s->DSTTotalPixelsAfterScaler = 0.0; 5157 s->LineTime = 0.0; 5158 s->dst_y_prefetch_equ = 0.0; 5159 s->prefetch_bw_oto = 0.0; 5160 s->Tvm_oto = 0.0; 5161 s->Tr0_oto = 0.0; 5162 s->Tvm_oto_lines = 0.0; 5163 s->Tr0_oto_lines = 0.0; 5164 s->dst_y_prefetch_oto = 0.0; 5165 s->TimeForFetchingVM = 0.0; 5166 s->TimeForFetchingRowInVBlank = 0.0; 5167 s->LinesToRequestPrefetchPixelData = 0.0; 5168 s->HostVMDynamicLevelsTrips = 0; 5169 s->trip_to_mem = 0.0; 5170 *p->Tvm_trips = 0.0; 5171 *p->Tr0_trips = 0.0; 5172 s->Tvm_trips_rounded = 0.0; 5173 s->Tr0_trips_rounded = 0.0; 5174 s->max_Tsw = 0.0; 5175 s->Lsw_oto = 0.0; 5176 *p->Tpre_rounded = 0.0; 5177 s->prefetch_bw_equ = 0.0; 5178 s->Tvm_equ = 0.0; 5179 s->Tr0_equ = 0.0; 5180 s->Tdmbf = 0.0; 5181 s->Tdmec = 0.0; 5182 s->Tdmsks = 0.0; 5183 *p->prefetch_sw_bytes = 0.0; 5184 s->prefetch_bw_pr = 0.0; 5185 s->bytes_pp = 0.0; 5186 s->dep_bytes = 0.0; 5187 s->min_Lsw_oto = 0.0; 5188 s->min_Lsw_equ = 0.0; 5189 s->Tsw_est1 = 0.0; 5190 s->Tsw_est2 = 0.0; 5191 s->Tsw_est3 = 0.0; 5192 s->cursor_prefetch_bytes = 0; 5193 *p->prefetch_cursor_bw = 0; 5194 *p->RequiredPrefetchBWMax = 0.0; 5195 5196 dcc_mrq_enable = (p->dcc_enable && p->mrq_present); 5197 5198 s->TWait_p = p->TWait - p->Ttrip; // TWait includes max(Turg, Ttrip) and Ttrip here is already max(Turg, Ttrip) 5199 
5200 if (p->display_cfg->gpuvm_enable == true && p->display_cfg->hostvm_enable == true) { 5201 s->HostVMDynamicLevelsTrips = p->display_cfg->hostvm_max_non_cached_page_table_levels; 5202 } else { 5203 s->HostVMDynamicLevelsTrips = 0; 5204 } 5205 #ifdef __DML_VBA_DEBUG__ 5206 DML_LOG_VERBOSE("DML::%s: dcc_enable = %u\n", __func__, p->dcc_enable); 5207 DML_LOG_VERBOSE("DML::%s: mrq_present = %u\n", __func__, p->mrq_present); 5208 DML_LOG_VERBOSE("DML::%s: dcc_mrq_enable = %u\n", __func__, dcc_mrq_enable); 5209 DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->display_cfg->gpuvm_enable); 5210 DML_LOG_VERBOSE("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels); 5211 DML_LOG_VERBOSE("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable); 5212 DML_LOG_VERBOSE("DML::%s: VStartup = %u\n", __func__, p->VStartup); 5213 DML_LOG_VERBOSE("DML::%s: HostVMEnable = %u\n", __func__, p->display_cfg->hostvm_enable); 5214 DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); 5215 DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, p->TWait); 5216 DML_LOG_VERBOSE("DML::%s: TWait_p = %f\n", __func__, s->TWait_p); 5217 DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, p->Ttrip); 5218 DML_LOG_VERBOSE("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk); 5219 DML_LOG_VERBOSE("DML::%s: myPipe->Dispclk = %f\n", __func__, p->myPipe->Dispclk); 5220 #endif 5221 CalculateVUpdateAndDynamicMetadataParameters( 5222 p->MaxInterDCNTileRepeaters, 5223 p->myPipe->Dppclk, 5224 p->myPipe->Dispclk, 5225 p->myPipe->DCFClkDeepSleep, 5226 p->myPipe->PixelClock, 5227 p->myPipe->HTotal, 5228 p->myPipe->VBlank, 5229 p->DynamicMetadataTransmittedBytes, 5230 p->DynamicMetadataLinesBeforeActiveRequired, 5231 p->myPipe->InterlaceEnable, 5232 p->myPipe->ProgressiveToInterlaceUnitInOPP, 5233 p->TSetup, 5234 5235 // Output 5236 &s->Tdmbf, 5237 &s->Tdmec, 5238 &s->Tdmsks, 5239 p->VUpdateOffsetPix, 5240 
p->VUpdateWidthPix, 5241 p->VReadyOffsetPix); 5242 5243 s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock; 5244 s->trip_to_mem = p->Ttrip; 5245 *p->Tvm_trips = p->ExtraLatencyPrefetch + math_max2(s->trip_to_mem * (p->display_cfg->gpuvm_max_page_table_levels * (s->HostVMDynamicLevelsTrips + 1)), p->Turg); 5246 if (dcc_mrq_enable) 5247 *p->Tvm_trips_flip = *p->Tvm_trips; 5248 else 5249 *p->Tvm_trips_flip = *p->Tvm_trips - s->trip_to_mem; 5250 5251 *p->Tr0_trips_flip = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1); 5252 *p->Tr0_trips = math_max2(*p->Tr0_trips_flip, p->tdlut_opt_time / 2); 5253 5254 if (p->DynamicMetadataVMEnabled == true) { 5255 *p->Tdmdl_vm = s->TWait_p + *p->Tvm_trips; 5256 *p->Tdmdl = *p->Tdmdl_vm + p->Ttrip; 5257 } else { 5258 *p->Tdmdl_vm = 0; 5259 *p->Tdmdl = s->TWait_p + p->ExtraLatencyPrefetch + p->Ttrip; // Tex 5260 } 5261 5262 if (p->DynamicMetadataEnable == true) { 5263 if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) { 5264 *p->NotEnoughTimeForDynamicMetadata = true; 5265 DML_LOG_VERBOSE("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); 5266 DML_LOG_VERBOSE("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf); 5267 DML_LOG_VERBOSE("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec); 5268 DML_LOG_VERBOSE("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks); 5269 DML_LOG_VERBOSE("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl); 5270 } else { 5271 *p->NotEnoughTimeForDynamicMetadata = false; 5272 } 5273 } else { 5274 *p->NotEnoughTimeForDynamicMetadata = false; 5275 } 5276 5277 if (p->myPipe->ScalerEnabled) 5278 s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCL); 5279 else 5280 s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + 
p->DPPCLKDelaySCLLBOnly); 5281 5282 s->DPPCycles = (unsigned int)(s->DPPCycles + p->myPipe->NumberOfCursors * p->DPPCLKDelayCNVCCursor); 5283 5284 s->DISPCLKCycles = (unsigned int)p->DISPCLKDelaySubtotal; 5285 5286 if (p->myPipe->Dppclk == 0.0 || p->myPipe->Dispclk == 0.0) 5287 return true; 5288 5289 *p->DSTXAfterScaler = (unsigned int)math_round(s->DPPCycles * p->myPipe->PixelClock / p->myPipe->Dppclk + s->DISPCLKCycles * p->myPipe->PixelClock / p->myPipe->Dispclk + p->DSCDelay); 5290 *p->DSTXAfterScaler = (unsigned int)math_round(*p->DSTXAfterScaler + (p->myPipe->ODMMode != dml2_odm_mode_bypass ? 18 : 0) + (p->myPipe->DPPPerSurface - 1) * p->DPP_RECOUT_WIDTH + 5291 ((p->myPipe->ODMMode == dml2_odm_mode_split_1to2 || p->myPipe->ODMMode == dml2_odm_mode_mso_1to2) ? (double)p->myPipe->HActive / 2.0 : 0) + 5292 ((p->myPipe->ODMMode == dml2_odm_mode_mso_1to4) ? (double)p->myPipe->HActive * 3.0 / 4.0 : 0)); 5293 5294 #ifdef __DML_VBA_DEBUG__ 5295 DML_LOG_VERBOSE("DML::%s: DynamicMetadataVMEnabled = %u\n", __func__, p->DynamicMetadataVMEnabled); 5296 DML_LOG_VERBOSE("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles); 5297 DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock); 5298 DML_LOG_VERBOSE("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk); 5299 DML_LOG_VERBOSE("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles); 5300 DML_LOG_VERBOSE("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk); 5301 DML_LOG_VERBOSE("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay); 5302 DML_LOG_VERBOSE("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode); 5303 DML_LOG_VERBOSE("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH); 5304 DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler); 5305 5306 DML_LOG_VERBOSE("DML::%s: setup_for_tdlut = %u\n", __func__, p->setup_for_tdlut); 5307 DML_LOG_VERBOSE("DML::%s: tdlut_opt_time = %f\n", __func__, p->tdlut_opt_time); 5308 DML_LOG_VERBOSE("DML::%s: 
tdlut_pte_bytes_per_frame = %u\n", __func__, p->tdlut_pte_bytes_per_frame); 5309 DML_LOG_VERBOSE("DML::%s: tdlut_drain_time = %f\n", __func__, p->tdlut_drain_time); 5310 #endif 5311 5312 if (p->OutputFormat == dml2_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP)) 5313 *p->DSTYAfterScaler = 1; 5314 else 5315 *p->DSTYAfterScaler = 0; 5316 5317 s->DSTTotalPixelsAfterScaler = *p->DSTYAfterScaler * p->myPipe->HTotal + *p->DSTXAfterScaler; 5318 *p->DSTYAfterScaler = (unsigned int)(math_floor2(s->DSTTotalPixelsAfterScaler / p->myPipe->HTotal, 1)); 5319 *p->DSTXAfterScaler = (unsigned int)(s->DSTTotalPixelsAfterScaler - ((double)(*p->DSTYAfterScaler * p->myPipe->HTotal))); 5320 #ifdef __DML_VBA_DEBUG__ 5321 DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler); 5322 DML_LOG_VERBOSE("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler); 5323 #endif 5324 5325 #ifdef __DML_VBA_DEBUG__ 5326 DML_LOG_VERBOSE("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips); 5327 DML_LOG_VERBOSE("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); 5328 DML_LOG_VERBOSE("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem); 5329 DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); 5330 DML_LOG_VERBOSE("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels); 5331 DML_LOG_VERBOSE("DML::%s: HostVMDynamicLevelsTrips = %u\n", __func__, s->HostVMDynamicLevelsTrips); 5332 #endif 5333 if (p->display_cfg->gpuvm_enable) { 5334 s->Tvm_trips_rounded = math_ceil2(4.0 * *p->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime; 5335 *p->Tvm_trips_flip_rounded = math_ceil2(4.0 * *p->Tvm_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime; 5336 } else { 5337 if (p->DynamicMetadataEnable || dcc_mrq_enable || p->setup_for_tdlut) 5338 s->Tvm_trips_rounded = math_max2(s->LineTime * math_ceil2(4.0*math_max3(p->ExtraLatencyPrefetch, p->Turg, 
s->trip_to_mem)/s->LineTime, 1)/4, s->LineTime/4.0); 5339 else 5340 s->Tvm_trips_rounded = s->LineTime / 4.0; 5341 *p->Tvm_trips_flip_rounded = s->LineTime / 4.0; 5342 } 5343 5344 s->Tvm_trips_rounded = math_max2(s->Tvm_trips_rounded, s->LineTime / 4.0); 5345 *p->Tvm_trips_flip_rounded = math_max2(*p->Tvm_trips_flip_rounded, s->LineTime / 4.0); 5346 5347 if (p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable) { 5348 s->Tr0_trips_rounded = math_ceil2(4.0 * *p->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime; 5349 *p->Tr0_trips_flip_rounded = math_ceil2(4.0 * *p->Tr0_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime; 5350 } else { 5351 s->Tr0_trips_rounded = s->LineTime / 4.0; 5352 *p->Tr0_trips_flip_rounded = s->LineTime / 4.0; 5353 } 5354 s->Tr0_trips_rounded = math_max2(s->Tr0_trips_rounded, s->LineTime / 4.0); 5355 *p->Tr0_trips_flip_rounded = math_max2(*p->Tr0_trips_flip_rounded, s->LineTime / 4.0); 5356 5357 if (p->display_cfg->gpuvm_enable == true) { 5358 if (p->display_cfg->gpuvm_max_page_table_levels >= 3) { 5359 *p->Tno_bw = p->ExtraLatencyPrefetch + s->trip_to_mem * (double)((p->display_cfg->gpuvm_max_page_table_levels - 2) * (s->HostVMDynamicLevelsTrips + 1)); 5360 } else if (p->display_cfg->gpuvm_max_page_table_levels == 1 && !dcc_mrq_enable && !p->setup_for_tdlut) { 5361 *p->Tno_bw = p->ExtraLatencyPrefetch; 5362 } else { 5363 *p->Tno_bw = 0; 5364 } 5365 } else { 5366 *p->Tno_bw = 0; 5367 } 5368 5369 if (p->mrq_present || p->display_cfg->gpuvm_max_page_table_levels >= 3) 5370 *p->Tno_bw_flip = *p->Tno_bw; 5371 else 5372 *p->Tno_bw_flip = 0; //because there is no 3DLUT for iFlip 5373 5374 if (dml_is_420(p->myPipe->SourcePixelFormat)) { 5375 s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4.0; 5376 } else { 5377 s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC; 5378 } 5379 5380 *p->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + 
p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC; 5381 *p->prefetch_sw_bytes = *p->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor; 5382 5383 vm_bytes = p->vm_bytes; // vm_bytes is dpde0_bytes_per_frame_ub_l + dpde0_bytes_per_frame_ub_c + 2*extra_dpde_bytes; 5384 extra_tdpe_bytes = (unsigned int)math_max2(0, (p->display_cfg->gpuvm_max_page_table_levels - 1) * 128); 5385 5386 if (p->setup_for_tdlut) 5387 vm_bytes = vm_bytes + p->tdlut_pte_bytes_per_frame + (p->display_cfg->gpuvm_enable ? extra_tdpe_bytes : 0); 5388 5389 tdlut_row_bytes = (unsigned long) math_ceil2(p->tdlut_bytes_per_frame/2.0, 1.0); 5390 5391 s->min_Lsw_oto = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_OTO__; 5392 s->min_Lsw_oto = math_max2(s->min_Lsw_oto, p->tdlut_drain_time / s->LineTime); 5393 s->min_Lsw_oto = math_max2(s->min_Lsw_oto, 2.0); 5394 5395 // use vactive swath bw for prefetch oto and also cap prefetch_bw_oto to max_vratio_oto 5396 // Note: in prefetch calculation, acounting is done mostly per-pipe. 
5397 // vactive swath bw represents the per-surface (aka per dml plane) bw to move vratio_l/c lines of bytes_l/c per line time 5398 s->per_pipe_vactive_sw_bw = p->vactive_sw_bw_l / (double)p->myPipe->DPPPerSurface; 5399 5400 // one-to-one prefetch bw as one line of bytes per line time (as per vratio_pre_l/c = 1) 5401 s->prefetch_bw_oto = (p->swath_width_luma_ub * p->myPipe->BytePerPixelY) / s->LineTime; 5402 5403 if (p->myPipe->BytePerPixelC > 0) { 5404 s->per_pipe_vactive_sw_bw += p->vactive_sw_bw_c / (double)p->myPipe->DPPPerSurface; 5405 s->prefetch_bw_oto += (p->swath_width_chroma_ub * p->myPipe->BytePerPixelC) / s->LineTime; 5406 } 5407 5408 /* oto prefetch bw should be always be less than total vactive bw */ 5409 //DML_ASSERT(s->prefetch_bw_oto < s->per_pipe_vactive_sw_bw * p->myPipe->DPPPerSurface); 5410 5411 s->prefetch_bw_oto = math_max2(s->per_pipe_vactive_sw_bw, s->prefetch_bw_oto) * p->mall_prefetch_sdp_overhead_factor; 5412 5413 s->prefetch_bw_oto = math_min2(s->prefetch_bw_oto, *p->prefetch_sw_bytes/(s->min_Lsw_oto*s->LineTime)); 5414 5415 s->Lsw_oto = math_ceil2(4.0 * *p->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, 1.0) / 4.0; 5416 5417 s->prefetch_bw_oto = math_max3(s->prefetch_bw_oto, 5418 p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw, 5419 (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime)); 5420 5421 /* oto bw needs to be outputted even if the oto schedule isn't being used to avoid ms/mp mismatch. 
5422 * mp will fail if ms decides to use equ schedule and mp decides to use oto schedule 5423 * and the required bandwidth increases when going from ms to mp 5424 */ 5425 *p->RequiredPrefetchBWMax = s->prefetch_bw_oto; 5426 5427 #ifdef __DML_VBA_DEBUG__ 5428 DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_l = %f\n", __func__, p->vactive_sw_bw_l); 5429 DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_c = %f\n", __func__, p->vactive_sw_bw_c); 5430 DML_LOG_VERBOSE("DML::%s: per_pipe_vactive_sw_bw = %f\n", __func__, s->per_pipe_vactive_sw_bw); 5431 #endif 5432 5433 if (p->display_cfg->gpuvm_enable == true) { 5434 s->Tvm_oto = math_max3( 5435 *p->Tvm_trips, 5436 *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto, 5437 s->LineTime / 4.0); 5438 5439 #ifdef __DML_VBA_DEBUG__ 5440 DML_LOG_VERBOSE("DML::%s: Tvm_oto max0 = %f\n", __func__, *p->Tvm_trips); 5441 DML_LOG_VERBOSE("DML::%s: Tvm_oto max1 = %f\n", __func__, *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto); 5442 DML_LOG_VERBOSE("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4.0); 5443 #endif 5444 } else { 5445 s->Tvm_oto = s->Tvm_trips_rounded; 5446 } 5447 5448 if ((p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable)) { 5449 s->Tr0_oto = math_max3( 5450 *p->Tr0_trips, 5451 (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto, 5452 s->LineTime / 4.0); 5453 #ifdef __DML_VBA_DEBUG__ 5454 DML_LOG_VERBOSE("DML::%s: Tr0_oto max0 = %f\n", __func__, *p->Tr0_trips); 5455 DML_LOG_VERBOSE("DML::%s: Tr0_oto max1 = %f\n", __func__, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto); 5456 DML_LOG_VERBOSE("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime / 4); 5457 #endif 5458 } else 5459 s->Tr0_oto = s->LineTime / 4.0; 5460 5461 s->Tvm_oto_lines = math_ceil2(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0; 5462 s->Tr0_oto_lines = math_ceil2(4.0 * 
s->Tr0_oto / s->LineTime, 1) / 4.0; 5463 s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto; 5464 5465 #ifdef DML_GLOBAL_PREFETCH_CHECK 5466 DML_LOG_VERBOSE("DML::%s: impacted_Tpre = %f\n", __func__, p->impacted_dst_y_pre); 5467 if (p->impacted_dst_y_pre > 0) { 5468 DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto); 5469 s->dst_y_prefetch_oto = math_max2(s->dst_y_prefetch_oto, p->impacted_dst_y_pre); 5470 DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f (impacted)\n", __func__, s->dst_y_prefetch_oto); 5471 } 5472 #endif 5473 *p->Tpre_oto = s->dst_y_prefetch_oto * s->LineTime; 5474 5475 //To (time for delay after scaler) in line time 5476 Lo = (unsigned int)(*p->DSTYAfterScaler + (double)*p->DSTXAfterScaler / (double)p->myPipe->HTotal); 5477 5478 s->min_Lsw_equ = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_EQU__; 5479 s->min_Lsw_equ = math_max2(s->min_Lsw_equ, p->tdlut_drain_time / s->LineTime); 5480 s->min_Lsw_equ = math_max2(s->min_Lsw_equ, 2.0); 5481 //Tpre_equ in line time 5482 if (p->DynamicMetadataVMEnabled && p->DynamicMetadataEnable) 5483 s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, *p->Tvm_trips) + s->TWait_p) / s->LineTime - Lo; 5484 else 5485 s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, p->ExtraLatencyPrefetch) + s->TWait_p) / s->LineTime - Lo; 5486 5487 #ifdef DML_GLOBAL_PREFETCH_CHECK 5488 s->dst_y_prefetch_equ_impacted = math_max2(p->impacted_dst_y_pre, s->dst_y_prefetch_equ); 5489 5490 s->dst_y_prefetch_equ_impacted = math_min2(s->dst_y_prefetch_equ_impacted, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH 5491 5492 if (s->dst_y_prefetch_equ_impacted > s->dst_y_prefetch_equ) 5493 s->dst_y_prefetch_equ -= s->dst_y_prefetch_equ_impacted - s->dst_y_prefetch_equ; 5494 #endif 5495 5496 s->dst_y_prefetch_equ = math_min2(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 
for DST_Y_PREFETCH 5497 5498 #ifdef __DML_VBA_DEBUG__ 5499 DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal); 5500 DML_LOG_VERBOSE("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto); 5501 DML_LOG_VERBOSE("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); 5502 DML_LOG_VERBOSE("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); 5503 DML_LOG_VERBOSE("DML::%s: Tno_bw_flip = %f\n", __func__, *p->Tno_bw_flip); 5504 DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); 5505 DML_LOG_VERBOSE("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem); 5506 DML_LOG_VERBOSE("DML::%s: mall_prefetch_sdp_overhead_factor = %f\n", __func__, p->mall_prefetch_sdp_overhead_factor); 5507 DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); 5508 DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); 5509 DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); 5510 DML_LOG_VERBOSE("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC); 5511 DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); 5512 DML_LOG_VERBOSE("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub); 5513 DML_LOG_VERBOSE("DML::%s: prefetch_sw_bytes = %f\n", __func__, *p->prefetch_sw_bytes); 5514 DML_LOG_VERBOSE("DML::%s: max_Tsw = %f\n", __func__, s->max_Tsw); 5515 DML_LOG_VERBOSE("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp); 5516 DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, vm_bytes); 5517 DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); 5518 DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); 5519 DML_LOG_VERBOSE("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); 5520 DML_LOG_VERBOSE("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips); 5521 DML_LOG_VERBOSE("DML::%s: 
Tvm_trips_flip = %f\n", __func__, *p->Tvm_trips_flip); 5522 DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip = %f\n", __func__, *p->Tr0_trips_flip); 5523 DML_LOG_VERBOSE("DML::%s: prefetch_bw_pr = %f\n", __func__, s->prefetch_bw_pr); 5524 DML_LOG_VERBOSE("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto); 5525 DML_LOG_VERBOSE("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto); 5526 DML_LOG_VERBOSE("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto); 5527 DML_LOG_VERBOSE("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines); 5528 DML_LOG_VERBOSE("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines); 5529 DML_LOG_VERBOSE("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto); 5530 DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto); 5531 DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ); 5532 DML_LOG_VERBOSE("DML::%s: tdlut_row_bytes = %d\n", __func__, tdlut_row_bytes); 5533 DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %d\n", __func__, p->meta_row_bytes); 5534 #endif 5535 s->dst_y_prefetch_equ = math_floor2(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0; 5536 *p->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime; 5537 5538 #ifdef __DML_VBA_DEBUG__ 5539 DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ); 5540 DML_LOG_VERBOSE("DML::%s: LineTime: %f\n", __func__, s->LineTime); 5541 DML_LOG_VERBOSE("DML::%s: VStartup: %u\n", __func__, p->VStartup); 5542 DML_LOG_VERBOSE("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime); 5543 DML_LOG_VERBOSE("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup); 5544 DML_LOG_VERBOSE("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc); 5545 DML_LOG_VERBOSE("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", 
__func__, p->TWait); 5546 DML_LOG_VERBOSE("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf); 5547 DML_LOG_VERBOSE("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec); 5548 DML_LOG_VERBOSE("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks); 5549 DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, p->TWait); 5550 DML_LOG_VERBOSE("DML::%s: TWait_p = %f\n", __func__, s->TWait_p); 5551 DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, p->Ttrip); 5552 DML_LOG_VERBOSE("DML::%s: Tex = %f\n", __func__, p->ExtraLatencyPrefetch); 5553 DML_LOG_VERBOSE("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm); 5554 DML_LOG_VERBOSE("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl); 5555 DML_LOG_VERBOSE("DML::%s: TWait_p: %fus\n", __func__, s->TWait_p); 5556 DML_LOG_VERBOSE("DML::%s: Ttrip: %fus\n", __func__, p->Ttrip); 5557 DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler); 5558 DML_LOG_VERBOSE("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler); 5559 DML_LOG_VERBOSE("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes*p->HostVMInefficiencyFactor); 5560 DML_LOG_VERBOSE("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, p->PixelPTEBytesPerRow*p->HostVMInefficiencyFactor+p->meta_row_bytes+tdlut_row_bytes); 5561 DML_LOG_VERBOSE("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); 5562 DML_LOG_VERBOSE("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, (s->dst_y_prefetch_equ * s->LineTime), *p->Tpre_rounded, (*p->Tpre_rounded - (s->dst_y_prefetch_equ * s->LineTime))); 5563 DML_LOG_VERBOSE("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, 
*p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); 5564 #endif 5565 5566 *p->dst_y_per_vm_vblank = 0; 5567 *p->dst_y_per_row_vblank = 0; 5568 *p->VRatioPrefetchY = 0; 5569 *p->VRatioPrefetchC = 0; 5570 *p->RequiredPrefetchPixelDataBWLuma = 0; 5571 5572 // Derive bandwidth by finding how much data to move within the time constraint 5573 // Tpre_rounded is Tpre rounding to 2-bit fraction 5574 // Tvm_trips_rounded is Tvm_trips ceiling to 1/4 line time 5575 // Tr0_trips_rounded is Tr0_trips ceiling to 1/4 line time 5576 // So that means prefetch bw calculated can be higher since the total time available for prefetch is less 5577 bool min_Lsw_equ_ok = *p->Tpre_rounded >= s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded + s->min_Lsw_equ*s->LineTime; 5578 bool tpre_gt_req_latency = true; 5579 #if 0 5580 // Check that Tpre_rounded is big enough if all of the stages of the prefetch are time constrained. 5581 // The terms Tvm_trips_rounded and Tr0_trips_rounded represent the min time constraints for the VM and row stages. 5582 // Normally, these terms cover the overall time constraint for Tpre >= (Tex + max{Ttrip, Turg}), but if these terms are at their minimum, an explicit check is necessary. 
5583 tpre_gt_req_latency = *p->Tpre_rounded > (math_max2(p->Turg, s->trip_to_mem) + p->ExtraLatencyPrefetch); 5584 #endif 5585 5586 if (s->dst_y_prefetch_equ > 1 && min_Lsw_equ_ok && tpre_gt_req_latency) { 5587 s->prefetch_bw1 = 0.; 5588 s->prefetch_bw2 = 0.; 5589 s->prefetch_bw3 = 0.; 5590 s->prefetch_bw4 = 0.; 5591 5592 // prefetch_bw1: VM + 2*R0 + SW 5593 if (*p->Tpre_rounded - *p->Tno_bw > 0) { 5594 s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor 5595 + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) 5596 + *p->prefetch_sw_bytes) 5597 / (*p->Tpre_rounded - *p->Tno_bw); 5598 s->Tsw_est1 = *p->prefetch_sw_bytes / s->prefetch_bw1; 5599 } else 5600 s->prefetch_bw1 = 0; 5601 5602 DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1); 5603 if ((s->Tsw_est1 < s->min_Lsw_equ * s->LineTime) && (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) { 5604 s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / 5605 (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw); 5606 #ifdef __DML_VBA_DEBUG__ 5607 DML_LOG_VERBOSE("DML::%s: vm and 2 rows bytes = %f\n", __func__, (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes))); 5608 DML_LOG_VERBOSE("DML::%s: Tpre_rounded = %f\n", __func__, *p->Tpre_rounded); 5609 DML_LOG_VERBOSE("DML::%s: minus term = %f\n", __func__, s->min_Lsw_equ * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw); 5610 DML_LOG_VERBOSE("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); 5611 DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); 5612 DML_LOG_VERBOSE("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); 5613 DML_LOG_VERBOSE("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (*p->Tpre_rounded - 
s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw)); 5614 DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1); 5615 #endif 5616 } 5617 5618 // prefetch_bw2: VM + SW 5619 if (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded > 0) { 5620 s->prefetch_bw2 = (vm_bytes * p->HostVMInefficiencyFactor + *p->prefetch_sw_bytes) / 5621 (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded); 5622 s->Tsw_est2 = *p->prefetch_sw_bytes / s->prefetch_bw2; 5623 } else 5624 s->prefetch_bw2 = 0; 5625 5626 DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f\n", __func__, s->prefetch_bw2); 5627 if ((s->Tsw_est2 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime) > 0)) { 5628 s->prefetch_bw2 = vm_bytes * p->HostVMInefficiencyFactor / (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime); 5629 DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f (updated)\n", __func__, s->prefetch_bw2); 5630 } 5631 5632 // prefetch_bw3: 2*R0 + SW 5633 if (*p->Tpre_rounded - s->Tvm_trips_rounded > 0) { 5634 s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + *p->prefetch_sw_bytes) / 5635 (*p->Tpre_rounded - s->Tvm_trips_rounded); 5636 s->Tsw_est3 = *p->prefetch_sw_bytes / s->prefetch_bw3; 5637 } else 5638 s->prefetch_bw3 = 0; 5639 5640 DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3); 5641 if ((s->Tsw_est3 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) { 5642 s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded); 5643 DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f (updated)\n", __func__, 
s->prefetch_bw3); 5644 } 5645 5646 // prefetch_bw4: SW 5647 if (*p->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0) 5648 s->prefetch_bw4 = *p->prefetch_sw_bytes / (*p->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded); 5649 else 5650 s->prefetch_bw4 = 0; 5651 5652 #ifdef __DML_VBA_DEBUG__ 5653 DML_LOG_VERBOSE("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); 5654 DML_LOG_VERBOSE("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, s->dst_y_prefetch_equ * s->LineTime, *p->Tpre_rounded, (*p->Tpre_rounded - (s->dst_y_prefetch_equ * s->LineTime))); 5655 DML_LOG_VERBOSE("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); 5656 DML_LOG_VERBOSE("DML::%s: Tr0_trips=%f Tr0_trips_rounded: %f, delta=%f\n", __func__, *p->Tr0_trips, s->Tr0_trips_rounded, (s->Tr0_trips_rounded - *p->Tr0_trips)); 5657 DML_LOG_VERBOSE("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1); 5658 DML_LOG_VERBOSE("DML::%s: Tsw_est2: %f\n", __func__, s->Tsw_est2); 5659 DML_LOG_VERBOSE("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3); 5660 DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f (final)\n", __func__, s->prefetch_bw1); 5661 DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f (final)\n", __func__, s->prefetch_bw2); 5662 DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f (final)\n", __func__, s->prefetch_bw3); 5663 DML_LOG_VERBOSE("DML::%s: prefetch_bw4: %f (final)\n", __func__, s->prefetch_bw4); 5664 #endif 5665 { 5666 bool Case1OK = false; 5667 bool Case2OK = false; 5668 bool Case3OK = false; 5669 5670 // get "equalized" bw among all stages (vm, r0, sw), so based is all 3 stages are just above the latency-based requirement 5671 // so it is not too dis-portionally favor a particular stage, next is either r0 more agressive and next is vm more agressive, the worst is all are agressive 5672 // vs the latency based number 5673 5674 // prefetch_bw1: VM + 2*R0 + SW 5675 // so prefetch_bw1 will have 
enough bw to transfer the necessary data within Tpre_rounded - Tno_bw (Tpre is the the worst-case latency based time to fetch the data) 5676 // here is to make sure equ bw wont be more agressive than the latency-based requirement. 5677 // check vm time >= vm_trips 5678 // check r0 time >= r0_trips 5679 5680 double total_row_bytes = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes); 5681 5682 DML_LOG_VERBOSE("DML::%s: Tvm_trips_rounded = %f\n", __func__, s->Tvm_trips_rounded); 5683 DML_LOG_VERBOSE("DML::%s: Tr0_trips_rounded = %f\n", __func__, s->Tr0_trips_rounded); 5684 5685 if (s->prefetch_bw1 > 0) { 5686 double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw1; 5687 double row_transfer_time = total_row_bytes / s->prefetch_bw1; 5688 DML_LOG_VERBOSE("DML::%s: Case1: vm_transfer_time = %f\n", __func__, vm_transfer_time); 5689 DML_LOG_VERBOSE("DML::%s: Case1: row_transfer_time = %f\n", __func__, row_transfer_time); 5690 if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) { 5691 Case1OK = true; 5692 } 5693 } 5694 5695 // prefetch_bw2: VM + SW 5696 // prefetch_bw2 will be enough bw to transfer VM and SW data within (Tpre_rounded - Tr0_trips_rounded - Tno_bw) 5697 // check vm time >= vm_trips 5698 // check r0 time < r0_trips 5699 if (s->prefetch_bw2 > 0) { 5700 double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw2; 5701 double row_transfer_time = total_row_bytes / s->prefetch_bw2; 5702 DML_LOG_VERBOSE("DML::%s: Case2: vm_transfer_time = %f\n", __func__, vm_transfer_time); 5703 DML_LOG_VERBOSE("DML::%s: Case2: row_transfer_time = %f\n", __func__, row_transfer_time); 5704 if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time < s->Tr0_trips_rounded) { 5705 Case2OK = true; 5706 } 5707 } 5708 5709 // prefetch_bw3: VM + 2*R0 5710 // check vm time < vm_trips 5711 // check r0 time >= r0_trips 5712 if 
(s->prefetch_bw3 > 0) { 5713 double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw3; 5714 double row_transfer_time = total_row_bytes / s->prefetch_bw3; 5715 DML_LOG_VERBOSE("DML::%s: Case3: vm_transfer_time = %f\n", __func__, vm_transfer_time); 5716 DML_LOG_VERBOSE("DML::%s: Case3: row_transfer_time = %f\n", __func__, row_transfer_time); 5717 if (vm_transfer_time < s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) { 5718 Case3OK = true; 5719 } 5720 } 5721 5722 if (Case1OK) { 5723 s->prefetch_bw_equ = s->prefetch_bw1; 5724 } else if (Case2OK) { 5725 s->prefetch_bw_equ = s->prefetch_bw2; 5726 } else if (Case3OK) { 5727 s->prefetch_bw_equ = s->prefetch_bw3; 5728 } else { 5729 s->prefetch_bw_equ = s->prefetch_bw4; 5730 } 5731 5732 s->prefetch_bw_equ = math_max3(s->prefetch_bw_equ, 5733 p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw, 5734 (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime)); 5735 #ifdef __DML_VBA_DEBUG__ 5736 DML_LOG_VERBOSE("DML::%s: Case1OK: %u\n", __func__, Case1OK); 5737 DML_LOG_VERBOSE("DML::%s: Case2OK: %u\n", __func__, Case2OK); 5738 DML_LOG_VERBOSE("DML::%s: Case3OK: %u\n", __func__, Case3OK); 5739 DML_LOG_VERBOSE("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ); 5740 #endif 5741 5742 if (s->prefetch_bw_equ > 0) { 5743 if (p->display_cfg->gpuvm_enable == true) { 5744 s->Tvm_equ = math_max3(*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_equ, *p->Tvm_trips, s->LineTime / 4); 5745 } else { 5746 s->Tvm_equ = s->LineTime / 4; 5747 } 5748 5749 if (p->display_cfg->gpuvm_enable == true || dcc_mrq_enable || p->setup_for_tdlut) { 5750 s->Tr0_equ = math_max3((p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_equ, // PixelPTEBytesPerRow is dpte_row_bytes 5751 *p->Tr0_trips, 5752 s->LineTime / 4); 5753 } else { 5754 
s->Tr0_equ = s->LineTime / 4; 5755 } 5756 } else { 5757 s->Tvm_equ = 0; 5758 s->Tr0_equ = 0; 5759 DML_LOG_VERBOSE("DML::%s: prefetch_bw_equ equals 0!\n", __func__); 5760 } 5761 } 5762 #ifdef __DML_VBA_DEBUG__ 5763 DML_LOG_VERBOSE("DML::%s: Tvm_equ = %f\n", __func__, s->Tvm_equ); 5764 DML_LOG_VERBOSE("DML::%s: Tr0_equ = %f\n", __func__, s->Tr0_equ); 5765 #endif 5766 // Use the more stressful prefetch schedule 5767 if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) { 5768 *p->dst_y_prefetch = s->dst_y_prefetch_oto; 5769 s->TimeForFetchingVM = s->Tvm_oto; 5770 s->TimeForFetchingRowInVBlank = s->Tr0_oto; 5771 5772 *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; 5773 *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; 5774 #ifdef __DML_VBA_DEBUG__ 5775 DML_LOG_VERBOSE("DML::%s: Using oto scheduling for prefetch\n", __func__); 5776 #endif 5777 5778 } else { 5779 *p->dst_y_prefetch = s->dst_y_prefetch_equ; 5780 5781 if (s->dst_y_prefetch_equ < s->dst_y_prefetch_equ_impacted) 5782 *p->dst_y_prefetch = s->dst_y_prefetch_equ_impacted; 5783 5784 s->TimeForFetchingVM = s->Tvm_equ; 5785 s->TimeForFetchingRowInVBlank = s->Tr0_equ; 5786 5787 *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; 5788 *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; 5789 5790 /* equ bw should be propagated so a ceiling of the equ bw is accounted for prior to mode programming. 5791 * Overall bandwidth may be lower when going from mode support to mode programming but final pixel data 5792 * bandwidth may end up higher than what was calculated in mode support. 
5793 */ 5794 *p->RequiredPrefetchBWMax = math_max2(s->prefetch_bw_equ, *p->RequiredPrefetchBWMax); 5795 5796 #ifdef __DML_VBA_DEBUG__ 5797 DML_LOG_VERBOSE("DML::%s: Using equ bw scheduling for prefetch\n", __func__); 5798 #endif 5799 } 5800 5801 // Lsw = dst_y_prefetch - (dst_y_per_vm_vblank + 2*dst_y_per_row_vblank) 5802 s->LinesToRequestPrefetchPixelData = *p->dst_y_prefetch - *p->dst_y_per_vm_vblank - 2 * *p->dst_y_per_row_vblank; // Lsw 5803 5804 s->cursor_prefetch_bytes = (unsigned int)math_max2(p->cursor_bytes_per_chunk, 4 * p->cursor_bytes_per_line); 5805 *p->prefetch_cursor_bw = p->num_cursors * s->cursor_prefetch_bytes / (s->LinesToRequestPrefetchPixelData * s->LineTime); 5806 *p->prefetch_swath_time_us = (s->LinesToRequestPrefetchPixelData * s->LineTime); 5807 5808 #ifdef __DML_VBA_DEBUG__ 5809 DML_LOG_VERBOSE("DML::%s: TimeForFetchingVM = %f\n", __func__, s->TimeForFetchingVM); 5810 DML_LOG_VERBOSE("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank); 5811 DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); 5812 DML_LOG_VERBOSE("DML::%s: dst_y_prefetch = %f\n", __func__, *p->dst_y_prefetch); 5813 DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank); 5814 DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank); 5815 DML_LOG_VERBOSE("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData); 5816 DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); 5817 DML_LOG_VERBOSE("DML::%s: prefetch_swath_time_us = %f\n", __func__, *p->prefetch_swath_time_us); 5818 5819 DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, p->cursor_bytes_per_chunk); 5820 DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_line = %d\n", __func__, p->cursor_bytes_per_line); 5821 DML_LOG_VERBOSE("DML::%s: cursor_prefetch_bytes = %d\n", __func__, s->cursor_prefetch_bytes); 5822 
DML_LOG_VERBOSE("DML::%s: prefetch_cursor_bw = %f\n", __func__, *p->prefetch_cursor_bw); 5823 #endif 5824 DML_ASSERT(*p->dst_y_prefetch < 64); 5825 5826 unsigned int min_lsw_required = (unsigned int)math_max2(2, p->tdlut_drain_time / s->LineTime); 5827 if (s->LinesToRequestPrefetchPixelData >= min_lsw_required && s->prefetch_bw_equ > 0) { 5828 *p->VRatioPrefetchY = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData; 5829 *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY, 1.0); 5830 #ifdef __DML_VBA_DEBUG__ 5831 DML_LOG_VERBOSE("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); 5832 DML_LOG_VERBOSE("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY); 5833 DML_LOG_VERBOSE("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY); 5834 #endif 5835 if ((p->SwathHeightY > 4) && (p->VInitPreFillY > 3)) { 5836 if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillY - 3.0) / 2.0) { 5837 *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY, 5838 (double)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0)); 5839 } else { 5840 s->NoTimeToPrefetch = true; 5841 DML_LOG_VERBOSE("DML::%s: No time to prefetch!. 
LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY); 5842 *p->VRatioPrefetchY = 0; 5843 } 5844 #ifdef __DML_VBA_DEBUG__ 5845 DML_LOG_VERBOSE("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); 5846 DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); 5847 DML_LOG_VERBOSE("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY); 5848 #endif 5849 } 5850 5851 *p->VRatioPrefetchC = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData; 5852 *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, 1.0); 5853 5854 #ifdef __DML_VBA_DEBUG__ 5855 DML_LOG_VERBOSE("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); 5856 DML_LOG_VERBOSE("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC); 5857 DML_LOG_VERBOSE("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC); 5858 #endif 5859 if ((p->SwathHeightC > 4) && (p->VInitPreFillC > 3)) { 5860 if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillC - 3.0) / 2.0) { 5861 *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, (double)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0)); 5862 } else { 5863 s->NoTimeToPrefetch = true; 5864 DML_LOG_VERBOSE("DML::%s: No time to prefetch!. 
LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC); 5865 *p->VRatioPrefetchC = 0; 5866 } 5867 #ifdef __DML_VBA_DEBUG__ 5868 DML_LOG_VERBOSE("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); 5869 DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); 5870 DML_LOG_VERBOSE("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC); 5871 #endif 5872 } 5873 5874 *p->RequiredPrefetchPixelDataBWLuma = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelY * p->swath_width_luma_ub / s->LineTime; 5875 *p->RequiredPrefetchPixelDataBWChroma = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelC * p->swath_width_chroma_ub / s->LineTime; 5876 5877 #ifdef __DML_VBA_DEBUG__ 5878 DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); 5879 DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); 5880 DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); 5881 DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWLuma); 5882 DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWChroma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWChroma); 5883 #endif 5884 } else { 5885 s->NoTimeToPrefetch = true; 5886 DML_LOG_VERBOSE("DML::%s: No time to prefetch!, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required); 5887 DML_LOG_VERBOSE("DML::%s: No time to prefetch!, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ); 5888 *p->VRatioPrefetchY = 0; 5889 *p->VRatioPrefetchC = 0; 5890 *p->RequiredPrefetchPixelDataBWLuma = 0; 5891 *p->RequiredPrefetchPixelDataBWChroma = 0; 5892 } 5893 DML_LOG_VERBOSE("DML: Tpre: %fus - sum of time to request 2 x data pte, swaths\n", 
(double)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingVM); 5894 DML_LOG_VERBOSE("DML: Tvm: %fus - time to fetch vm\n", s->TimeForFetchingVM); 5895 DML_LOG_VERBOSE("DML: Tr0: %fus - time to fetch first row of data pagetables\n", s->TimeForFetchingRowInVBlank); 5896 DML_LOG_VERBOSE("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime); 5897 DML_LOG_VERBOSE("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime); 5898 DML_LOG_VERBOSE("DML: Tvstartup - TSetup - Tcalc - TWait - Tpre - To > 0\n"); 5899 DML_LOG_VERBOSE("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingVM - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup); 5900 DML_LOG_VERBOSE("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow); 5901 5902 } else { 5903 DML_LOG_VERBOSE("DML::%s: No time to prefetch! dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ); 5904 DML_LOG_VERBOSE("DML::%s: No time to prefetch! 
min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n", 5905 __func__, min_Lsw_equ_ok, *p->Tpre_rounded, s->Tvm_trips_rounded, 2.0*s->Tr0_trips_rounded, s->min_Lsw_equ*s->LineTime); 5906 s->NoTimeToPrefetch = true; 5907 s->TimeForFetchingVM = 0; 5908 s->TimeForFetchingRowInVBlank = 0; 5909 *p->dst_y_per_vm_vblank = 0; 5910 *p->dst_y_per_row_vblank = 0; 5911 s->LinesToRequestPrefetchPixelData = 0; 5912 *p->VRatioPrefetchY = 0; 5913 *p->VRatioPrefetchC = 0; 5914 *p->RequiredPrefetchPixelDataBWLuma = 0; 5915 *p->RequiredPrefetchPixelDataBWChroma = 0; 5916 } 5917 5918 { 5919 double prefetch_vm_bw; 5920 double prefetch_row_bw; 5921 5922 if (vm_bytes == 0) { 5923 prefetch_vm_bw = 0; 5924 } else if (*p->dst_y_per_vm_vblank > 0) { 5925 #ifdef __DML_VBA_DEBUG__ 5926 DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); 5927 DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank); 5928 DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); 5929 #endif 5930 prefetch_vm_bw = vm_bytes * p->HostVMInefficiencyFactor / (*p->dst_y_per_vm_vblank * s->LineTime); 5931 #ifdef __DML_VBA_DEBUG__ 5932 DML_LOG_VERBOSE("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); 5933 #endif 5934 } else { 5935 prefetch_vm_bw = 0; 5936 s->NoTimeToPrefetch = true; 5937 DML_LOG_VERBOSE("DML::%s: No time to prefetch!. 
dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank); 5938 } 5939 5940 if (p->PixelPTEBytesPerRow == 0 && tdlut_row_bytes == 0) { 5941 prefetch_row_bw = 0; 5942 } else if (*p->dst_y_per_row_vblank > 0) { 5943 prefetch_row_bw = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + tdlut_row_bytes) / (*p->dst_y_per_row_vblank * s->LineTime); 5944 5945 #ifdef __DML_VBA_DEBUG__ 5946 DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); 5947 DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank); 5948 DML_LOG_VERBOSE("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); 5949 #endif 5950 } else { 5951 prefetch_row_bw = 0; 5952 s->NoTimeToPrefetch = true; 5953 DML_LOG_VERBOSE("DML::%s: No time to prefetch!. dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank); 5954 } 5955 5956 *p->prefetch_vmrow_bw = math_max2(prefetch_vm_bw, prefetch_row_bw); 5957 } 5958 5959 if (s->NoTimeToPrefetch) { 5960 s->TimeForFetchingVM = 0; 5961 s->TimeForFetchingRowInVBlank = 0; 5962 *p->dst_y_per_vm_vblank = 0; 5963 *p->dst_y_per_row_vblank = 0; 5964 *p->dst_y_prefetch = 0; 5965 s->LinesToRequestPrefetchPixelData = 0; 5966 *p->VRatioPrefetchY = 0; 5967 *p->VRatioPrefetchC = 0; 5968 *p->RequiredPrefetchPixelDataBWLuma = 0; 5969 *p->RequiredPrefetchPixelDataBWChroma = 0; 5970 *p->prefetch_vmrow_bw = 0; 5971 } 5972 5973 DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f (final)\n", __func__, *p->dst_y_per_vm_vblank); 5974 DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f (final)\n", __func__, *p->dst_y_per_row_vblank); 5975 DML_LOG_VERBOSE("DML::%s: prefetch_vmrow_bw = %f (final)\n", __func__, *p->prefetch_vmrow_bw); 5976 DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWLuma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWLuma); 5977 DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWChroma = %f (final)\n", __func__, 
*p->RequiredPrefetchPixelDataBWChroma); 5978 DML_LOG_VERBOSE("DML::%s: NoTimeToPrefetch=%d\n", __func__, s->NoTimeToPrefetch); 5979 5980 return s->NoTimeToPrefetch; 5981 } 5982 5983 static unsigned int get_num_lb_source_lines(unsigned int max_line_buffer_lines, 5984 unsigned int line_buffer_size_bits, 5985 unsigned int num_pipes, 5986 unsigned int vp_width, 5987 unsigned int vp_height, 5988 double h_ratio, 5989 enum dml2_rotation_angle rotation_angle) 5990 { 5991 unsigned int num_lb_source_lines = 0; 5992 double lb_bit_per_pixel = 57.0; 5993 unsigned recin_width = vp_width/num_pipes; 5994 5995 if (dml_is_vertical_rotation(rotation_angle)) 5996 recin_width = vp_height/num_pipes; 5997 5998 num_lb_source_lines = (unsigned int) math_min2((double) max_line_buffer_lines, 5999 math_floor2(line_buffer_size_bits / lb_bit_per_pixel / (recin_width / math_max2(h_ratio, 1.0)), 1.0)); 6000 6001 return num_lb_source_lines; 6002 } 6003 6004 static unsigned int find_max_impact_plane(unsigned int this_plane_idx, unsigned int num_planes, unsigned int Trpd_dcfclk_cycles[]) 6005 { 6006 int max_value = -1; 6007 int max_idx = -1; 6008 for (unsigned int i = 0; i < num_planes; i++) { 6009 if (i != this_plane_idx && (int) Trpd_dcfclk_cycles[i] > max_value) { 6010 max_value = Trpd_dcfclk_cycles[i]; 6011 max_idx = i; 6012 } 6013 } 6014 if (max_idx <= 0) { 6015 DML_ASSERT(max_idx >= 0); 6016 max_idx = this_plane_idx; 6017 } 6018 6019 return max_idx; 6020 } 6021 6022 static double calculate_impacted_Tsw(unsigned int exclude_plane_idx, unsigned int num_planes, double *prefetch_swath_bytes, double bw_mbps) 6023 { 6024 double sum = 0.; 6025 for (unsigned int i = 0; i < num_planes; i++) { 6026 if (i != exclude_plane_idx) { 6027 sum += prefetch_swath_bytes[i]; 6028 } 6029 } 6030 return sum / bw_mbps; 6031 } 6032 6033 // a global check against the aggregate effect of the per plane prefetch schedule 6034 static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core_internal_scratch 
*scratch, 6035 struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *p) 6036 { 6037 struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals *s = &scratch->CheckGlobalPrefetchAdmissibility_locals; 6038 unsigned int i, k; 6039 6040 memset(s, 0, sizeof(struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals)); 6041 6042 *p->recalc_prefetch_schedule = 0; 6043 s->prefetch_global_check_passed = 1; 6044 // worst case if the rob and cdb is fully hogged 6045 s->max_Trpd_dcfclk_cycles = (unsigned int) math_ceil2((p->rob_buffer_size_kbytes*1024 + p->compressed_buffer_size_kbytes*DML_MAX_COMPRESSION_RATIO*1024)/64.0, 1.0); 6046 #ifdef __DML_VBA_DEBUG__ 6047 DML_LOG_VERBOSE("DML::%s: num_active_planes = %d\n", __func__, p->num_active_planes); 6048 DML_LOG_VERBOSE("DML::%s: rob_buffer_size_kbytes = %d\n", __func__, p->rob_buffer_size_kbytes); 6049 DML_LOG_VERBOSE("DML::%s: compressed_buffer_size_kbytes = %d\n", __func__, p->compressed_buffer_size_kbytes); 6050 DML_LOG_VERBOSE("DML::%s: estimated_urg_bandwidth_required_mbps = %f\n", __func__, p->estimated_urg_bandwidth_required_mbps); 6051 DML_LOG_VERBOSE("DML::%s: estimated_dcfclk_mhz = %f\n", __func__, p->estimated_dcfclk_mhz); 6052 DML_LOG_VERBOSE("DML::%s: max_Trpd_dcfclk_cycles = %u\n", __func__, s->max_Trpd_dcfclk_cycles); 6053 #endif 6054 6055 // calculate the return impact from each plane, request is 256B per dcfclk 6056 for (i = 0; i < p->num_active_planes; i++) { 6057 s->src_detile_buf_size_bytes_l[i] = p->detile_buffer_size_bytes_l[i]; 6058 s->src_detile_buf_size_bytes_c[i] = p->detile_buffer_size_bytes_c[i]; 6059 s->src_swath_bytes_l[i] = p->full_swath_bytes_l[i]; 6060 s->src_swath_bytes_c[i] = p->full_swath_bytes_c[i]; 6061 6062 if (p->pixel_format[i] == dml2_420_10) { 6063 s->src_detile_buf_size_bytes_l[i] = (unsigned int) (s->src_detile_buf_size_bytes_l[i] * 1.5); 6064 s->src_detile_buf_size_bytes_c[i] = (unsigned int) (s->src_detile_buf_size_bytes_c[i] * 1.5); 6065 s->src_swath_bytes_l[i] = 
(unsigned int) (s->src_swath_bytes_l[i] * 1.5); 6066 s->src_swath_bytes_c[i] = (unsigned int) (s->src_swath_bytes_c[i] * 1.5); 6067 } 6068 6069 s->burst_bytes_to_fill_det = (unsigned int) (math_floor2(s->src_detile_buf_size_bytes_l[i] / p->chunk_bytes_l, 1) * p->chunk_bytes_l); 6070 s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_l[i] / p->swath_height_l[i], 1) * s->src_swath_bytes_l[i]); 6071 6072 #ifdef __DML_VBA_DEBUG__ 6073 DML_LOG_VERBOSE("DML::%s: i=%u pixel_format = %d\n", __func__, i, p->pixel_format[i]); 6074 DML_LOG_VERBOSE("DML::%s: i=%u chunk_bytes_l = %d\n", __func__, i, p->chunk_bytes_l); 6075 DML_LOG_VERBOSE("DML::%s: i=%u lb_source_lines_l = %d\n", __func__, i, p->lb_source_lines_l[i]); 6076 DML_LOG_VERBOSE("DML::%s: i=%u src_detile_buf_size_bytes_l=%d\n", __func__, i, s->src_detile_buf_size_bytes_l[i]); 6077 DML_LOG_VERBOSE("DML::%s: i=%u src_swath_bytes_l=%d\n", __func__, i, s->src_swath_bytes_l[i]); 6078 DML_LOG_VERBOSE("DML::%s: i=%u burst_bytes_to_fill_det=%d (luma)\n", __func__, i, s->burst_bytes_to_fill_det); 6079 #endif 6080 6081 if (s->src_swath_bytes_c[i] > 0) { // dual_plane 6082 s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(s->src_detile_buf_size_bytes_c[i] / p->chunk_bytes_c, 1) * p->chunk_bytes_c); 6083 6084 if (p->pixel_format[i] == dml2_422_planar_8 || p->pixel_format[i] == dml2_422_planar_10 || p->pixel_format[i] == dml2_422_planar_12) { 6085 s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_c[i] / p->swath_height_c[i], 1) * s->src_swath_bytes_c[i]); 6086 } 6087 6088 #ifdef __DML_VBA_DEBUG__ 6089 DML_LOG_VERBOSE("DML::%s: i=%u chunk_bytes_c = %d\n", __func__, i, p->chunk_bytes_c); 6090 DML_LOG_VERBOSE("DML::%s: i=%u lb_source_lines_c = %d\n", __func__, i, p->lb_source_lines_c[i]); 6091 DML_LOG_VERBOSE("DML::%s: i=%u src_detile_buf_size_bytes_c=%d\n", __func__, i, s->src_detile_buf_size_bytes_c[i]); 6092 DML_LOG_VERBOSE("DML::%s: i=%u src_swath_bytes_c=%d\n", 
__func__, i, s->src_swath_bytes_c[i]); 6093 #endif 6094 } 6095 6096 s->time_to_fill_det_us = (double) s->burst_bytes_to_fill_det / (256 * p->estimated_dcfclk_mhz); // fill time assume full burst at request rate 6097 s->accumulated_return_path_dcfclk_cycles[i] = (unsigned int) math_ceil2(((DML_MAX_COMPRESSION_RATIO-1) * 64 * p->estimated_dcfclk_mhz) * s->time_to_fill_det_us / 64.0, 1.0); //for 64B per DCFClk 6098 6099 #ifdef __DML_VBA_DEBUG__ 6100 DML_LOG_VERBOSE("DML::%s: i=%u burst_bytes_to_fill_det=%d\n", __func__, i, s->burst_bytes_to_fill_det); 6101 DML_LOG_VERBOSE("DML::%s: i=%u time_to_fill_det_us=%f\n", __func__, i, s->time_to_fill_det_us); 6102 DML_LOG_VERBOSE("DML::%s: i=%u accumulated_return_path_dcfclk_cycles=%u\n", __func__, i, s->accumulated_return_path_dcfclk_cycles[i]); 6103 #endif 6104 // clamping to worst case delay which is one which occupy the full rob+cdb 6105 if (s->accumulated_return_path_dcfclk_cycles[i] > s->max_Trpd_dcfclk_cycles) 6106 s->accumulated_return_path_dcfclk_cycles[i] = s->max_Trpd_dcfclk_cycles; 6107 } 6108 6109 // Figure out the impacted prefetch time for each plane 6110 // if impacted_Tre is > equ bw Tpre, we need to fail the prefetch schedule as we need a higher state to support the bw 6111 for (i = 0; i < p->num_active_planes; i++) { 6112 k = find_max_impact_plane(i, p->num_active_planes, s->accumulated_return_path_dcfclk_cycles); // plane k causes most impact to plane i 6113 // the rest of planes (except for k) complete for bw 6114 p->impacted_dst_y_pre[i] = s->accumulated_return_path_dcfclk_cycles[k]/p->estimated_dcfclk_mhz; 6115 p->impacted_dst_y_pre[i] += calculate_impacted_Tsw(k, p->num_active_planes, p->prefetch_sw_bytes, p->estimated_urg_bandwidth_required_mbps); 6116 p->impacted_dst_y_pre[i] = math_ceil2(p->impacted_dst_y_pre[i] / p->line_time[i], 0.25); 6117 6118 #ifdef __DML_VBA_DEBUG__ 6119 DML_LOG_VERBOSE("DML::%s: i=%u impacted_Tpre=%f (k=%u)\n", __func__, i, p->impacted_dst_y_pre[i], k); 6120 #endif 6121 } 6122 
6123 if (p->Tpre_rounded != NULL && p->Tpre_oto != NULL) { 6124 for (i = 0; i < p->num_active_planes; i++) { 6125 if (p->impacted_dst_y_pre[i] > p->dst_y_prefetch[i]) { 6126 s->prefetch_global_check_passed = 0; 6127 *p->recalc_prefetch_schedule = 1; 6128 } 6129 #ifdef __DML_VBA_DEBUG__ 6130 DML_LOG_VERBOSE("DML::%s: i=%u Tpre_rounded=%f\n", __func__, i, p->Tpre_rounded[i]); 6131 DML_LOG_VERBOSE("DML::%s: i=%u Tpre_oto=%f\n", __func__, i, p->Tpre_oto[i]); 6132 #endif 6133 } 6134 } else { 6135 // likely a mode programming calls, assume support, and no recalc - not used anyways 6136 s->prefetch_global_check_passed = 1; 6137 *p->recalc_prefetch_schedule = 0; 6138 } 6139 6140 #ifdef __DML_VBA_DEBUG__ 6141 DML_LOG_VERBOSE("DML::%s: prefetch_global_check_passed=%u\n", __func__, s->prefetch_global_check_passed); 6142 DML_LOG_VERBOSE("DML::%s: recalc_prefetch_schedule=%u\n", __func__, *p->recalc_prefetch_schedule); 6143 #endif 6144 6145 return s->prefetch_global_check_passed; 6146 } 6147 6148 static void calculate_peak_bandwidth_required( 6149 struct dml2_core_internal_scratch *s, 6150 struct dml2_core_calcs_calculate_peak_bandwidth_required_params *p) 6151 { 6152 unsigned int n; 6153 unsigned int m; 6154 6155 struct dml2_core_shared_calculate_peak_bandwidth_required_locals *l = &s->calculate_peak_bandwidth_required_locals; 6156 6157 memset(l, 0, sizeof(struct dml2_core_shared_calculate_peak_bandwidth_required_locals)); 6158 6159 #ifdef __DML_VBA_DEBUG__ 6160 DML_LOG_VERBOSE("DML::%s: inc_flip_bw = %d\n", __func__, p->inc_flip_bw); 6161 DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, p->num_active_planes); 6162 #endif 6163 6164 for (unsigned int k = 0; k < p->num_active_planes; ++k) { 6165 l->unity_array[k] = 1.0; 6166 l->zero_array[k] = 0.0; 6167 } 6168 6169 for (m = 0; m < dml2_core_internal_soc_state_max; m++) { 6170 for (n = 0; n < dml2_core_internal_bw_max; n++) { 6171 get_urgent_bandwidth_required( 6172 &s->get_urgent_bandwidth_required_locals, 
6173 p->display_cfg, 6174 m, 6175 n, 6176 0, //inc_flip_bw, 6177 0, //use_qual_row_bw 6178 p->num_active_planes, 6179 p->num_of_dpp, 6180 p->dcc_dram_bw_nom_overhead_factor_p0, 6181 p->dcc_dram_bw_nom_overhead_factor_p1, 6182 p->dcc_dram_bw_pref_overhead_factor_p0, 6183 p->dcc_dram_bw_pref_overhead_factor_p1, 6184 p->mall_prefetch_sdp_overhead_factor, 6185 p->mall_prefetch_dram_overhead_factor, 6186 p->surface_read_bandwidth_l, 6187 p->surface_read_bandwidth_c, 6188 l->zero_array, //PrefetchBandwidthLuma, 6189 l->zero_array, //PrefetchBandwidthChroma, 6190 l->zero_array, //PrefetchBWMax 6191 l->zero_array, 6192 l->zero_array, 6193 l->zero_array, 6194 p->dpte_row_bw, 6195 p->meta_row_bw, 6196 l->zero_array, //prefetch_cursor_bw, 6197 l->zero_array, //prefetch_vmrow_bw, 6198 l->zero_array, //flip_bw, 6199 l->zero_array, 6200 l->zero_array, 6201 l->zero_array, 6202 l->zero_array, 6203 l->zero_array, 6204 l->zero_array, 6205 p->surface_avg_vactive_required_bw[m][n], 6206 p->surface_peak_required_bw[m][n]); 6207 6208 p->urg_vactive_bandwidth_required[m][n] = get_urgent_bandwidth_required( 6209 &s->get_urgent_bandwidth_required_locals, 6210 p->display_cfg, 6211 m, 6212 n, 6213 0, //inc_flip_bw, 6214 0, //use_qual_row_bw 6215 p->num_active_planes, 6216 p->num_of_dpp, 6217 p->dcc_dram_bw_nom_overhead_factor_p0, 6218 p->dcc_dram_bw_nom_overhead_factor_p1, 6219 p->dcc_dram_bw_pref_overhead_factor_p0, 6220 p->dcc_dram_bw_pref_overhead_factor_p1, 6221 p->mall_prefetch_sdp_overhead_factor, 6222 p->mall_prefetch_dram_overhead_factor, 6223 p->surface_read_bandwidth_l, 6224 p->surface_read_bandwidth_c, 6225 l->zero_array, //PrefetchBandwidthLuma, 6226 l->zero_array, //PrefetchBandwidthChroma, 6227 l->zero_array, //PrefetchBWMax 6228 p->excess_vactive_fill_bw_l, 6229 p->excess_vactive_fill_bw_c, 6230 p->cursor_bw, 6231 p->dpte_row_bw, 6232 p->meta_row_bw, 6233 l->zero_array, //prefetch_cursor_bw, 6234 l->zero_array, //prefetch_vmrow_bw, 6235 l->zero_array, //flip_bw, 6236 
p->urgent_burst_factor_l, 6237 p->urgent_burst_factor_c, 6238 p->urgent_burst_factor_cursor, 6239 p->urgent_burst_factor_prefetch_l, 6240 p->urgent_burst_factor_prefetch_c, 6241 p->urgent_burst_factor_prefetch_cursor, 6242 l->surface_dummy_bw, 6243 p->surface_peak_required_bw[m][n]); 6244 6245 p->urg_bandwidth_required[m][n] = get_urgent_bandwidth_required( 6246 &s->get_urgent_bandwidth_required_locals, 6247 p->display_cfg, 6248 m, 6249 n, 6250 p->inc_flip_bw, 6251 0, //use_qual_row_bw 6252 p->num_active_planes, 6253 p->num_of_dpp, 6254 p->dcc_dram_bw_nom_overhead_factor_p0, 6255 p->dcc_dram_bw_nom_overhead_factor_p1, 6256 p->dcc_dram_bw_pref_overhead_factor_p0, 6257 p->dcc_dram_bw_pref_overhead_factor_p1, 6258 p->mall_prefetch_sdp_overhead_factor, 6259 p->mall_prefetch_dram_overhead_factor, 6260 p->surface_read_bandwidth_l, 6261 p->surface_read_bandwidth_c, 6262 p->prefetch_bandwidth_l, 6263 p->prefetch_bandwidth_c, 6264 p->prefetch_bandwidth_max, // to prevent ms/mp mismatches where mp prefetch bw > ms prefetch bw 6265 p->excess_vactive_fill_bw_l, 6266 p->excess_vactive_fill_bw_c, 6267 p->cursor_bw, 6268 p->dpte_row_bw, 6269 p->meta_row_bw, 6270 p->prefetch_cursor_bw, 6271 p->prefetch_vmrow_bw, 6272 p->flip_bw, 6273 p->urgent_burst_factor_l, 6274 p->urgent_burst_factor_c, 6275 p->urgent_burst_factor_cursor, 6276 p->urgent_burst_factor_prefetch_l, 6277 p->urgent_burst_factor_prefetch_c, 6278 p->urgent_burst_factor_prefetch_cursor, 6279 l->surface_dummy_bw, 6280 p->surface_peak_required_bw[m][n]); 6281 6282 p->urg_bandwidth_required_qual[m][n] = get_urgent_bandwidth_required( 6283 &s->get_urgent_bandwidth_required_locals, 6284 p->display_cfg, 6285 m, 6286 n, 6287 0, //inc_flip_bw 6288 1, //use_qual_row_bw 6289 p->num_active_planes, 6290 p->num_of_dpp, 6291 p->dcc_dram_bw_nom_overhead_factor_p0, 6292 p->dcc_dram_bw_nom_overhead_factor_p1, 6293 p->dcc_dram_bw_pref_overhead_factor_p0, 6294 p->dcc_dram_bw_pref_overhead_factor_p1, 6295 
p->mall_prefetch_sdp_overhead_factor, 6296 p->mall_prefetch_dram_overhead_factor, 6297 p->surface_read_bandwidth_l, 6298 p->surface_read_bandwidth_c, 6299 p->prefetch_bandwidth_l, 6300 p->prefetch_bandwidth_c, 6301 p->prefetch_bandwidth_max, // to prevent ms/mp mismatch where mp prefetch bw > ms prefetch bw 6302 p->excess_vactive_fill_bw_l, 6303 p->excess_vactive_fill_bw_c, 6304 p->cursor_bw, 6305 p->dpte_row_bw, 6306 p->meta_row_bw, 6307 p->prefetch_cursor_bw, 6308 p->prefetch_vmrow_bw, 6309 p->flip_bw, 6310 p->urgent_burst_factor_l, 6311 p->urgent_burst_factor_c, 6312 p->urgent_burst_factor_cursor, 6313 p->urgent_burst_factor_prefetch_l, 6314 p->urgent_burst_factor_prefetch_c, 6315 p->urgent_burst_factor_prefetch_cursor, 6316 l->surface_dummy_bw, 6317 p->surface_peak_required_bw[m][n]); 6318 6319 p->non_urg_bandwidth_required[m][n] = get_urgent_bandwidth_required( 6320 &s->get_urgent_bandwidth_required_locals, 6321 p->display_cfg, 6322 m, 6323 n, 6324 p->inc_flip_bw, 6325 0, //use_qual_row_bw 6326 p->num_active_planes, 6327 p->num_of_dpp, 6328 p->dcc_dram_bw_nom_overhead_factor_p0, 6329 p->dcc_dram_bw_nom_overhead_factor_p1, 6330 p->dcc_dram_bw_pref_overhead_factor_p0, 6331 p->dcc_dram_bw_pref_overhead_factor_p1, 6332 p->mall_prefetch_sdp_overhead_factor, 6333 p->mall_prefetch_dram_overhead_factor, 6334 p->surface_read_bandwidth_l, 6335 p->surface_read_bandwidth_c, 6336 p->prefetch_bandwidth_l, 6337 p->prefetch_bandwidth_c, 6338 p->prefetch_bandwidth_max, // to prevent ms/mp mismatches where mp prefetch bw > ms prefetch bw 6339 p->excess_vactive_fill_bw_l, 6340 p->excess_vactive_fill_bw_c, 6341 p->cursor_bw, 6342 p->dpte_row_bw, 6343 p->meta_row_bw, 6344 p->prefetch_cursor_bw, 6345 p->prefetch_vmrow_bw, 6346 p->flip_bw, 6347 l->unity_array, 6348 l->unity_array, 6349 l->unity_array, 6350 l->unity_array, 6351 l->unity_array, 6352 l->unity_array, 6353 l->surface_dummy_bw, 6354 p->surface_peak_required_bw[m][n]); 6355 6356 #ifdef __DML_VBA_DEBUG__ 6357 
DML_LOG_VERBOSE("DML::%s: urg_vactive_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_vactive_bandwidth_required[m][n]); 6358 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]); 6359 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_qual[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]); 6360 DML_LOG_VERBOSE("DML::%s: non_urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->non_urg_bandwidth_required[m][n]); 6361 #endif 6362 DML_ASSERT(p->urg_bandwidth_required[m][n] >= p->non_urg_bandwidth_required[m][n]); 6363 } 6364 } 6365 } 6366 6367 static void check_urgent_bandwidth_support( 6368 double *frac_urg_bandwidth_nom, 6369 double *frac_urg_bandwidth_mall, 6370 bool *vactive_bandwidth_support_ok, // vactive ok 6371 bool *bandwidth_support_ok,// max of vm, prefetch, vactive all ok 6372 6373 unsigned int mall_allocated_for_dcn_mbytes, 6374 double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], 6375 double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], 6376 double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], 6377 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]) 6378 { 6379 double frac_urg_bandwidth_nom_sdp = non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] / urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; 6380 double frac_urg_bandwidth_nom_dram 
= non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] / urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]; 6381 double frac_urg_bandwidth_mall_sdp; 6382 double frac_urg_bandwidth_mall_dram; 6383 if (urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] > 0) 6384 frac_urg_bandwidth_mall_sdp = non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] / urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]; 6385 else 6386 frac_urg_bandwidth_mall_sdp = 0.0; 6387 if (urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] > 0) 6388 frac_urg_bandwidth_mall_dram = non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] / urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]; 6389 else 6390 frac_urg_bandwidth_mall_dram = 0.0; 6391 6392 *bandwidth_support_ok = 1; 6393 *vactive_bandwidth_support_ok = 1; 6394 6395 // Check urgent bandwidth required at sdp vs urgent bandwidth avail at sdp -> FractionOfUrgentBandwidth 6396 // Check urgent bandwidth required at dram vs urgent bandwidth avail at dram 6397 // Check urgent bandwidth required at sdp vs urgent bandwidth avail at sdp, svp_prefetch -> FractionOfUrgentBandwidthMALL 6398 // Check urgent bandwidth required at dram vs urgent bandwidth avail at dram, svp_prefetch 6399 6400 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; 6401 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]; 6402 
6403 if (mall_allocated_for_dcn_mbytes > 0) { 6404 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]; 6405 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]; 6406 } 6407 6408 *frac_urg_bandwidth_nom = math_max2(frac_urg_bandwidth_nom_sdp, frac_urg_bandwidth_nom_dram); 6409 *frac_urg_bandwidth_mall = math_max2(frac_urg_bandwidth_mall_sdp, frac_urg_bandwidth_mall_dram); 6410 6411 *bandwidth_support_ok &= (*frac_urg_bandwidth_nom <= 1.0); 6412 6413 if (mall_allocated_for_dcn_mbytes > 0) 6414 *bandwidth_support_ok &= (*frac_urg_bandwidth_mall <= 1.0); 6415 6416 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; 6417 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]; 6418 if (mall_allocated_for_dcn_mbytes > 0) { 6419 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]; 6420 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]; 6421 } 6422 6423 #ifdef __DML_VBA_DEBUG__ 6424 DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom_sdp = %f\n", __func__, 
frac_urg_bandwidth_nom_sdp); 6425 DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom_dram = %f\n", __func__, frac_urg_bandwidth_nom_dram); 6426 DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom = %f\n", __func__, *frac_urg_bandwidth_nom); 6427 6428 DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall_sdp = %f\n", __func__, frac_urg_bandwidth_mall_sdp); 6429 DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall_dram = %f\n", __func__, frac_urg_bandwidth_mall_dram); 6430 DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall = %f\n", __func__, *frac_urg_bandwidth_mall); 6431 DML_LOG_VERBOSE("DML::%s: bandwidth_support_ok = %d\n", __func__, *bandwidth_support_ok); 6432 6433 for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) { 6434 for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { 6435 DML_LOG_VERBOSE("DML::%s: state:%s bw_type:%s urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n", 6436 __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), 6437 urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required[m][n]) ? 
"<" : ">=", urg_bandwidth_required[m][n]); 6438 } 6439 } 6440 #endif 6441 } 6442 6443 static double get_bandwidth_available_for_immediate_flip(enum dml2_core_internal_soc_state_type eval_state, 6444 double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], // no flip 6445 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]) 6446 { 6447 double flip_bw_available_mbps; 6448 double flip_bw_available_sdp_mbps; 6449 double flip_bw_available_dram_mbps; 6450 6451 flip_bw_available_sdp_mbps = urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp] - urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp]; 6452 flip_bw_available_dram_mbps = urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram] - urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram]; 6453 flip_bw_available_mbps = flip_bw_available_sdp_mbps < flip_bw_available_dram_mbps ? flip_bw_available_sdp_mbps : flip_bw_available_dram_mbps; 6454 6455 #ifdef __DML_VBA_DEBUG__ 6456 DML_LOG_VERBOSE("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state)); 6457 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_sdp_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp]); 6458 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_dram_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram]); 6459 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_sdp_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp]); 6460 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_dram_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram]); 6461 DML_LOG_VERBOSE("DML::%s: flip_bw_available_sdp_mbps = %f\n", __func__, flip_bw_available_sdp_mbps); 6462 DML_LOG_VERBOSE("DML::%s: flip_bw_available_dram_mbps = %f\n", __func__, flip_bw_available_dram_mbps); 6463 DML_LOG_VERBOSE("DML::%s: 
flip_bw_available_mbps = %f\n", __func__, flip_bw_available_mbps); 6464 #endif 6465 6466 return flip_bw_available_mbps; 6467 } 6468 6469 static void calculate_immediate_flip_bandwidth_support( 6470 // Output 6471 double *frac_urg_bandwidth_flip, 6472 bool *flip_bandwidth_support_ok, 6473 6474 // Input 6475 enum dml2_core_internal_soc_state_type eval_state, 6476 double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], 6477 double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], 6478 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]) 6479 { 6480 double frac_urg_bw_flip_sdp = non_urg_bandwidth_required_flip[eval_state][dml2_core_internal_bw_sdp] / urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp]; 6481 double frac_urg_bw_flip_dram = non_urg_bandwidth_required_flip[eval_state][dml2_core_internal_bw_dram] / urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram]; 6482 6483 *flip_bandwidth_support_ok = true; 6484 for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { // check sdp and dram 6485 *flip_bandwidth_support_ok &= urg_bandwidth_available[eval_state][n] >= urg_bandwidth_required_flip[eval_state][n]; 6486 6487 #ifdef __DML_VBA_DEBUG__ 6488 DML_LOG_VERBOSE("DML::%s: n = %s\n", __func__, dml2_core_internal_bw_type_str(n)); 6489 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available = %f\n", __func__, urg_bandwidth_available[eval_state][n]); 6490 DML_LOG_VERBOSE("DML::%s: non_urg_bandwidth_required_flip = %f\n", __func__, non_urg_bandwidth_required_flip[eval_state][n]); 6491 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_flip = %f\n", __func__, urg_bandwidth_required_flip[eval_state][n]); 6492 DML_LOG_VERBOSE("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok); 6493 #endif 6494 DML_ASSERT(urg_bandwidth_required_flip[eval_state][n] >= non_urg_bandwidth_required_flip[eval_state][n]); 6495 
} 6496 6497 *frac_urg_bandwidth_flip = (frac_urg_bw_flip_sdp > frac_urg_bw_flip_dram) ? frac_urg_bw_flip_sdp : frac_urg_bw_flip_dram; 6498 *flip_bandwidth_support_ok &= (*frac_urg_bandwidth_flip <= 1.0); 6499 6500 #ifdef __DML_VBA_DEBUG__ 6501 DML_LOG_VERBOSE("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state)); 6502 DML_LOG_VERBOSE("DML::%s: frac_urg_bw_flip_sdp = %f\n", __func__, frac_urg_bw_flip_sdp); 6503 DML_LOG_VERBOSE("DML::%s: frac_urg_bw_flip_dram = %f\n", __func__, frac_urg_bw_flip_dram); 6504 DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_flip = %f\n", __func__, *frac_urg_bandwidth_flip); 6505 DML_LOG_VERBOSE("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok); 6506 6507 for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) { 6508 for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { 6509 DML_LOG_VERBOSE("DML::%s: state:%s bw_type:%s, urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n", 6510 __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), 6511 urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required_flip[m][n]) ? 
"<" : ">=", urg_bandwidth_required_flip[m][n]); 6512 } 6513 } 6514 #endif 6515 } 6516 6517 static void CalculateFlipSchedule( 6518 struct dml2_core_internal_scratch *s, 6519 bool iflip_enable, 6520 bool use_lb_flip_bw, 6521 double HostVMInefficiencyFactor, 6522 double Tvm_trips_flip, 6523 double Tr0_trips_flip, 6524 double Tvm_trips_flip_rounded, 6525 double Tr0_trips_flip_rounded, 6526 bool GPUVMEnable, 6527 double vm_bytes, // vm_bytes 6528 double DPTEBytesPerRow, // dpte_row_bytes 6529 double BandwidthAvailableForImmediateFlip, 6530 unsigned int TotImmediateFlipBytes, 6531 enum dml2_source_format_class SourcePixelFormat, 6532 double LineTime, 6533 double VRatio, 6534 double VRatioChroma, 6535 double Tno_bw_flip, 6536 unsigned int dpte_row_height, 6537 unsigned int dpte_row_height_chroma, 6538 bool use_one_row_for_frame_flip, 6539 unsigned int max_flip_time_us, 6540 unsigned int max_flip_time_lines, 6541 unsigned int per_pipe_flip_bytes, 6542 unsigned int meta_row_bytes, 6543 unsigned int meta_row_height, 6544 unsigned int meta_row_height_chroma, 6545 bool dcc_mrq_enable, 6546 6547 // Output 6548 double *dst_y_per_vm_flip, 6549 double *dst_y_per_row_flip, 6550 double *final_flip_bw, 6551 bool *ImmediateFlipSupportedForPipe) 6552 { 6553 (void)use_one_row_for_frame_flip; 6554 struct dml2_core_shared_CalculateFlipSchedule_locals *l = &s->CalculateFlipSchedule_locals; 6555 6556 l->dual_plane = dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha; 6557 l->dpte_row_bytes = DPTEBytesPerRow; 6558 6559 #ifdef __DML_VBA_DEBUG__ 6560 DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable); 6561 DML_LOG_VERBOSE("DML::%s: ip.max_flip_time_us = %d\n", __func__, max_flip_time_us); 6562 DML_LOG_VERBOSE("DML::%s: ip.max_flip_time_lines = %d\n", __func__, max_flip_time_lines); 6563 DML_LOG_VERBOSE("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); 6564 DML_LOG_VERBOSE("DML::%s: TotImmediateFlipBytes = 
%u\n", __func__, TotImmediateFlipBytes); 6565 DML_LOG_VERBOSE("DML::%s: use_lb_flip_bw = %u\n", __func__, use_lb_flip_bw); 6566 DML_LOG_VERBOSE("DML::%s: iflip_enable = %u\n", __func__, iflip_enable); 6567 DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 6568 DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, LineTime); 6569 DML_LOG_VERBOSE("DML::%s: Tno_bw_flip = %f\n", __func__, Tno_bw_flip); 6570 DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip = %f\n", __func__, Tvm_trips_flip); 6571 DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip = %f\n", __func__, Tr0_trips_flip); 6572 DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip_rounded = %f\n", __func__, Tvm_trips_flip_rounded); 6573 DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip_rounded = %f\n", __func__, Tr0_trips_flip_rounded); 6574 DML_LOG_VERBOSE("DML::%s: vm_bytes = %f\n", __func__, vm_bytes); 6575 DML_LOG_VERBOSE("DML::%s: DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow); 6576 DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %d\n", __func__, meta_row_bytes); 6577 DML_LOG_VERBOSE("DML::%s: dpte_row_bytes = %f\n", __func__, l->dpte_row_bytes); 6578 DML_LOG_VERBOSE("DML::%s: dpte_row_height = %d\n", __func__, dpte_row_height); 6579 DML_LOG_VERBOSE("DML::%s: meta_row_height = %d\n", __func__, meta_row_height); 6580 DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio); 6581 #endif 6582 6583 if (TotImmediateFlipBytes > 0 && (GPUVMEnable || dcc_mrq_enable)) { 6584 if (l->dual_plane) { 6585 if (dcc_mrq_enable & GPUVMEnable) { 6586 l->min_row_height = math_min2(dpte_row_height, meta_row_height); 6587 l->min_row_height_chroma = math_min2(dpte_row_height_chroma, meta_row_height_chroma); 6588 } else if (GPUVMEnable) { 6589 l->min_row_height = dpte_row_height; 6590 l->min_row_height_chroma = dpte_row_height_chroma; 6591 } else { 6592 l->min_row_height = meta_row_height; 6593 l->min_row_height_chroma = meta_row_height_chroma; 6594 } 6595 l->min_row_time = math_min2(l->min_row_height * LineTime / 
VRatio, l->min_row_height_chroma * LineTime / VRatioChroma); 6596 } else { 6597 if (dcc_mrq_enable & GPUVMEnable) 6598 l->min_row_height = math_min2(dpte_row_height, meta_row_height); 6599 else if (GPUVMEnable) 6600 l->min_row_height = dpte_row_height; 6601 else 6602 l->min_row_height = meta_row_height; 6603 6604 l->min_row_time = l->min_row_height * LineTime / VRatio; 6605 } 6606 #ifdef __DML_VBA_DEBUG__ 6607 DML_LOG_VERBOSE("DML::%s: min_row_time = %f\n", __func__, l->min_row_time); 6608 #endif 6609 DML_ASSERT(l->min_row_time > 0); 6610 6611 if (use_lb_flip_bw) { 6612 // For mode check, calculation the flip bw requirement with worst case flip time 6613 l->max_flip_time = math_min2(math_min2(l->min_row_time, (double)max_flip_time_lines * LineTime / VRatio), 6614 math_max2(Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded, (double)max_flip_time_us)); 6615 6616 //The lower bound on flip bandwidth 6617 // Note: The get_urgent_bandwidth_required already consider dpte_row_bw and meta_row_bw in bandwidth calculation, so leave final_flip_bw = 0 if iflip not required 6618 l->lb_flip_bw = 0; 6619 6620 if (iflip_enable) { 6621 l->hvm_scaled_vm_bytes = vm_bytes * HostVMInefficiencyFactor; 6622 l->num_rows = 2; 6623 l->hvm_scaled_row_bytes = (l->num_rows * l->dpte_row_bytes * HostVMInefficiencyFactor + l->num_rows * meta_row_bytes); 6624 l->hvm_scaled_vm_row_bytes = l->hvm_scaled_vm_bytes + l->hvm_scaled_row_bytes; 6625 l->lb_flip_bw = math_max3( 6626 l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip), 6627 l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded), 6628 l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded)); 6629 #ifdef __DML_VBA_DEBUG__ 6630 DML_LOG_VERBOSE("DML::%s: max_flip_time = %f\n", __func__, l->max_flip_time); 6631 DML_LOG_VERBOSE("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_bytes); 6632 DML_LOG_VERBOSE("DML::%s: total row bytes (%f row, hvm ineff scaled) = 
%f\n", __func__, l->num_rows, l->hvm_scaled_row_bytes); 6633 DML_LOG_VERBOSE("DML::%s: total vm+row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_row_bytes); 6634 DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm and row = %f\n", __func__, l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip)); 6635 DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm = %f\n", __func__, l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded)); 6636 DML_LOG_VERBOSE("DML::%s: lb_flip_bw for row = %f\n", __func__, l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded)); 6637 6638 if (l->lb_flip_bw > 0) { 6639 DML_LOG_VERBOSE("DML::%s: mode_support est Tvm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw); 6640 DML_LOG_VERBOSE("DML::%s: mode_support est Tr0_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / l->num_rows); 6641 DML_LOG_VERBOSE("DML::%s: mode_support est dst_y_per_vm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw / LineTime); 6642 DML_LOG_VERBOSE("DML::%s: mode_support est dst_y_per_row_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / LineTime / l->num_rows); 6643 DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip_rounded + 2*Tr0_trips_flip_rounded = %f\n", __func__, (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded)); 6644 } 6645 #endif 6646 l->lb_flip_bw = math_max3(l->lb_flip_bw, 6647 l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip, 6648 (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime)); 6649 6650 #ifdef __DML_VBA_DEBUG__ 6651 DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm reg limit = %f\n", __func__, l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip); 6652 DML_LOG_VERBOSE("DML::%s: lb_flip_bw for row reg limit = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime)); 6653 #endif 6654 } 6655 6656 
*final_flip_bw = l->lb_flip_bw; 6657 6658 *dst_y_per_vm_flip = 1; // not used 6659 *dst_y_per_row_flip = 1; // not used 6660 *ImmediateFlipSupportedForPipe = l->min_row_time >= (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded); 6661 } else { 6662 if (iflip_enable) { 6663 l->ImmediateFlipBW = (double)per_pipe_flip_bytes * BandwidthAvailableForImmediateFlip / (double)TotImmediateFlipBytes; // flip_bw(i) 6664 6665 #ifdef __DML_VBA_DEBUG__ 6666 DML_LOG_VERBOSE("DML::%s: per_pipe_flip_bytes = %d\n", __func__, per_pipe_flip_bytes); 6667 DML_LOG_VERBOSE("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); 6668 DML_LOG_VERBOSE("DML::%s: ImmediateFlipBW = %f\n", __func__, l->ImmediateFlipBW); 6669 DML_LOG_VERBOSE("DML::%s: portion of flip bw = %f\n", __func__, (double)per_pipe_flip_bytes / (double)TotImmediateFlipBytes); 6670 #endif 6671 if (l->ImmediateFlipBW == 0) { 6672 l->Tvm_flip = 0; 6673 l->Tr0_flip = 0; 6674 } else { 6675 l->Tvm_flip = math_max3(Tvm_trips_flip, 6676 Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW, 6677 LineTime / 4.0); 6678 6679 l->Tr0_flip = math_max3(Tr0_trips_flip, 6680 (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW, 6681 LineTime / 4.0); 6682 } 6683 #ifdef __DML_VBA_DEBUG__ 6684 DML_LOG_VERBOSE("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, vm_bytes * HostVMInefficiencyFactor); 6685 DML_LOG_VERBOSE("DML::%s: total row bytes (hvm ineff scaled, one row) = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes)); 6686 6687 DML_LOG_VERBOSE("DML::%s: Tvm_flip = %f (bw-based), Tvm_trips_flip = %f (latency-based)\n", __func__, Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW, Tvm_trips_flip); 6688 DML_LOG_VERBOSE("DML::%s: Tr0_flip = %f (bw-based), Tr0_trips_flip = %f (latency-based)\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / 
l->ImmediateFlipBW, Tr0_trips_flip); 6689 #endif 6690 *dst_y_per_vm_flip = math_ceil2(4.0 * (l->Tvm_flip / LineTime), 1.0) / 4.0; 6691 *dst_y_per_row_flip = math_ceil2(4.0 * (l->Tr0_flip / LineTime), 1.0) / 4.0; 6692 6693 *final_flip_bw = math_max2(vm_bytes * HostVMInefficiencyFactor / (*dst_y_per_vm_flip * LineTime), 6694 (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (*dst_y_per_row_flip * LineTime)); 6695 6696 if (*dst_y_per_vm_flip >= 32 || *dst_y_per_row_flip >= 16 || l->Tvm_flip + 2 * l->Tr0_flip > l->min_row_time) { 6697 *ImmediateFlipSupportedForPipe = false; 6698 } else { 6699 *ImmediateFlipSupportedForPipe = iflip_enable; 6700 } 6701 } else { 6702 l->Tvm_flip = 0; 6703 l->Tr0_flip = 0; 6704 *dst_y_per_vm_flip = 0; 6705 *dst_y_per_row_flip = 0; 6706 *final_flip_bw = 0; 6707 *ImmediateFlipSupportedForPipe = iflip_enable; 6708 } 6709 } 6710 } else { 6711 l->Tvm_flip = 0; 6712 l->Tr0_flip = 0; 6713 *dst_y_per_vm_flip = 0; 6714 *dst_y_per_row_flip = 0; 6715 *final_flip_bw = 0; 6716 *ImmediateFlipSupportedForPipe = iflip_enable; 6717 } 6718 6719 #ifdef __DML_VBA_DEBUG__ 6720 if (!use_lb_flip_bw) { 6721 DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_flip = %f (should be < 32)\n", __func__, *dst_y_per_vm_flip); 6722 DML_LOG_VERBOSE("DML::%s: dst_y_per_row_flip = %f (should be < 16)\n", __func__, *dst_y_per_row_flip); 6723 DML_LOG_VERBOSE("DML::%s: Tvm_flip = %f (final)\n", __func__, l->Tvm_flip); 6724 DML_LOG_VERBOSE("DML::%s: Tr0_flip = %f (final)\n", __func__, l->Tr0_flip); 6725 DML_LOG_VERBOSE("DML::%s: Tvm_flip + 2*Tr0_flip = %f (should be <= min_row_time=%f)\n", __func__, l->Tvm_flip + 2 * l->Tr0_flip, l->min_row_time); 6726 } 6727 DML_LOG_VERBOSE("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw); 6728 DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe); 6729 #endif 6730 } 6731 6732 static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( 6733 struct dml2_core_internal_scratch 
*scratch, 6734 struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *p) 6735 { 6736 struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals *s = &scratch->CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals; 6737 6738 enum dml2_uclk_pstate_change_strategy uclk_pstate_change_strategy; 6739 double reserved_vblank_time_us; 6740 bool FoundCriticalSurface = false; 6741 6742 s->TotalActiveWriteback = 0; 6743 p->Watermark->UrgentWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency; 6744 6745 #ifdef __DML_VBA_DEBUG__ 6746 DML_LOG_VERBOSE("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark); 6747 #endif 6748 6749 p->Watermark->USRRetrainingWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency + p->mmSOCParameters.USRRetrainingLatency + p->mmSOCParameters.SMNLatency; 6750 p->Watermark->DRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->Watermark->UrgentWatermark; 6751 p->Watermark->FCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->Watermark->UrgentWatermark; 6752 p->Watermark->StutterExitWatermark = p->mmSOCParameters.SRExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; 6753 p->Watermark->StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; 6754 p->Watermark->Z8StutterExitWatermark = p->mmSOCParameters.SRExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; 6755 p->Watermark->Z8StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; 6756 if (p->mmSOCParameters.qos_type == dml2_qos_param_type_dcn4x) { 6757 p->Watermark->StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; 6758 p->Watermark->StutterEnterPlusExitWatermark += 
p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; 6759 p->Watermark->Z8StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; 6760 p->Watermark->Z8StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; 6761 } 6762 p->Watermark->temp_read_or_ppt_watermark_us = p->mmSOCParameters.g6_temp_read_blackout_us + p->Watermark->UrgentWatermark; 6763 6764 #ifdef __DML_VBA_DEBUG__ 6765 DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency); 6766 DML_LOG_VERBOSE("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency); 6767 DML_LOG_VERBOSE("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency); 6768 DML_LOG_VERBOSE("DML::%s: SREnterPlusExitZ8Time = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitZ8Time); 6769 DML_LOG_VERBOSE("DML::%s: SREnterPlusExitTime = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitTime); 6770 DML_LOG_VERBOSE("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark); 6771 DML_LOG_VERBOSE("DML::%s: USRRetrainingWatermark = %f\n", __func__, p->Watermark->USRRetrainingWatermark); 6772 DML_LOG_VERBOSE("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark); 6773 DML_LOG_VERBOSE("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark); 6774 DML_LOG_VERBOSE("DML::%s: StutterExitWatermark = %f\n", __func__, p->Watermark->StutterExitWatermark); 6775 DML_LOG_VERBOSE("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark); 6776 DML_LOG_VERBOSE("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark); 6777 DML_LOG_VERBOSE("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark); 6778 DML_LOG_VERBOSE("DML::%s: 
temp_read_or_ppt_watermark_us = %f\n", __func__, p->Watermark->temp_read_or_ppt_watermark_us); 6779 #endif 6780 6781 s->TotalActiveWriteback = 0; 6782 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { 6783 if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { 6784 s->TotalActiveWriteback = s->TotalActiveWriteback + 1; 6785 } 6786 } 6787 6788 if (s->TotalActiveWriteback <= 1) { 6789 p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency; 6790 } else { 6791 p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK; 6792 } 6793 if (p->USRRetrainingRequired) 6794 p->Watermark->WritebackUrgentWatermark = p->Watermark->WritebackUrgentWatermark + p->mmSOCParameters.USRRetrainingLatency; 6795 6796 if (s->TotalActiveWriteback <= 1) { 6797 p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency; 6798 p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency; 6799 } else { 6800 p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK; 6801 p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024 / 32 / p->SOCCLK; 6802 } 6803 6804 if (p->USRRetrainingRequired) 6805 p->Watermark->WritebackDRAMClockChangeWatermark = p->Watermark->WritebackDRAMClockChangeWatermark + p->mmSOCParameters.USRRetrainingLatency; 6806 6807 if (p->USRRetrainingRequired) 6808 p->Watermark->WritebackFCLKChangeWatermark = p->Watermark->WritebackFCLKChangeWatermark + p->mmSOCParameters.USRRetrainingLatency; 6809 6810 #ifdef __DML_VBA_DEBUG__ 6811 DML_LOG_VERBOSE("DML::%s: 
WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark); 6812 DML_LOG_VERBOSE("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark); 6813 DML_LOG_VERBOSE("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark); 6814 DML_LOG_VERBOSE("DML::%s: USRRetrainingRequired = %u\n", __func__, p->USRRetrainingRequired); 6815 DML_LOG_VERBOSE("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency); 6816 #endif 6817 6818 s->TotalPixelBW = 0.0; 6819 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { 6820 double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total; 6821 double pixel_clock_mhz = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0; 6822 double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; 6823 double v_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; 6824 s->TotalPixelBW = s->TotalPixelBW + p->DPPPerSurface[k] 6825 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio + p->SwathWidthC[k] * p->BytePerPixelDETC[k] * v_ratio_c) / (h_total / pixel_clock_mhz); 6826 } 6827 6828 *p->global_fclk_change_supported = true; 6829 *p->global_dram_clock_change_supported = true; 6830 6831 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { 6832 double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total; 6833 double pixel_clock_mhz = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0; 6834 double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; 6835 double v_ratio_c = 
p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; 6836 double v_taps = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; 6837 double v_taps_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; 6838 double h_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio; 6839 double h_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio; 6840 double LBBitPerPixel = 57; 6841 6842 s->LBLatencyHidingSourceLinesY[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthY[k] / math_max2(h_ratio, 1.0)), 1)) - (v_taps - 1)); 6843 s->LBLatencyHidingSourceLinesC[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthC[k] / math_max2(h_ratio_c, 1.0)), 1)) - (v_taps_c - 1)); 6844 6845 #ifdef __DML_VBA_DEBUG__ 6846 DML_LOG_VERBOSE("DML::%s: k=%u, MaxLineBufferLines = %u\n", __func__, k, p->MaxLineBufferLines); 6847 DML_LOG_VERBOSE("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize); 6848 DML_LOG_VERBOSE("DML::%s: k=%u, LBBitPerPixel = %f\n", __func__, k, LBBitPerPixel); 6849 DML_LOG_VERBOSE("DML::%s: k=%u, HRatio = %f\n", __func__, k, h_ratio); 6850 DML_LOG_VERBOSE("DML::%s: k=%u, VTaps = %f\n", __func__, k, v_taps); 6851 #endif 6852 6853 s->EffectiveLBLatencyHidingY = s->LBLatencyHidingSourceLinesY[k] / v_ratio * (h_total / pixel_clock_mhz); 6854 s->EffectiveLBLatencyHidingC = s->LBLatencyHidingSourceLinesC[k] / v_ratio_c * (h_total / pixel_clock_mhz); 6855 6856 s->EffectiveDETBufferSizeY = p->DETBufferSizeY[k]; 6857 if (p->UnboundedRequestEnabled) { 6858 s->EffectiveDETBufferSizeY = s->EffectiveDETBufferSizeY + p->CompressedBufferSizeInkByte * 1024 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio) / (h_total / pixel_clock_mhz) / s->TotalPixelBW; 6859 } 
6860 6861 s->LinesInDETY[k] = (double)s->EffectiveDETBufferSizeY / p->BytePerPixelDETY[k] / p->SwathWidthY[k]; 6862 s->LinesInDETYRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETY[k], p->SwathHeightY[k])); 6863 s->FullDETBufferingTimeY = s->LinesInDETYRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio; 6864 6865 s->ActiveClockChangeLatencyHidingY = s->EffectiveLBLatencyHidingY + s->FullDETBufferingTimeY - ((double)p->DSTXAfterScaler[k] / h_total + (double)p->DSTYAfterScaler[k]) * h_total / pixel_clock_mhz; 6866 6867 if (p->NumberOfActiveSurfaces > 1) { 6868 s->ActiveClockChangeLatencyHidingY = s->ActiveClockChangeLatencyHidingY - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightY[k] * (double)h_total / pixel_clock_mhz / v_ratio; 6869 } 6870 6871 if (p->BytePerPixelDETC[k] > 0) { 6872 s->LinesInDETC[k] = p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k]; 6873 s->LinesInDETCRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETC[k], p->SwathHeightC[k])); 6874 s->FullDETBufferingTimeC = s->LinesInDETCRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio_c; 6875 s->ActiveClockChangeLatencyHidingC = s->EffectiveLBLatencyHidingC + s->FullDETBufferingTimeC - ((double)p->DSTXAfterScaler[k] / (double)h_total + (double)p->DSTYAfterScaler[k]) * (double)h_total / pixel_clock_mhz; 6876 if (p->NumberOfActiveSurfaces > 1) { 6877 s->ActiveClockChangeLatencyHidingC = s->ActiveClockChangeLatencyHidingC - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightC[k] * (double)h_total / pixel_clock_mhz / v_ratio_c; 6878 } 6879 s->ActiveClockChangeLatencyHiding = math_min2(s->ActiveClockChangeLatencyHidingY, s->ActiveClockChangeLatencyHidingC); 6880 } else { 6881 s->ActiveClockChangeLatencyHiding = s->ActiveClockChangeLatencyHidingY; 6882 } 6883 6884 s->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->DRAMClockChangeWatermark; 6885 
s->ActiveFCLKChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->FCLKChangeWatermark; 6886 s->USRRetrainingLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->USRRetrainingWatermark; 6887 s->g6_temp_read_latency_margin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->temp_read_or_ppt_watermark_us; 6888 6889 if (p->VActiveLatencyHidingMargin) 6890 p->VActiveLatencyHidingMargin[k] = s->ActiveDRAMClockChangeLatencyMargin[k]; 6891 6892 if (p->VActiveLatencyHidingUs) 6893 p->VActiveLatencyHidingUs[k] = s->ActiveClockChangeLatencyHiding; 6894 6895 if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { 6896 s->WritebackLatencyHiding = (double)p->WritebackInterfaceBufferSize * 1024.0 6897 / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height 6898 * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width 6899 / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height * (double)h_total / pixel_clock_mhz) * 4.0); 6900 if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format == dml2_444_64) { 6901 s->WritebackLatencyHiding = s->WritebackLatencyHiding / 2; 6902 } 6903 s->WritebackDRAMClockChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackDRAMClockChangeWatermark; 6904 6905 s->WritebackFCLKChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackFCLKChangeWatermark; 6906 6907 s->ActiveDRAMClockChangeLatencyMargin[k] = math_min2(s->ActiveDRAMClockChangeLatencyMargin[k], s->WritebackDRAMClockChangeLatencyMargin); 6908 s->ActiveFCLKChangeLatencyMargin[k] = 
math_min2(s->ActiveFCLKChangeLatencyMargin[k], s->WritebackFCLKChangeLatencyMargin); 6909 } 6910 p->MaxActiveDRAMClockChangeLatencySupported[k] = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 0 : (s->ActiveDRAMClockChangeLatencyMargin[k] + p->mmSOCParameters.DRAMClockChangeLatency); 6911 6912 uclk_pstate_change_strategy = p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy; 6913 reserved_vblank_time_us = (double)p->display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns / 1000; 6914 6915 p->FCLKChangeSupport[k] = dml2_pstate_change_unsupported; 6916 if (s->ActiveFCLKChangeLatencyMargin[k] > 0) 6917 p->FCLKChangeSupport[k] = dml2_pstate_change_vactive; 6918 else if (reserved_vblank_time_us >= p->mmSOCParameters.FCLKChangeLatency) 6919 p->FCLKChangeSupport[k] = dml2_pstate_change_vblank; 6920 6921 if (p->FCLKChangeSupport[k] == dml2_pstate_change_unsupported) 6922 *p->global_fclk_change_supported = false; 6923 6924 p->DRAMClockChangeSupport[k] = dml2_pstate_change_unsupported; 6925 if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_auto) { 6926 if (p->display_cfg->overrides.all_streams_blanked || 6927 (s->ActiveDRAMClockChangeLatencyMargin[k] > 0 && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)) 6928 p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank_and_vactive; 6929 else if (s->ActiveDRAMClockChangeLatencyMargin[k] > 0) 6930 p->DRAMClockChangeSupport[k] = dml2_pstate_change_vactive; 6931 else if (reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency) 6932 p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank; 6933 } else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vactive && s->ActiveDRAMClockChangeLatencyMargin[k] > 0) 6934 p->DRAMClockChangeSupport[k] = dml2_pstate_change_vactive; 6935 else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vblank && reserved_vblank_time_us >= 
p->mmSOCParameters.DRAMClockChangeLatency) 6936 p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank; 6937 else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_drr) 6938 p->DRAMClockChangeSupport[k] = dml2_pstate_change_drr; 6939 else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_svp) 6940 p->DRAMClockChangeSupport[k] = dml2_pstate_change_mall_svp; 6941 else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame) 6942 p->DRAMClockChangeSupport[k] = dml2_pstate_change_mall_full_frame; 6943 6944 if (p->DRAMClockChangeSupport[k] == dml2_pstate_change_unsupported) 6945 *p->global_dram_clock_change_supported = false; 6946 6947 s->dst_y_pstate = (unsigned int)(math_ceil2((p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.UrgentLatency) / (h_total / pixel_clock_mhz), 1)); 6948 s->src_y_pstate_l = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio, p->SwathHeightY[k])); 6949 s->src_y_ahead_l = (unsigned int)(math_floor2(p->DETBufferSizeY[k] / p->BytePerPixelDETY[k] / p->SwathWidthY[k], p->SwathHeightY[k]) + s->LBLatencyHidingSourceLinesY[k]); 6950 s->sub_vp_lines_l = s->src_y_pstate_l + s->src_y_ahead_l + p->meta_row_height_l[k]; 6951 6952 #ifdef __DML_VBA_DEBUG__ 6953 DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); 6954 DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]); 6955 DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]); 6956 DML_LOG_VERBOSE("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]); 6957 DML_LOG_VERBOSE("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]); 6958 DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate); 6959 DML_LOG_VERBOSE("DML::%s: k=%u, src_y_pstate_l = %u\n", __func__, k, s->src_y_pstate_l); 6960 
DML_LOG_VERBOSE("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l); 6961 DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_height_l = %u\n", __func__, k, p->meta_row_height_l[k]); 6962 DML_LOG_VERBOSE("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l); 6963 #endif 6964 p->SubViewportLinesNeededInMALL[k] = s->sub_vp_lines_l; 6965 6966 if (p->BytePerPixelDETC[k] > 0) { 6967 s->src_y_pstate_c = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio_c, p->SwathHeightC[k])); 6968 s->src_y_ahead_c = (unsigned int)(math_floor2(p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k], p->SwathHeightC[k]) + s->LBLatencyHidingSourceLinesC[k]); 6969 s->sub_vp_lines_c = s->src_y_pstate_c + s->src_y_ahead_c + p->meta_row_height_c[k]; 6970 6971 if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format)) 6972 p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, 2 * s->sub_vp_lines_c)); 6973 else 6974 p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, s->sub_vp_lines_c)); 6975 6976 #ifdef __DML_VBA_DEBUG__ 6977 DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, k, p->meta_row_height_c[k]); 6978 DML_LOG_VERBOSE("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c); 6979 DML_LOG_VERBOSE("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c); 6980 DML_LOG_VERBOSE("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c); 6981 #endif 6982 } 6983 } 6984 6985 *p->g6_temp_read_support = true; 6986 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { 6987 if ((!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) && 6988 (s->g6_temp_read_latency_margin[k] < 0)) { 6989 *p->g6_temp_read_support = false; 6990 } 6991 } 6992 6993 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { 6994 if ((!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) && ((!FoundCriticalSurface) 6995 || 
((s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency) < *p->MaxActiveFCLKChangeLatencySupported))) { 6996 FoundCriticalSurface = true; 6997 *p->MaxActiveFCLKChangeLatencySupported = s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency; 6998 } 6999 } 7000 7001 #ifdef __DML_VBA_DEBUG__ 7002 DML_LOG_VERBOSE("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->global_dram_clock_change_supported); 7003 DML_LOG_VERBOSE("DML::%s: FCLKChangeSupport = %u\n", __func__, *p->global_fclk_change_supported); 7004 DML_LOG_VERBOSE("DML::%s: MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported); 7005 DML_LOG_VERBOSE("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport); 7006 #endif 7007 } 7008 7009 static void calculate_bytes_to_fetch_required_to_hide_latency( 7010 struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *p) 7011 { 7012 unsigned int dst_lines_to_hide; 7013 unsigned int src_lines_to_hide_l; 7014 unsigned int src_lines_to_hide_c; 7015 unsigned int plane_index; 7016 unsigned int stream_index; 7017 7018 for (plane_index = 0; plane_index < p->num_active_planes; plane_index++) { 7019 if (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[plane_index])) 7020 continue; 7021 7022 stream_index = p->display_cfg->plane_descriptors[plane_index].stream_index; 7023 7024 dst_lines_to_hide = (unsigned int)math_ceil(p->latency_to_hide_us[0] / 7025 ((double)p->display_cfg->stream_descriptors[stream_index].timing.h_total / 7026 (double)p->display_cfg->stream_descriptors[stream_index].timing.pixel_clock_khz * 1000.0)); 7027 7028 src_lines_to_hide_l = (unsigned int)math_ceil2(p->display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane0.v_ratio * dst_lines_to_hide, 7029 p->swath_height_l[plane_index]); 7030 p->bytes_required_l[plane_index] = src_lines_to_hide_l * p->num_of_dpp[plane_index] * p->swath_width_l[plane_index] * 
p->byte_per_pix_l[plane_index]; 7031 7032 src_lines_to_hide_c = (unsigned int)math_ceil2(p->display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane1.v_ratio * dst_lines_to_hide, 7033 p->swath_height_c[plane_index]); 7034 p->bytes_required_c[plane_index] = src_lines_to_hide_c * p->num_of_dpp[plane_index] * p->swath_width_c[plane_index] * p->byte_per_pix_c[plane_index]; 7035 7036 if (p->display_cfg->plane_descriptors[plane_index].surface.dcc.enable && p->mrq_present) { 7037 p->bytes_required_l[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_l / p->meta_row_height_l[plane_index]) * p->meta_row_bytes_per_row_ub_l[plane_index]; 7038 if (p->meta_row_height_c[plane_index]) { 7039 p->bytes_required_c[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_c / p->meta_row_height_c[plane_index]) * p->meta_row_bytes_per_row_ub_c[plane_index]; 7040 } 7041 } 7042 7043 if (p->display_cfg->gpuvm_enable == true) { 7044 p->bytes_required_l[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_l / p->dpte_row_height_l[plane_index]) * p->dpte_bytes_per_row_l[plane_index]; 7045 if (p->dpte_row_height_c[plane_index]) { 7046 p->bytes_required_c[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_c / p->dpte_row_height_c[plane_index]) * p->dpte_bytes_per_row_c[plane_index]; 7047 } 7048 } 7049 } 7050 } 7051 7052 static noinline_for_stack void calculate_vactive_det_fill_latency( 7053 const struct dml2_display_cfg *display_cfg, 7054 unsigned int num_active_planes, 7055 unsigned int bytes_required_l[], 7056 unsigned int bytes_required_c[], 7057 double dcc_dram_bw_nom_overhead_factor_p0[], 7058 double dcc_dram_bw_nom_overhead_factor_p1[], 7059 double surface_read_bw_l[], 7060 double surface_read_bw_c[], 7061 double (*surface_avg_vactive_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES], 7062 double (*surface_peak_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES], 7063 /* output */ 7064 double 
vactive_det_fill_delay_us[]) 7065 { 7066 double effective_excess_bandwidth; 7067 double effective_excess_bandwidth_l; 7068 double effective_excess_bandwidth_c; 7069 double adj_factor; 7070 unsigned int plane_index; 7071 unsigned int soc_state; 7072 unsigned int bw_type; 7073 7074 for (plane_index = 0; plane_index < num_active_planes; plane_index++) { 7075 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_index])) 7076 continue; 7077 7078 vactive_det_fill_delay_us[plane_index] = 0.0; 7079 for (soc_state = 0; soc_state < dml2_core_internal_soc_state_max; soc_state++) { 7080 for (bw_type = 0; bw_type < dml2_core_internal_bw_max; bw_type++) { 7081 effective_excess_bandwidth = (surface_peak_required_bw[soc_state][bw_type][plane_index] - surface_avg_vactive_required_bw[soc_state][bw_type][plane_index]); 7082 7083 /* luma */ 7084 adj_factor = bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p0[plane_index] : 1.0; 7085 7086 effective_excess_bandwidth_l = effective_excess_bandwidth * surface_read_bw_l[plane_index] / (surface_read_bw_l[plane_index] + surface_read_bw_c[plane_index]) / adj_factor; 7087 if (effective_excess_bandwidth_l > 0.0) { 7088 vactive_det_fill_delay_us[plane_index] = math_max2(vactive_det_fill_delay_us[plane_index], bytes_required_l[plane_index] / effective_excess_bandwidth_l); 7089 } 7090 7091 /* chroma */ 7092 adj_factor = bw_type == dml2_core_internal_bw_dram ? 
dcc_dram_bw_nom_overhead_factor_p1[plane_index] : 1.0; 7093 7094 effective_excess_bandwidth_c = effective_excess_bandwidth * surface_read_bw_c[plane_index] / (surface_read_bw_l[plane_index] + surface_read_bw_c[plane_index]) / adj_factor; 7095 if (effective_excess_bandwidth_c > 0.0) { 7096 vactive_det_fill_delay_us[plane_index] = math_max2(vactive_det_fill_delay_us[plane_index], bytes_required_c[plane_index] / effective_excess_bandwidth_c); 7097 } 7098 } 7099 } 7100 } 7101 } 7102 7103 static void calculate_excess_vactive_bandwidth_required( 7104 const struct dml2_display_cfg *display_cfg, 7105 unsigned int num_active_planes, 7106 unsigned int bytes_required_l[], 7107 unsigned int bytes_required_c[], 7108 /* outputs */ 7109 double excess_vactive_fill_bw_l[], 7110 double excess_vactive_fill_bw_c[]) 7111 { 7112 unsigned int plane_index; 7113 7114 for (plane_index = 0; plane_index < num_active_planes; plane_index++) { 7115 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_index])) 7116 continue; 7117 7118 excess_vactive_fill_bw_l[plane_index] = 0.0; 7119 excess_vactive_fill_bw_c[plane_index] = 0.0; 7120 7121 if (display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us[dml2_pstate_type_uclk] > 0) { 7122 excess_vactive_fill_bw_l[plane_index] = (double)bytes_required_l[plane_index] / (double)display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us[dml2_pstate_type_uclk]; 7123 excess_vactive_fill_bw_c[plane_index] = (double)bytes_required_c[plane_index] / (double)display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us[dml2_pstate_type_uclk]; 7124 } 7125 } 7126 } 7127 7128 static double uclk_khz_to_dram_bw_mbps(unsigned long uclk_khz, const struct dml2_dram_params *dram_config, const struct dml2_mcg_dram_bw_to_min_clk_table *dram_bw_table) 7129 { 7130 double bw_mbps = 0; 7131 unsigned int i; 7132 7133 if (!dram_config->alt_clock_bw_conversion) 7134 bw_mbps = ((double)uclk_khz * 
dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0; 7135 else 7136 for (i = 0; i < dram_bw_table->num_entries; i++) 7137 if (dram_bw_table->entries[i].min_uclk_khz >= uclk_khz) { 7138 bw_mbps = (double)dram_bw_table->entries[i].pre_derate_dram_bw_kbps / 1000.0; 7139 break; 7140 } 7141 7142 DML_ASSERT(bw_mbps > 0); 7143 7144 return bw_mbps; 7145 } 7146 7147 static double dram_bw_kbps_to_uclk_mhz(unsigned long long bw_kbps, const struct dml2_dram_params *dram_config) 7148 { 7149 double uclk_mhz = 0; 7150 7151 uclk_mhz = (double)bw_kbps / (dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0; 7152 7153 return uclk_mhz; 7154 } 7155 7156 static unsigned int get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params) 7157 { 7158 unsigned int i; 7159 unsigned int index = 0; 7160 7161 for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { 7162 DML_LOG_VERBOSE("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %ld\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz); 7163 7164 if (i == 0) 7165 index = 0; 7166 else 7167 index = i - 1; 7168 7169 if (uclk_freq_khz < per_uclk_dpm_params[i].minimum_uclk_khz || 7170 per_uclk_dpm_params[i].minimum_uclk_khz == 0) { 7171 break; 7172 } 7173 } 7174 DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); 7175 DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, index); 7176 return index; 7177 } 7178 7179 static unsigned int get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table) 7180 { 7181 unsigned int i; 7182 bool clk_entry_found = false; 7183 7184 for (i = 0; i < clk_table->uclk.num_clk_values; i++) { 7185 DML_LOG_VERBOSE("DML::%s: clk_table.uclk.clk_values_khz[%d] = %ld\n", __func__, i, clk_table->uclk.clk_values_khz[i]); 7186 7187 if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) { 
7188 clk_entry_found = true; 7189 break; 7190 } 7191 } 7192 7193 if (!clk_entry_found) 7194 DML_ASSERT(clk_entry_found); 7195 #if defined(__DML_VBA_DEBUG__) 7196 DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); 7197 DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, i); 7198 #endif 7199 return i; 7200 } 7201 7202 static unsigned int get_pipe_flip_bytes( 7203 double hostvm_inefficiency_factor, 7204 unsigned int vm_bytes, 7205 unsigned int dpte_row_bytes, 7206 unsigned int meta_row_bytes) 7207 { 7208 unsigned int flip_bytes = 0; 7209 7210 flip_bytes += (unsigned int) ((vm_bytes * hostvm_inefficiency_factor) + 2*meta_row_bytes); 7211 flip_bytes += (unsigned int) (2*dpte_row_bytes * hostvm_inefficiency_factor); 7212 7213 return flip_bytes; 7214 } 7215 7216 static void calculate_hostvm_inefficiency_factor( 7217 double *HostVMInefficiencyFactor, 7218 double *HostVMInefficiencyFactorPrefetch, 7219 7220 bool gpuvm_enable, 7221 bool hostvm_enable, 7222 unsigned int remote_iommu_outstanding_translations, 7223 unsigned int max_outstanding_reqs, 7224 double urg_bandwidth_avail_active_pixel_and_vm, 7225 double urg_bandwidth_avail_active_vm_only) 7226 { 7227 *HostVMInefficiencyFactor = 1; 7228 *HostVMInefficiencyFactorPrefetch = 1; 7229 7230 if (gpuvm_enable && hostvm_enable) { 7231 *HostVMInefficiencyFactor = urg_bandwidth_avail_active_pixel_and_vm / urg_bandwidth_avail_active_vm_only; 7232 *HostVMInefficiencyFactorPrefetch = *HostVMInefficiencyFactor; 7233 7234 if ((*HostVMInefficiencyFactorPrefetch < 4) && (remote_iommu_outstanding_translations < max_outstanding_reqs)) 7235 *HostVMInefficiencyFactorPrefetch = 4; 7236 #ifdef __DML_VBA_DEBUG__ 7237 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_avail_active_pixel_and_vm = %f\n", __func__, urg_bandwidth_avail_active_pixel_and_vm); 7238 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_avail_active_vm_only = %f\n", __func__, urg_bandwidth_avail_active_vm_only); 7239 DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = 
%f\n", __func__, *HostVMInefficiencyFactor); 7240 DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactorPrefetch = %f\n", __func__, *HostVMInefficiencyFactorPrefetch); 7241 #endif 7242 } 7243 } 7244 7245 struct dml2_core_internal_g6_temp_read_blackouts_table { 7246 struct { 7247 unsigned int uclk_khz; 7248 unsigned int blackout_us; 7249 } entries[DML_MAX_CLK_TABLE_SIZE]; 7250 }; 7251 7252 struct dml2_core_internal_g6_temp_read_blackouts_table core_dcn4_g6_temp_read_blackout_table = { 7253 .entries = { 7254 { 7255 .uclk_khz = 96000, 7256 .blackout_us = 23, 7257 }, 7258 { 7259 .uclk_khz = 435000, 7260 .blackout_us = 10, 7261 }, 7262 { 7263 .uclk_khz = 521000, 7264 .blackout_us = 10, 7265 }, 7266 { 7267 .uclk_khz = 731000, 7268 .blackout_us = 8, 7269 }, 7270 { 7271 .uclk_khz = 822000, 7272 .blackout_us = 8, 7273 }, 7274 { 7275 .uclk_khz = 962000, 7276 .blackout_us = 5, 7277 }, 7278 { 7279 .uclk_khz = 1069000, 7280 .blackout_us = 5, 7281 }, 7282 { 7283 .uclk_khz = 1187000, 7284 .blackout_us = 5, 7285 }, 7286 }, 7287 }; 7288 7289 static double get_g6_temp_read_blackout_us( 7290 struct dml2_soc_bb *soc, 7291 unsigned int uclk_freq_khz, 7292 unsigned int min_clk_index) 7293 { 7294 unsigned int i; 7295 unsigned int blackout_us = core_dcn4_g6_temp_read_blackout_table.entries[0].blackout_us; 7296 7297 if (soc->power_management_parameters.g6_temp_read_blackout_us[0] > 0.0) { 7298 /* overrides are present in the SoC BB */ 7299 return soc->power_management_parameters.g6_temp_read_blackout_us[min_clk_index]; 7300 } 7301 7302 /* use internal table */ 7303 blackout_us = core_dcn4_g6_temp_read_blackout_table.entries[0].blackout_us; 7304 7305 for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { 7306 if (uclk_freq_khz < core_dcn4_g6_temp_read_blackout_table.entries[i].uclk_khz || 7307 core_dcn4_g6_temp_read_blackout_table.entries[i].uclk_khz == 0) { 7308 break; 7309 } 7310 7311 blackout_us = core_dcn4_g6_temp_read_blackout_table.entries[i].blackout_us; 7312 } 7313 7314 return 
(double)blackout_us; 7315 } 7316 7317 static double get_max_urgent_latency_us( 7318 struct dml2_dcn4x_soc_qos_params *dcn4x, 7319 double uclk_freq_mhz, 7320 double FabricClock, 7321 unsigned int min_clk_index) 7322 { 7323 double latency; 7324 latency = dcn4x->per_uclk_dpm_params[min_clk_index].maximum_latency_when_urgent_uclk_cycles / uclk_freq_mhz 7325 * (1 + dcn4x->umc_max_latency_margin / 100.0) 7326 + dcn4x->mall_overhead_fclk_cycles / FabricClock 7327 + dcn4x->max_round_trip_to_furthest_cs_fclk_cycles / FabricClock 7328 * (1 + dcn4x->fabric_max_transport_latency_margin / 100.0); 7329 return latency; 7330 } 7331 7332 static void calculate_pstate_keepout_dst_lines( 7333 const struct dml2_display_cfg *display_cfg, 7334 const struct dml2_core_internal_watermarks *watermarks, 7335 unsigned int pstate_keepout_dst_lines[]) 7336 { 7337 const struct dml2_stream_parameters *stream_descriptor; 7338 unsigned int i; 7339 7340 for (i = 0; i < display_cfg->num_planes; i++) { 7341 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[i])) { 7342 stream_descriptor = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[i].stream_index]; 7343 7344 pstate_keepout_dst_lines[i] = 7345 (unsigned int)math_ceil(watermarks->DRAMClockChangeWatermark / ((double)stream_descriptor->timing.h_total * 1000.0 / (double)stream_descriptor->timing.pixel_clock_khz)); 7346 7347 if (pstate_keepout_dst_lines[i] > stream_descriptor->timing.v_total - 1) { 7348 pstate_keepout_dst_lines[i] = stream_descriptor->timing.v_total - 1; 7349 } 7350 } 7351 } 7352 } 7353 7354 static noinline_for_stack void dml_core_ms_prefetch_check(struct dml2_core_internal_display_mode_lib *mode_lib, 7355 const struct dml2_display_cfg *display_cfg) 7356 { 7357 struct dml2_core_calcs_mode_support_locals *s = &mode_lib->scratch.dml_core_mode_support_locals; 7358 struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params; 7359 struct 
dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; 7360 struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params; 7361 #ifdef DML_GLOBAL_PREFETCH_CHECK 7362 struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params; 7363 #endif 7364 struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params; 7365 7366 double min_return_bw_for_latency; 7367 unsigned int k; 7368 7369 mode_lib->ms.TimeCalc = 24 / mode_lib->ms.dcfclk_deepsleep; 7370 7371 calculate_hostvm_inefficiency_factor( 7372 &s->HostVMInefficiencyFactor, 7373 &s->HostVMInefficiencyFactorPrefetch, 7374 7375 display_cfg->gpuvm_enable, 7376 display_cfg->hostvm_enable, 7377 mode_lib->ip.remote_iommu_outstanding_translations, 7378 mode_lib->soc.max_outstanding_reqs, 7379 mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active], 7380 mode_lib->ms.support.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]); 7381 7382 mode_lib->ms.Total3dlutActive = 0; 7383 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { 7384 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut) 7385 mode_lib->ms.Total3dlutActive = mode_lib->ms.Total3dlutActive + 1; 7386 7387 // Calculate tdlut schedule related terms 7388 calculate_tdlut_setting_params->dispclk_mhz = mode_lib->ms.RequiredDISPCLK; 7389 calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; 7390 calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode; 7391 
calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode; 7392 calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size; 7393 calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable; 7394 calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; 7395 calculate_tdlut_setting_params->tdlut_mpc_width_flag = display_cfg->plane_descriptors[k].tdlut.tdlut_mpc_width_flag; 7396 calculate_tdlut_setting_params->is_gfx11 = dml_get_gfx_version(display_cfg->plane_descriptors[k].surface.tiling); 7397 7398 // output 7399 calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k]; 7400 calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k]; 7401 calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k]; 7402 calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k]; 7403 calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k]; 7404 calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k]; 7405 calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k]; 7406 7407 calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params); 7408 } 7409 7410 min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active]; 7411 7412 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) 7413 s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes, 7414 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, 7415 
mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes)); 7416 7417 CalculateExtraLatency( 7418 display_cfg, 7419 mode_lib->ip.rob_buffer_size_kbytes, 7420 mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles, 7421 s->ReorderingBytes, 7422 mode_lib->ms.DCFCLK, 7423 mode_lib->ms.FabricClock, 7424 mode_lib->ip.pixel_chunk_size_kbytes, 7425 min_return_bw_for_latency, 7426 mode_lib->ms.num_active_planes, 7427 mode_lib->ms.NoOfDPP, 7428 mode_lib->ms.dpte_group_bytes, 7429 s->tdlut_bytes_per_group, 7430 s->HostVMInefficiencyFactor, 7431 s->HostVMInefficiencyFactorPrefetch, 7432 mode_lib->soc.hostvm_min_page_size_kbytes * 1024, 7433 mode_lib->soc.qos_parameters.qos_type, 7434 !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable), 7435 mode_lib->soc.max_outstanding_reqs, 7436 mode_lib->ms.support.request_size_bytes_luma, 7437 mode_lib->ms.support.request_size_bytes_chroma, 7438 mode_lib->ip.meta_chunk_size_kbytes, 7439 mode_lib->ip.dchub_arb_to_ret_delay, 7440 mode_lib->ms.TripToMemory, 7441 mode_lib->ip.hostvm_mode, 7442 7443 // output 7444 &mode_lib->ms.ExtraLatency, 7445 &mode_lib->ms.ExtraLatency_sr, 7446 &mode_lib->ms.ExtraLatencyPrefetch); 7447 7448 for (k = 0; k < mode_lib->ms.num_active_planes; k++) 7449 s->impacted_dst_y_pre[k] = 0; 7450 7451 s->recalc_prefetch_schedule = 0; 7452 s->recalc_prefetch_done = 0; 7453 do { 7454 mode_lib->ms.support.PrefetchSupported = true; 7455 7456 for (k = 0; k < mode_lib->ms.num_active_planes; k++) { 7457 s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); 7458 s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format; 7459 7460 s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, 
7461 mode_lib->ms.NoOfDPP[k], 7462 display_cfg->plane_descriptors[k].composition.viewport.plane0.width, 7463 display_cfg->plane_descriptors[k].composition.viewport.plane0.height, 7464 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 7465 display_cfg->plane_descriptors[k].composition.rotation_angle); 7466 7467 s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, 7468 mode_lib->ms.NoOfDPP[k], 7469 display_cfg->plane_descriptors[k].composition.viewport.plane1.width, 7470 display_cfg->plane_descriptors[k].composition.viewport.plane1.height, 7471 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 7472 display_cfg->plane_descriptors[k].composition.rotation_angle); 7473 7474 struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe; 7475 7476 mode_lib->ms.TWait[k] = CalculateTWait( 7477 display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, 7478 mode_lib->ms.UrgLatency, 7479 mode_lib->ms.TripToMemory, 7480 !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ? 
7481 get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), mode_lib->ms.state_idx) : 0.0); 7482 7483 myPipe->Dppclk = mode_lib->ms.RequiredDPPCLK[k]; 7484 myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK; 7485 myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); 7486 myPipe->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep; 7487 myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[k]; 7488 myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled; 7489 myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; 7490 myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; 7491 myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; 7492 myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; 7493 myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle; 7494 myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored; 7495 myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k]; 7496 myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k]; 7497 myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k]; 7498 myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k]; 7499 myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced; 7500 myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors; 7501 myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active; 7502 myPipe->HTotal = 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total; 7503 myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active; 7504 myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable; 7505 myPipe->ODMMode = mode_lib->ms.ODMMode[k]; 7506 myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format; 7507 myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k]; 7508 myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k]; 7509 myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; 7510 7511 #ifdef __DML_VBA_DEBUG__ 7512 DML_LOG_VERBOSE("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k); 7513 DML_LOG_VERBOSE("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[k]); 7514 #endif 7515 CalculatePrefetchSchedule_params->display_cfg = display_cfg; 7516 CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch; 7517 CalculatePrefetchSchedule_params->myPipe = myPipe; 7518 CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelay[k]; 7519 CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter; 7520 CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl; 7521 CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only; 7522 CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor; 7523 CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal; 7524 CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->ms.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); 7525 CalculatePrefetchSchedule_params->OutputFormat = 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format; 7526 CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters; 7527 CalculatePrefetchSchedule_params->VStartup = s->MaximumVStartup[k]; 7528 CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes * 1024; 7529 CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable; 7530 CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled; 7531 CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required; 7532 CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes; 7533 CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->ms.ExtraLatencyPrefetch; 7534 CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc; 7535 CalculatePrefetchSchedule_params->vm_bytes = mode_lib->ms.vm_bytes[k]; 7536 CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[k]; 7537 CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[k]; 7538 CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k]; 7539 CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY[k]; 7540 CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[k]; 7541 CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k]; 7542 CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC[k]; 7543 CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub[k]; 7544 CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub[k]; 7545 
CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightY[k]; 7546 CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightC[k]; 7547 CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait[k]; 7548 CalculatePrefetchSchedule_params->Ttrip = mode_lib->ms.TripToMemory; 7549 CalculatePrefetchSchedule_params->Turg = mode_lib->ms.UrgLatency; 7550 CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; 7551 CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k]; 7552 CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k]; 7553 CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k]; 7554 CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k]; 7555 CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0); 7556 CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k]; 7557 CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k]; 7558 CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable; 7559 CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present; 7560 CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->ms.meta_row_bytes[k]; 7561 CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor[k]; 7562 CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k]; 7563 CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->ms.vactive_sw_bw_l[k]; 7564 CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->ms.vactive_sw_bw_c[k]; 7565 7566 // output 7567 CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k]; 7568 CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k]; 7569 
CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->ms.dst_y_prefetch[k]; 7570 CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->ms.LinesForVM[k]; 7571 CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->ms.LinesForDPTERow[k]; 7572 CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[k]; 7573 CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[k]; 7574 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; // prefetch_sw_bw_l 7575 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; // prefetch_sw_bw_c 7576 CalculatePrefetchSchedule_params->RequiredPrefetchBWMax = &mode_lib->ms.RequiredPrefetchBWMax[k]; 7577 CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.NoTimeForDynamicMetadata[k]; 7578 CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k]; 7579 CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->ms.Tno_bw_flip[k]; 7580 CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k]; 7581 CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0]; 7582 CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1]; 7583 CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2]; 7584 CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k]; 7585 CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k]; 7586 CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k]; 7587 CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k]; 7588 CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k]; 7589 CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k]; 7590 CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0]; 7591 
CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1]; 7592 CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2]; 7593 CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->ms.prefetch_cursor_bw[k]; 7594 CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k]; 7595 CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k]; 7596 CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k]; 7597 CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->prefetch_swath_time_us[k]; 7598 7599 mode_lib->ms.NoTimeForPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params); 7600 7601 mode_lib->ms.support.PrefetchSupported &= !mode_lib->ms.NoTimeForPrefetch[k]; 7602 DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_per_vm_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_vm_vblank); 7603 DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_per_row_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_row_vblank); 7604 } // for k num_planes 7605 7606 CalculateDCFCLKDeepSleepTdlut( 7607 display_cfg, 7608 mode_lib->ms.num_active_planes, 7609 mode_lib->ms.BytePerPixelY, 7610 mode_lib->ms.BytePerPixelC, 7611 mode_lib->ms.SwathWidthY, 7612 mode_lib->ms.SwathWidthC, 7613 mode_lib->ms.NoOfDPP, 7614 mode_lib->ms.PSCL_FACTOR, 7615 mode_lib->ms.PSCL_FACTOR_CHROMA, 7616 mode_lib->ms.RequiredDPPCLK, 7617 mode_lib->ms.vactive_sw_bw_l, 7618 mode_lib->ms.vactive_sw_bw_c, 7619 mode_lib->soc.return_bus_width_bytes, 7620 mode_lib->ms.RequiredDISPCLK, 7621 s->tdlut_bytes_to_deliver, 7622 s->prefetch_swath_time_us, 7623 7624 /* Output */ 7625 &mode_lib->ms.dcfclk_deepsleep); 7626 7627 for (k = 0; k < mode_lib->ms.num_active_planes; k++) { 7628 if (mode_lib->ms.dst_y_prefetch[k] < 2.0 7629 || mode_lib->ms.LinesForVM[k] >= 32.0 7630 || mode_lib->ms.LinesForDPTERow[k] >= 16.0 7631 || mode_lib->ms.NoTimeForPrefetch[k] == true 7632 || s->DSTYAfterScaler[k] > 
8) { 7633 mode_lib->ms.support.PrefetchSupported = false; 7634 DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_prefetch=%f (should not be < 2)\n", __func__, k, mode_lib->ms.dst_y_prefetch[k]); 7635 DML_LOG_VERBOSE("DML::%s: k=%d, LinesForVM=%f (should not be >= 32)\n", __func__, k, mode_lib->ms.LinesForVM[k]); 7636 DML_LOG_VERBOSE("DML::%s: k=%d, LinesForDPTERow=%f (should not be >= 16)\n", __func__, k, mode_lib->ms.LinesForDPTERow[k]); 7637 DML_LOG_VERBOSE("DML::%s: k=%d, DSTYAfterScaler=%d (should be <= 8)\n", __func__, k, s->DSTYAfterScaler[k]); 7638 DML_LOG_VERBOSE("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]); 7639 } 7640 } 7641 7642 mode_lib->ms.support.DynamicMetadataSupported = true; 7643 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { 7644 if (mode_lib->ms.NoTimeForDynamicMetadata[k] == true) { 7645 mode_lib->ms.support.DynamicMetadataSupported = false; 7646 } 7647 } 7648 7649 mode_lib->ms.support.VRatioInPrefetchSupported = true; 7650 for (k = 0; k < mode_lib->ms.num_active_planes; k++) { 7651 if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ || 7652 mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) { 7653 mode_lib->ms.support.VRatioInPrefetchSupported = false; 7654 DML_LOG_VERBOSE("DML::%s: k=%d VRatioPreY = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreY[k], __DML2_CALCS_MAX_VRATIO_PRE__); 7655 DML_LOG_VERBOSE("DML::%s: k=%d VRatioPreC = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreC[k], __DML2_CALCS_MAX_VRATIO_PRE__); 7656 DML_LOG_VERBOSE("DML::%s: VRatioInPrefetchSupported = %u\n", __func__, mode_lib->ms.support.VRatioInPrefetchSupported); 7657 } 7658 } 7659 7660 mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.VRatioInPrefetchSupported; 7661 7662 // By default, do not recalc prefetch schedule 7663 s->recalc_prefetch_schedule = 0; 7664 7665 // Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the 
Prefetch Schedule Check is ok 7666 if (mode_lib->ms.support.PrefetchSupported) { 7667 for (k = 0; k < mode_lib->ms.num_active_planes; k++) { 7668 // Calculate Urgent burst factor for prefetch 7669 #ifdef __DML_VBA_DEBUG__ 7670 DML_LOG_VERBOSE("DML::%s: k=%d, Calling CalculateUrgentBurstFactor (for prefetch)\n", __func__, k); 7671 DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPreY=%f\n", __func__, k, mode_lib->ms.VRatioPreY[k]); 7672 DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPreC=%f\n", __func__, k, mode_lib->ms.VRatioPreC[k]); 7673 #endif 7674 CalculateUrgentBurstFactor( 7675 &display_cfg->plane_descriptors[k], 7676 mode_lib->ms.swath_width_luma_ub[k], 7677 mode_lib->ms.swath_width_chroma_ub[k], 7678 mode_lib->ms.SwathHeightY[k], 7679 mode_lib->ms.SwathHeightC[k], 7680 s->line_times[k], 7681 mode_lib->ms.UrgLatency, 7682 mode_lib->ms.VRatioPreY[k], 7683 mode_lib->ms.VRatioPreC[k], 7684 mode_lib->ms.BytePerPixelInDETY[k], 7685 mode_lib->ms.BytePerPixelInDETC[k], 7686 mode_lib->ms.DETBufferSizeY[k], 7687 mode_lib->ms.DETBufferSizeC[k], 7688 /* Output */ 7689 &mode_lib->ms.UrgentBurstFactorLumaPre[k], 7690 &mode_lib->ms.UrgentBurstFactorChromaPre[k], 7691 &mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]); 7692 } 7693 7694 // Calculate urgent bandwidth required, both urg and non urg peak bandwidth 7695 // assume flip bw is 0 at this point 7696 for (k = 0; k < mode_lib->ms.num_active_planes; k++) 7697 mode_lib->ms.final_flip_bw[k] = 0; 7698 7699 calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = mode_lib->ms.support.urg_vactive_bandwidth_required; 7700 calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required; 7701 calculate_peak_bandwidth_params->urg_bandwidth_required_qual = mode_lib->ms.support.urg_bandwidth_required_qual; 7702 calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required; 7703 calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = 
mode_lib->ms.surface_avg_vactive_required_bw; 7704 calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw; 7705 7706 calculate_peak_bandwidth_params->display_cfg = display_cfg; 7707 calculate_peak_bandwidth_params->inc_flip_bw = 0; 7708 calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes; 7709 calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP; 7710 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0; 7711 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1; 7712 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0; 7713 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1; 7714 calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor; 7715 calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor; 7716 7717 calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l; 7718 calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; 7719 calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; 7720 calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; 7721 calculate_peak_bandwidth_params->prefetch_bandwidth_max = mode_lib->ms.RequiredPrefetchBWMax; 7722 calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; 7723 calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; 7724 calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; 7725 
calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw; 7726 calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw; 7727 calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw; 7728 calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw; 7729 calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw; 7730 calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma; 7731 calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma; 7732 calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor; 7733 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre; 7734 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre; 7735 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre; 7736 7737 calculate_peak_bandwidth_required( 7738 &mode_lib->scratch, 7739 calculate_peak_bandwidth_params); 7740 7741 // Check urg peak bandwidth against available urg bw 7742 // check at SDP and DRAM, for all soc states (SVP prefetch an Sys Active) 7743 check_urgent_bandwidth_support( 7744 &s->dummy_single[0], // double* frac_urg_bandwidth 7745 &s->dummy_single[1], // double* frac_urg_bandwidth_mall 7746 &mode_lib->ms.support.UrgVactiveBandwidthSupport, 7747 &mode_lib->ms.support.PrefetchBandwidthSupported, 7748 7749 mode_lib->soc.mall_allocated_for_dcn_mbytes, 7750 mode_lib->ms.support.non_urg_bandwidth_required, 7751 mode_lib->ms.support.urg_vactive_bandwidth_required, 7752 mode_lib->ms.support.urg_bandwidth_required, 7753 mode_lib->ms.support.urg_bandwidth_available); 7754 7755 mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.PrefetchBandwidthSupported; 7756 DML_LOG_VERBOSE("DML::%s: 
PrefetchBandwidthSupported=%0d\n", __func__, mode_lib->ms.support.PrefetchBandwidthSupported); 7757 7758 for (k = 0; k < mode_lib->ms.num_active_planes; k++) { 7759 if (mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]) { 7760 mode_lib->ms.support.PrefetchSupported = false; 7761 DML_LOG_VERBOSE("DML::%s: k=%d, NotEnoughUrgentLatencyHidingPre=%d\n", __func__, k, mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]); 7762 } 7763 } 7764 7765 #ifdef DML_GLOBAL_PREFETCH_CHECK 7766 if (mode_lib->ms.support.PrefetchSupported && mode_lib->ms.num_active_planes > 1 && s->recalc_prefetch_done == 0) { 7767 CheckGlobalPrefetchAdmissibility_params->num_active_planes = mode_lib->ms.num_active_planes; 7768 CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format; 7769 CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024; 7770 CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024; 7771 CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l; 7772 CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c; 7773 CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->ms.SwathHeightY; 7774 CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->ms.SwathHeightC; 7775 CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; 7776 CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->ms.CompressedBufferSizeInkByte; 7777 CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->ms.DETBufferSizeY; 7778 CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->ms.DETBufferSizeC; 7779 CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l; 7780 CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c; 7781 
CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes; 7782 CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = s->Tpre_rounded; 7783 CheckGlobalPrefetchAdmissibility_params->Tpre_oto = s->Tpre_oto; 7784 CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->ms.support.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; 7785 CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times; 7786 CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->ms.dst_y_prefetch; 7787 if (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps < 10 * 1024) 7788 CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = 10 * 1024; 7789 7790 CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps / (double) mode_lib->soc.return_bus_width_bytes) / 7791 ((double)mode_lib->soc.qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0); 7792 7793 // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible 7794 CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->recalc_prefetch_schedule; 7795 CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre; 7796 mode_lib->ms.support.PrefetchSupported = CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params); 7797 s->recalc_prefetch_done = 1; 7798 s->recalc_prefetch_schedule = 1; 7799 } 7800 #endif 7801 } // prefetch schedule ok, do urg bw and flip schedule 7802 } while (s->recalc_prefetch_schedule); 7803 7804 // Flip Schedule 7805 // Both prefetch schedule and BW okay 7806 if (mode_lib->ms.support.PrefetchSupported == true) { 7807 mode_lib->ms.BandwidthAvailableForImmediateFlip = 7808 
get_bandwidth_available_for_immediate_flip( 7809 dml2_core_internal_soc_state_sys_active, 7810 mode_lib->ms.support.urg_bandwidth_required_qual, // no flip 7811 mode_lib->ms.support.urg_bandwidth_available); 7812 7813 mode_lib->ms.TotImmediateFlipBytes = 0; 7814 for (k = 0; k < mode_lib->ms.num_active_planes; k++) { 7815 if (display_cfg->plane_descriptors[k].immediate_flip) { 7816 s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes( 7817 s->HostVMInefficiencyFactor, 7818 mode_lib->ms.vm_bytes[k], 7819 mode_lib->ms.DPTEBytesPerRow[k], 7820 mode_lib->ms.meta_row_bytes[k]); 7821 } else { 7822 s->per_pipe_flip_bytes[k] = 0; 7823 } 7824 mode_lib->ms.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->ms.NoOfDPP[k]; 7825 7826 } 7827 7828 for (k = 0; k < mode_lib->ms.num_active_planes; k++) { 7829 CalculateFlipSchedule( 7830 &mode_lib->scratch, 7831 display_cfg->plane_descriptors[k].immediate_flip, 7832 1, // use_lb_flip_bw 7833 s->HostVMInefficiencyFactor, 7834 s->Tvm_trips_flip[k], 7835 s->Tr0_trips_flip[k], 7836 s->Tvm_trips_flip_rounded[k], 7837 s->Tr0_trips_flip_rounded[k], 7838 display_cfg->gpuvm_enable, 7839 mode_lib->ms.vm_bytes[k], 7840 mode_lib->ms.DPTEBytesPerRow[k], 7841 mode_lib->ms.BandwidthAvailableForImmediateFlip, 7842 mode_lib->ms.TotImmediateFlipBytes, 7843 display_cfg->plane_descriptors[k].pixel_format, 7844 (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)), 7845 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, 7846 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, 7847 mode_lib->ms.Tno_bw_flip[k], 7848 mode_lib->ms.dpte_row_height[k], 7849 mode_lib->ms.dpte_row_height_chroma[k], 7850 mode_lib->ms.use_one_row_for_frame_flip[k], 7851 mode_lib->ip.max_flip_time_us, 7852 mode_lib->ip.max_flip_time_lines, 7853 
s->per_pipe_flip_bytes[k], 7854 mode_lib->ms.meta_row_bytes[k], 7855 s->meta_row_height_luma[k], 7856 s->meta_row_height_chroma[k], 7857 mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable, 7858 7859 /* Output */ 7860 &mode_lib->ms.dst_y_per_vm_flip[k], 7861 &mode_lib->ms.dst_y_per_row_flip[k], 7862 &mode_lib->ms.final_flip_bw[k], 7863 &mode_lib->ms.ImmediateFlipSupportedForPipe[k]); 7864 } 7865 7866 calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw; 7867 calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required_flip; 7868 calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw; 7869 calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required_flip; 7870 calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw; 7871 calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw; 7872 7873 calculate_peak_bandwidth_params->display_cfg = display_cfg; 7874 calculate_peak_bandwidth_params->inc_flip_bw = 1; 7875 calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes; 7876 calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP; 7877 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0; 7878 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1; 7879 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0; 7880 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1; 7881 calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor; 7882 
calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor; 7883 7884 calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l; 7885 calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; 7886 calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; 7887 calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; 7888 calculate_peak_bandwidth_params->prefetch_bandwidth_max = mode_lib->ms.RequiredPrefetchBWMax; 7889 calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; 7890 calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; 7891 calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; 7892 calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw; 7893 calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw; 7894 calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw; 7895 calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw; 7896 calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw; 7897 calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma; 7898 calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma; 7899 calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor; 7900 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre; 7901 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre; 7902 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre; 7903 7904 
calculate_peak_bandwidth_required( 7905 &mode_lib->scratch, 7906 calculate_peak_bandwidth_params); 7907 7908 calculate_immediate_flip_bandwidth_support( 7909 &s->dummy_single[0], // double* frac_urg_bandwidth_flip 7910 &mode_lib->ms.support.ImmediateFlipSupport, 7911 7912 dml2_core_internal_soc_state_sys_active, 7913 mode_lib->ms.support.urg_bandwidth_required_flip, 7914 mode_lib->ms.support.non_urg_bandwidth_required_flip, 7915 mode_lib->ms.support.urg_bandwidth_available); 7916 7917 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { 7918 if (display_cfg->plane_descriptors[k].immediate_flip == true && mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false) 7919 mode_lib->ms.support.ImmediateFlipSupport = false; 7920 } 7921 7922 } else { // if prefetch not support, assume iflip is not supported too 7923 mode_lib->ms.support.ImmediateFlipSupport = false; 7924 } 7925 7926 s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency; 7927 s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency; 7928 s->mSOCParameters.ExtraLatency_sr = mode_lib->ms.ExtraLatency_sr; 7929 s->mSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us; 7930 s->mSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us; 7931 s->mSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us; 7932 s->mSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us; 7933 s->mSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us; 7934 s->mSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us; 7935 s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us; 7936 s->mSOCParameters.USRRetrainingLatency = 0; 7937 s->mSOCParameters.SMNLatency = 0; 7938 
s->mSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), mode_lib->ms.state_idx); 7939 s->mSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, mode_lib->ms.state_idx); 7940 s->mSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock; 7941 s->mSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type; 7942 7943 CalculateWatermarks_params->display_cfg = display_cfg; 7944 CalculateWatermarks_params->USRRetrainingRequired = false; 7945 CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; 7946 CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines; 7947 CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits; 7948 CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes; 7949 CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLK; 7950 CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings; 7951 CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change; 7952 CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes; 7953 CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters; 7954 CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes; 7955 CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK; 7956 CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep; 7957 CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY; 7958 CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC; 7959 
CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightY; 7960 CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightC; 7961 CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthY; 7962 CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthC; 7963 CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPP; 7964 CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY; 7965 CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC; 7966 CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler; 7967 CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler; 7968 CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabled; 7969 CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByte; 7970 CalculateWatermarks_params->meta_row_height_l = s->meta_row_height_luma; 7971 CalculateWatermarks_params->meta_row_height_c = s->meta_row_height_chroma; 7972 7973 // Output 7974 CalculateWatermarks_params->Watermark = &mode_lib->ms.support.watermarks; // Watermarks *Watermark 7975 CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->ms.support.DRAMClockChangeSupport; 7976 CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->ms.support.global_dram_clock_change_supported; 7977 CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // double *MaxActiveDRAMClockChangeLatencySupported[] 7978 CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->ms.SubViewportLinesNeededInMALL; // unsigned int SubViewportLinesNeededInMALL[] 7979 CalculateWatermarks_params->FCLKChangeSupport = mode_lib->ms.support.FCLKChangeSupport; 7980 CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->ms.support.global_fclk_change_supported; 7981 CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = 
&s->dummy_single[0]; // double *MaxActiveFCLKChangeLatencySupported 7982 CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport; 7983 CalculateWatermarks_params->g6_temp_read_support = &mode_lib->ms.support.g6_temp_read_support; 7984 CalculateWatermarks_params->VActiveLatencyHidingMargin = mode_lib->ms.VActiveLatencyHidingMargin; 7985 CalculateWatermarks_params->VActiveLatencyHidingUs = mode_lib->ms.VActiveLatencyHidingUs; 7986 7987 CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params); 7988 7989 calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->ms.support.watermarks, s->dummy_integer_array[0]); 7990 DML_LOG_VERBOSE("DML::%s: Done prefetch calculation\n", __func__); 7991 7992 } 7993 7994 7995 static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out_params) 7996 { 7997 struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib; 7998 const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg; 7999 const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table; 8000 8001 double outstanding_latency_us = 0; 8002 8003 struct dml2_core_calcs_mode_support_locals *s = &mode_lib->scratch.dml_core_mode_support_locals; 8004 struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params; 8005 struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params; 8006 struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params; 8007 struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *calculate_bytes_to_fetch_required_to_hide_latency_params = &mode_lib->scratch.calculate_bytes_to_fetch_required_to_hide_latency_params; 8008 unsigned 
int k, m, n; 8009 8010 memset(&mode_lib->scratch, 0, sizeof(struct dml2_core_internal_scratch)); 8011 memset(&mode_lib->ms, 0, sizeof(struct dml2_core_internal_mode_support)); 8012 8013 mode_lib->ms.num_active_planes = display_cfg->num_planes; 8014 get_stream_output_bpp(s->OutputBpp, display_cfg); 8015 8016 mode_lib->ms.state_idx = in_out_params->min_clk_index; 8017 mode_lib->ms.SOCCLK = ((double)mode_lib->soc.clk_table.socclk.clk_values_khz[0] / 1000); 8018 mode_lib->ms.DCFCLK = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_dcfclk_khz / 1000); 8019 mode_lib->ms.FabricClock = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz / 1000); 8020 mode_lib->ms.MaxDCFCLK = (double)min_clk_table->max_clocks_khz.dcfclk / 1000; 8021 mode_lib->ms.MaxFabricClock = (double)min_clk_table->max_clocks_khz.fclk / 1000; 8022 mode_lib->ms.max_dispclk_freq_mhz = (double)min_clk_table->max_ss_clocks_khz.dispclk / 1000; 8023 mode_lib->ms.max_dscclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dscclk / 1000; 8024 mode_lib->ms.max_dppclk_freq_mhz = (double)min_clk_table->max_ss_clocks_khz.dppclk / 1000; 8025 mode_lib->ms.uclk_freq_mhz = (double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_uclk_khz / 1000.0; 8026 if (!mode_lib->ms.uclk_freq_mhz) 8027 mode_lib->ms.uclk_freq_mhz = dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config); 8028 mode_lib->ms.dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps / 1000); 8029 mode_lib->ms.max_dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[min_clk_table->dram_bw_table.num_entries - 1].pre_derate_dram_bw_kbps / 1000); 8030 mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), 
mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params); 8031 mode_lib->ms.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), &mode_lib->soc.clk_table); 8032 8033 #if defined(__DML_VBA_DEBUG__) 8034 DML_LOG_VERBOSE("DML::%s: --- START --- \n", __func__); 8035 DML_LOG_VERBOSE("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes); 8036 DML_LOG_VERBOSE("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index); 8037 DML_LOG_VERBOSE("DML::%s: qos_param_index = %0d\n", __func__, mode_lib->ms.qos_param_index); 8038 DML_LOG_VERBOSE("DML::%s: SOCCLK = %f\n", __func__, mode_lib->ms.SOCCLK); 8039 DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->ms.dram_bw_mbps); 8040 DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz); 8041 DML_LOG_VERBOSE("DML::%s: DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK); 8042 DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, mode_lib->ms.FabricClock); 8043 DML_LOG_VERBOSE("DML::%s: MaxDCFCLK = %f\n", __func__, mode_lib->ms.MaxDCFCLK); 8044 DML_LOG_VERBOSE("DML::%s: max_dispclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dispclk_freq_mhz); 8045 DML_LOG_VERBOSE("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz); 8046 DML_LOG_VERBOSE("DML::%s: max_dppclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dppclk_freq_mhz); 8047 DML_LOG_VERBOSE("DML::%s: MaxFabricClock = %f\n", __func__, mode_lib->ms.MaxFabricClock); 8048 DML_LOG_VERBOSE("DML::%s: ip.compressed_buffer_segment_size_in_kbytes = %u\n", __func__, mode_lib->ip.compressed_buffer_segment_size_in_kbytes); 8049 DML_LOG_VERBOSE("DML::%s: ip.dcn_mrq_present = %u\n", __func__, mode_lib->ip.dcn_mrq_present); 8050 8051 for (k = 0; k < mode_lib->ms.num_active_planes; k++) 8052 DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, 
display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); 8053 #endif 8054 8055 CalculateMaxDETAndMinCompressedBufferSize( 8056 mode_lib->ip.config_return_buffer_size_in_kbytes, 8057 mode_lib->ip.config_return_buffer_segment_size_in_kbytes, 8058 mode_lib->ip.rob_buffer_size_kbytes, 8059 mode_lib->ip.max_num_dpp, 8060 display_cfg->overrides.hw.force_nom_det_size_kbytes.enable, 8061 display_cfg->overrides.hw.force_nom_det_size_kbytes.value, 8062 mode_lib->ip.dcn_mrq_present, 8063 8064 /* Output */ 8065 &mode_lib->ms.MaxTotalDETInKByte, 8066 &mode_lib->ms.NomDETInKByte, 8067 &mode_lib->ms.MinCompressedBufferSizeInKByte); 8068 8069 PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd); 8070 8071 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ 8072 8073 /*Scale Ratio, taps Support Check*/ 8074 mode_lib->ms.support.ScaleRatioAndTapsSupport = true; 8075 // Many core tests are still setting scaling parameters "incorrectly" 8076 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { 8077 if (display_cfg->plane_descriptors[k].composition.scaler_info.enabled == false 8078 && (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) 8079 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio != 1.0 8080 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps != 1.0 8081 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio != 1.0 8082 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps != 1.0)) { 8083 mode_lib->ms.support.ScaleRatioAndTapsSupport = false; 8084 } else if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps > 8.0 8085 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 8.0 8086 || 
(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 1.0 && (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps % 2) == 1) 8087 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > mode_lib->ip.max_hscl_ratio 8088 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > mode_lib->ip.max_vscl_ratio 8089 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps 8090 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps 8091 || (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) 8092 && (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps < 1 || display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps > 8 || 8093 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps < 1 || display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps > 8 || 8094 (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps > 1 && display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps % 2 == 1) || 8095 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio > mode_lib->ip.max_hscl_ratio || 8096 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio > mode_lib->ip.max_vscl_ratio || 8097 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps || 8098 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps))) { 8099 mode_lib->ms.support.ScaleRatioAndTapsSupport = false; 8100 } 8101 } 8102 8103 /*Source Format, Pixel Format and Scan Support Check*/ 8104 mode_lib->ms.support.SourceFormatPixelAndScanSupport = 
true; 8105 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { 8106 if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear && dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) { 8107 mode_lib->ms.support.SourceFormatPixelAndScanSupport = false; 8108 } 8109 } 8110 8111 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { 8112 CalculateBytePerPixelAndBlockSizes( 8113 display_cfg->plane_descriptors[k].pixel_format, 8114 display_cfg->plane_descriptors[k].surface.tiling, 8115 display_cfg->plane_descriptors[k].surface.plane0.pitch, 8116 display_cfg->plane_descriptors[k].surface.plane1.pitch, 8117 8118 /* Output */ 8119 &mode_lib->ms.BytePerPixelY[k], 8120 &mode_lib->ms.BytePerPixelC[k], 8121 &mode_lib->ms.BytePerPixelInDETY[k], 8122 &mode_lib->ms.BytePerPixelInDETC[k], 8123 &mode_lib->ms.Read256BlockHeightY[k], 8124 &mode_lib->ms.Read256BlockHeightC[k], 8125 &mode_lib->ms.Read256BlockWidthY[k], 8126 &mode_lib->ms.Read256BlockWidthC[k], 8127 &mode_lib->ms.MacroTileHeightY[k], 8128 &mode_lib->ms.MacroTileHeightC[k], 8129 &mode_lib->ms.MacroTileWidthY[k], 8130 &mode_lib->ms.MacroTileWidthC[k], 8131 &mode_lib->ms.surf_linear128_l[k], 8132 &mode_lib->ms.surf_linear128_c[k]); 8133 } 8134 8135 /*Bandwidth Support Check*/ 8136 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { 8137 if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) { 8138 mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.width; 8139 mode_lib->ms.SwathWidthCSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane1.width; 8140 } else { 8141 mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.height; 8142 mode_lib->ms.SwathWidthCSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane1.height; 8143 } 8144 } 8145 8146 for (k = 0; k <= 
mode_lib->ms.num_active_planes - 1; k++) { 8147 mode_lib->ms.vactive_sw_bw_l[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; 8148 mode_lib->ms.vactive_sw_bw_c[k] = mode_lib->ms.SwathWidthCSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; 8149 8150 mode_lib->ms.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width * 8151 display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)); 8152 8153 #ifdef __DML_VBA_DEBUG__ 8154 DML_LOG_VERBOSE("DML::%s: k=%u, old_ReadBandwidthLuma = %f\n", __func__, k, mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelInDETY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); 8155 DML_LOG_VERBOSE("DML::%s: k=%u, old_ReadBandwidthChroma = %f\n", __func__, k, mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * math_ceil2(mode_lib->ms.BytePerPixelInDETC[k], 
2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio / 2.0); 8156 DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_l[k]); 8157 DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_c[k]); 8158 #endif 8159 } 8160 8161 // Writeback bandwidth 8162 for (k = 0; k < mode_lib->ms.num_active_planes; k++) { 8163 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format == dml2_444_64) { 8164 mode_lib->ms.WriteBandwidth[k][0] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height 8165 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width 8166 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height 8167 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total 8168 / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 8.0; 8169 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { 8170 mode_lib->ms.WriteBandwidth[k][0] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height 8171 * 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width 8172 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height 8173 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total 8174 / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 4.0; 8175 } else { 8176 mode_lib->ms.WriteBandwidth[k][0] = 0.0; 8177 } 8178 } 8179 8180 /*Writeback Latency support check*/ 8181 mode_lib->ms.support.WritebackLatencySupport = true; 8182 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { 8183 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0 && 8184 (mode_lib->ms.WriteBandwidth[k][0] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024 / ((double)mode_lib->soc.qos_parameters.writeback.base_latency_us))) { 8185 mode_lib->ms.support.WritebackLatencySupport = false; 8186 } 8187 } 8188 8189 8190 /* Writeback Scale Ratio and Taps Support Check */ 8191 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = true; 8192 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { 8193 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { 8194 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio > mode_lib->ip.writeback_max_hscl_ratio 8195 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio > mode_lib->ip.writeback_max_vscl_ratio 8196 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio < mode_lib->ip.writeback_min_hscl_ratio 8197 || 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio < mode_lib->ip.writeback_min_vscl_ratio 8198 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps > (unsigned int) mode_lib->ip.writeback_max_hscl_taps 8199 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps > (unsigned int) mode_lib->ip.writeback_max_vscl_taps 8200 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps 8201 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps 8202 || (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps > 2.0 && ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps % 2) == 1))) { 8203 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false; 8204 } 8205 if (2.0 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height * (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps - 1) * 57 > mode_lib->ip.writeback_line_buffer_buffer_size) { 8206 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false; 8207 } 8208 } 8209 } 8210 8211 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { 8212 CalculateSinglePipeDPPCLKAndSCLThroughput( 8213 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 8214 
display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 8215 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, 8216 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, 8217 mode_lib->ip.max_dchub_pscl_bw_pix_per_clk, 8218 mode_lib->ip.max_pscl_lb_bw_pix_per_clk, 8219 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), 8220 display_cfg->plane_descriptors[k].pixel_format, 8221 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps, 8222 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps, 8223 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps, 8224 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps, 8225 /* Output */ 8226 &mode_lib->ms.PSCL_FACTOR[k], 8227 &mode_lib->ms.PSCL_FACTOR_CHROMA[k], 8228 &mode_lib->ms.MinDPPCLKUsingSingleDPP[k]); 8229 } 8230 8231 // Max Viewport Size support 8232 for (k = 0; k < mode_lib->ms.num_active_planes; k++) { 8233 if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) { 8234 s->MaximumSwathWidthSupportLuma = 15360; 8235 } else if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // horz video 8236 s->MaximumSwathWidthSupportLuma = 7680 + 16; 8237 } else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // vert video 8238 s->MaximumSwathWidthSupportLuma = 4320 + 16; 8239 } else if (display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { // rgbe + alpha 8240 s->MaximumSwathWidthSupportLuma = 5120 + 16; 8241 } else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) 
&& mode_lib->ms.BytePerPixelY[k] == 8 && display_cfg->plane_descriptors[k].surface.dcc.enable == true) { // vert 64bpp 8242 s->MaximumSwathWidthSupportLuma = 3072 + 16; 8243 } else { 8244 s->MaximumSwathWidthSupportLuma = 6144 + 16; 8245 } 8246 8247 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) { 8248 s->MaximumSwathWidthSupportChroma = (unsigned int)(s->MaximumSwathWidthSupportLuma / 2.0); 8249 } else { 8250 s->MaximumSwathWidthSupportChroma = s->MaximumSwathWidthSupportLuma; 8251 } 8252 8253 unsigned lb_buffer_size_bits_luma = mode_lib->ip.line_buffer_size_bits; 8254 unsigned lb_buffer_size_bits_chroma = mode_lib->ip.line_buffer_size_bits; 8255 8256 /* 8257 #if defined(DV_BUILD) 8258 // Assume a memory config setting of 3 in 420 mode or get a new ip parameter that reflects the programming. 8259 if (mode_lib->ms.BytePerPixelC[k] != 0.0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { 8260 lb_buffer_size_bits_luma = 34620 * 57; 8261 lb_buffer_size_bits_chroma = 13560 * 57; 8262 } 8263 #endif 8264 */ 8265 mode_lib->ms.MaximumSwathWidthInLineBufferLuma = lb_buffer_size_bits_luma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 1.0) / 57 / 8266 (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, 1.0) - 2, 0.0)); 8267 if (mode_lib->ms.BytePerPixelC[k] == 0.0) { 8268 mode_lib->ms.MaximumSwathWidthInLineBufferChroma = 0; 8269 } else { 8270 mode_lib->ms.MaximumSwathWidthInLineBufferChroma = lb_buffer_size_bits_chroma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 1.0) / 57 / 8271 (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, 1.0) - 2, 0.0)); 8272 } 8273 8274 mode_lib->ms.MaximumSwathWidthLuma[k] = 
math_min2(s->MaximumSwathWidthSupportLuma, mode_lib->ms.MaximumSwathWidthInLineBufferLuma); 8275 mode_lib->ms.MaximumSwathWidthChroma[k] = math_min2(s->MaximumSwathWidthSupportChroma, mode_lib->ms.MaximumSwathWidthInLineBufferChroma); 8276 8277 DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthLuma[k]); 8278 DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthSupportLuma=%u\n", __func__, k, s->MaximumSwathWidthSupportLuma); 8279 DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthInLineBufferLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferLuma); 8280 8281 DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthChroma[k]); 8282 DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthSupportChroma=%u\n", __func__, k, s->MaximumSwathWidthSupportChroma); 8283 DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthInLineBufferChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferChroma); 8284 } 8285 8286 /* Cursor Support Check */ 8287 mode_lib->ms.support.CursorSupport = true; 8288 for (k = 0; k < mode_lib->ms.num_active_planes; k++) { 8289 if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) { 8290 if (display_cfg->plane_descriptors[k].cursor.cursor_bpp == 64 && mode_lib->ip.cursor_64bpp_support == false) 8291 mode_lib->ms.support.CursorSupport = false; 8292 } 8293 } 8294 8295 /* Valid Pitch Check */ 8296 mode_lib->ms.support.PitchSupport = true; 8297 for (k = 0; k < mode_lib->ms.num_active_planes; k++) { 8298 8299 // data pitch 8300 unsigned int alignment_l = mode_lib->ms.MacroTileWidthY[k]; 8301 8302 if (mode_lib->ms.surf_linear128_l[k]) 8303 alignment_l = alignment_l / 2; 8304 8305 mode_lib->ms.support.AlignedYPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane0.pitch, display_cfg->plane_descriptors[k].surface.plane0.width), alignment_l); 8306 if 
(dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { 8307 unsigned int alignment_c = mode_lib->ms.MacroTileWidthC[k]; 8308 8309 if (mode_lib->ms.surf_linear128_c[k]) 8310 alignment_c = alignment_c / 2; 8311 mode_lib->ms.support.AlignedCPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane1.pitch, display_cfg->plane_descriptors[k].surface.plane1.width), alignment_c); 8312 } else { 8313 mode_lib->ms.support.AlignedCPitch[k] = display_cfg->plane_descriptors[k].surface.plane1.pitch; 8314 } 8315 8316 if (mode_lib->ms.support.AlignedYPitch[k] > display_cfg->plane_descriptors[k].surface.plane0.pitch || 8317 mode_lib->ms.support.AlignedCPitch[k] > display_cfg->plane_descriptors[k].surface.plane1.pitch) { 8318 mode_lib->ms.support.PitchSupport = false; 8319 #if defined(__DML_VBA_DEBUG__) 8320 DML_LOG_VERBOSE("DML::%s: k=%u AlignedYPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedYPitch[k]); 8321 DML_LOG_VERBOSE("DML::%s: k=%u PitchY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.pitch); 8322 DML_LOG_VERBOSE("DML::%s: k=%u AlignedCPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedCPitch[k]); 8323 DML_LOG_VERBOSE("DML::%s: k=%u PitchC = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane1.pitch); 8324 DML_LOG_VERBOSE("DML::%s: k=%u PitchSupport = %d\n", __func__, k, mode_lib->ms.support.PitchSupport); 8325 #endif 8326 } 8327 8328 // meta pitch 8329 if (mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable) { 8330 mode_lib->ms.support.AlignedDCCMetaPitchY[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch, 8331 display_cfg->plane_descriptors[k].surface.plane0.width), 64.0 * mode_lib->ms.Read256BlockWidthY[k]); 8332 8333 if (mode_lib->ms.support.AlignedDCCMetaPitchY[k] > display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch) 
8334 mode_lib->ms.support.PitchSupport = false; 8335 8336 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { 8337 mode_lib->ms.support.AlignedDCCMetaPitchC[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch, 8338 display_cfg->plane_descriptors[k].surface.plane1.width), 64.0 * mode_lib->ms.Read256BlockWidthC[k]); 8339 8340 if (mode_lib->ms.support.AlignedDCCMetaPitchC[k] > display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch) 8341 mode_lib->ms.support.PitchSupport = false; 8342 } 8343 } else { 8344 mode_lib->ms.support.AlignedDCCMetaPitchY[k] = 0; 8345 mode_lib->ms.support.AlignedDCCMetaPitchC[k] = 0; 8346 } 8347 } 8348 8349 mode_lib->ms.support.ViewportExceedsSurface = false; 8350 if (!display_cfg->overrides.hw.surface_viewport_size_check_disable) { 8351 for (k = 0; k < mode_lib->ms.num_active_planes; k++) { 8352 if (display_cfg->plane_descriptors[k].composition.viewport.plane0.width > display_cfg->plane_descriptors[k].surface.plane0.width || 8353 display_cfg->plane_descriptors[k].composition.viewport.plane0.height > display_cfg->plane_descriptors[k].surface.plane0.height) { 8354 mode_lib->ms.support.ViewportExceedsSurface = true; 8355 #if defined(__DML_VBA_DEBUG__) 8356 DML_LOG_VERBOSE("DML::%s: k=%u ViewportWidth = %ld\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width); 8357 DML_LOG_VERBOSE("DML::%s: k=%u SurfaceWidthY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.width); 8358 DML_LOG_VERBOSE("DML::%s: k=%u ViewportHeight = %ld\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height); 8359 DML_LOG_VERBOSE("DML::%s: k=%u SurfaceHeightY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.height); 8360 DML_LOG_VERBOSE("DML::%s: k=%u ViewportExceedsSurface = %d\n", __func__, k, mode_lib->ms.support.ViewportExceedsSurface); 
8361 #endif 8362 } 8363 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { 8364 if (display_cfg->plane_descriptors[k].composition.viewport.plane1.width > display_cfg->plane_descriptors[k].surface.plane1.width || 8365 display_cfg->plane_descriptors[k].composition.viewport.plane1.height > display_cfg->plane_descriptors[k].surface.plane1.height) { 8366 mode_lib->ms.support.ViewportExceedsSurface = true; 8367 } 8368 } 8369 } 8370 } 8371 8372 CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg; 8373 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes; 8374 CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte; 8375 CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte; 8376 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; 8377 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes; 8378 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; 8379 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes; 8380 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = 1; 8381 CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; 8382 CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte; 8383 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes; 8384 CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes; 8385 CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma 
= mode_lib->ms.vactive_sw_bw_l; 8386 CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.vactive_sw_bw_c; 8387 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma; 8388 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma; 8389 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY; 8390 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC; 8391 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY; 8392 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC; 8393 CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->ms.surf_linear128_l; 8394 CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->ms.surf_linear128_c; 8395 CalculateSwathAndDETConfiguration_params->ODMMode = s->dummy_odm_mode; 8396 CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY; 8397 CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC; 8398 CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY; 8399 CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC; 8400 CalculateSwathAndDETConfiguration_params->DPPPerSurface = s->dummy_integer_array[2]; 8401 CalculateSwathAndDETConfiguration_params->mrq_present = mode_lib->ip.dcn_mrq_present; 8402 8403 // output 8404 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0]; 8405 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1]; 8406 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_integer_array[3]; 8407 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = 
s->dummy_integer_array[4]; 8408 CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_integer_array[5]; 8409 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_integer_array[6]; 8410 CalculateSwathAndDETConfiguration_params->SwathHeightY = s->dummy_integer_array[7]; 8411 CalculateSwathAndDETConfiguration_params->SwathHeightC = s->dummy_integer_array[8]; 8412 CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = s->dummy_integer_array[26]; 8413 CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = s->dummy_integer_array[27]; 8414 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = s->dummy_integer_array[9]; 8415 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = s->dummy_integer_array[10]; 8416 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = s->dummy_integer_array[11]; 8417 CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l; 8418 CalculateSwathAndDETConfiguration_params->full_swath_bytes_c = s->full_swath_bytes_c; 8419 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &s->dummy_boolean[0]; 8420 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[1]; 8421 CalculateSwathAndDETConfiguration_params->hw_debug5 = &s->dummy_boolean[2]; 8422 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &s->dummy_integer[0]; 8423 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = mode_lib->ms.SingleDPPViewportSizeSupportPerSurface; 8424 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[1]; 8425 8426 // This calls is just to find out if there is enough DET space to support full vp in 1 pipe. 
8427 CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params); 8428 8429 mode_lib->ms.TotalNumberOfActiveDPP = 0; 8430 mode_lib->ms.TotalNumberOfActiveOPP = 0; 8431 mode_lib->ms.support.TotalAvailablePipesSupport = true; 8432 8433 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { 8434 /*Number Of DSC Slices*/ 8435 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable || 8436 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) { 8437 8438 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices != 0) 8439 mode_lib->ms.support.NumberOfDSCSlices[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices; 8440 else { 8441 if (s->PixelClockBackEnd[k] > 4800) { 8442 mode_lib->ms.support.NumberOfDSCSlices[k] = (unsigned int)(math_ceil2(s->PixelClockBackEnd[k] / 600, 4)); 8443 } else if (s->PixelClockBackEnd[k] > 2400) { 8444 mode_lib->ms.support.NumberOfDSCSlices[k] = 8; 8445 } else if (s->PixelClockBackEnd[k] > 1200) { 8446 mode_lib->ms.support.NumberOfDSCSlices[k] = 4; 8447 } else if (s->PixelClockBackEnd[k] > 340) { 8448 mode_lib->ms.support.NumberOfDSCSlices[k] = 2; 8449 } else { 8450 mode_lib->ms.support.NumberOfDSCSlices[k] = 1; 8451 } 8452 } 8453 } else { 8454 mode_lib->ms.support.NumberOfDSCSlices[k] = 0; 8455 } 8456 8457 CalculateODMMode( 8458 mode_lib->ip.maximum_pixels_per_line_per_dsc_unit, 8459 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, 8460 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format, 8461 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder, 8462 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode, 8463 mode_lib->ms.max_dispclk_freq_mhz, 8464 false, // DSCEnable 8465 mode_lib->ms.TotalNumberOfActiveDPP, 8466 mode_lib->ms.TotalNumberOfActiveOPP, 8467 mode_lib->ip.max_num_dpp, 8468 mode_lib->ip.max_num_opp, 8469 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), 8470 mode_lib->ms.support.NumberOfDSCSlices[k], 8471 8472 /* Output */ 8473 &s->TotalAvailablePipesSupportNoDSC, 8474 &s->NumberOfDPPNoDSC, 8475 &s->ODMModeNoDSC, 8476 &s->RequiredDISPCLKPerSurfaceNoDSC); 8477 8478 CalculateODMMode( 8479 mode_lib->ip.maximum_pixels_per_line_per_dsc_unit, 8480 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, 8481 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format, 8482 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder, 8483 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode, 8484 mode_lib->ms.max_dispclk_freq_mhz, 8485 true, // DSCEnable 8486 mode_lib->ms.TotalNumberOfActiveDPP, 8487 mode_lib->ms.TotalNumberOfActiveOPP, 8488 mode_lib->ip.max_num_dpp, 8489 mode_lib->ip.max_num_opp, 8490 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), 8491 mode_lib->ms.support.NumberOfDSCSlices[k], 8492 8493 /* Output */ 8494 &s->TotalAvailablePipesSupportDSC, 8495 &s->NumberOfDPPDSC, 8496 &s->ODMModeDSC, 8497 &s->RequiredDISPCLKPerSurfaceDSC); 8498 8499 CalculateOutputLink( 8500 &mode_lib->scratch, 8501 ((double)mode_lib->soc.clk_table.phyclk.clk_values_khz[0] / 1000), 8502 ((double)mode_lib->soc.clk_table.phyclk_d18.clk_values_khz[0] / 1000), 8503 ((double)mode_lib->soc.clk_table.phyclk_d32.clk_values_khz[0] / 1000), 8504 
mode_lib->soc.phy_downspread_percent, 8505 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder, 8506 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format, 8507 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, 8508 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, 8509 s->PixelClockBackEnd[k], 8510 s->OutputBpp[k], 8511 mode_lib->ip.maximum_dsc_bits_per_component, 8512 mode_lib->ms.support.NumberOfDSCSlices[k], 8513 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate, 8514 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout, 8515 s->ODMModeNoDSC, 8516 s->ODMModeDSC, 8517 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable, 8518 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count, 8519 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate, 8520 8521 /* Output */ 8522 &mode_lib->ms.RequiresDSC[k], 8523 &mode_lib->ms.RequiresFEC[k], 8524 &mode_lib->ms.OutputBpp[k], 8525 &mode_lib->ms.OutputType[k], 8526 &mode_lib->ms.OutputRate[k], 8527 &mode_lib->ms.RequiredSlots[k]); 8528 8529 if (s->OutputBpp[k] == 0.0) { 8530 s->OutputBpp[k] = mode_lib->ms.OutputBpp[k]; 8531 } 8532 8533 if (mode_lib->ms.RequiresDSC[k] == false) { 8534 mode_lib->ms.ODMMode[k] = s->ODMModeNoDSC; 8535 mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceNoDSC; 8536 if (!s->TotalAvailablePipesSupportNoDSC) 8537 mode_lib->ms.support.TotalAvailablePipesSupport = false; 8538 mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPNoDSC; 8539 } else { 8540 mode_lib->ms.ODMMode[k] = s->ODMModeDSC; 8541 
mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceDSC; 8542 if (!s->TotalAvailablePipesSupportDSC) 8543 mode_lib->ms.support.TotalAvailablePipesSupport = false; 8544 mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPDSC; 8545 } 8546 #if defined(__DML_VBA_DEBUG__) 8547 DML_LOG_VERBOSE("DML::%s: k=%d RequiresDSC = %d\n", __func__, k, mode_lib->ms.RequiresDSC[k]); 8548 DML_LOG_VERBOSE("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]); 8549 #endif 8550 8551 // ensure the number dsc slices is integer multiple based on ODM mode 8552 mode_lib->ms.support.DSCSlicesODMModeSupported = true; 8553 if (mode_lib->ms.RequiresDSC[k]) { 8554 // fail a ms check if the override num_slices doesn't align with odm mode setting 8555 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices != 0) { 8556 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) 8557 mode_lib->ms.support.DSCSlicesODMModeSupported = ((mode_lib->ms.support.NumberOfDSCSlices[k] % 2) == 0); 8558 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) 8559 mode_lib->ms.support.DSCSlicesODMModeSupported = (mode_lib->ms.support.NumberOfDSCSlices[k] == 12); 8560 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) 8561 mode_lib->ms.support.DSCSlicesODMModeSupported = ((mode_lib->ms.support.NumberOfDSCSlices[k] % 4) == 0); 8562 #if defined(__DML_VBA_DEBUG__) 8563 if (!mode_lib->ms.support.DSCSlicesODMModeSupported) { 8564 DML_LOG_VERBOSE("DML::%s: k=%d Invalid dsc num_slices and ODM mode setting\n", __func__, k); 8565 DML_LOG_VERBOSE("DML::%s: k=%d num_slices = %d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices); 8566 DML_LOG_VERBOSE("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]); 8567 } 8568 #endif 8569 } else { 8570 // safe guard to ensure the dml derived 
dsc slices and odm setting are compatible 8571 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) 8572 mode_lib->ms.support.NumberOfDSCSlices[k] = 2 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 2.0, 1.0); 8573 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) 8574 mode_lib->ms.support.NumberOfDSCSlices[k] = 12; 8575 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) 8576 mode_lib->ms.support.NumberOfDSCSlices[k] = 4 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 4.0, 1.0); 8577 } 8578 8579 } else { 8580 mode_lib->ms.support.NumberOfDSCSlices[k] = 0; 8581 } 8582 } 8583 8584 mode_lib->ms.support.incorrect_imall_usage = 0; 8585 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { 8586 if (mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall) 8587 mode_lib->ms.support.incorrect_imall_usage = 1; 8588 } 8589 8590 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { 8591 mode_lib->ms.MPCCombine[k] = false; 8592 mode_lib->ms.NoOfDPP[k] = 1; 8593 mode_lib->ms.NoOfOPP[k] = 1; 8594 8595 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) { 8596 mode_lib->ms.MPCCombine[k] = false; 8597 mode_lib->ms.NoOfDPP[k] = 4; 8598 mode_lib->ms.NoOfOPP[k] = 4; 8599 } else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) { 8600 mode_lib->ms.MPCCombine[k] = false; 8601 mode_lib->ms.NoOfDPP[k] = 3; 8602 mode_lib->ms.NoOfOPP[k] = 3; 8603 } else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) { 8604 mode_lib->ms.MPCCombine[k] = false; 8605 mode_lib->ms.NoOfDPP[k] = 2; 8606 mode_lib->ms.NoOfOPP[k] = 2; 8607 } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 2) { 8608 mode_lib->ms.MPCCombine[k] = true; 8609 mode_lib->ms.NoOfDPP[k] = 2; 8610 } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 1) { 8611 mode_lib->ms.MPCCombine[k] = false; 8612 
mode_lib->ms.NoOfDPP[k] = 1; 8613 if (!mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) { 8614 DML_LOG_VERBOSE("WARNING: DML::%s: MPCC is override to disable but viewport is too large to be supported with single pipe!\n", __func__); 8615 } 8616 } else { 8617 if ((mode_lib->ms.MinDPPCLKUsingSingleDPP[k] > mode_lib->ms.max_dppclk_freq_mhz) || !mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) { 8618 mode_lib->ms.MPCCombine[k] = true; 8619 mode_lib->ms.NoOfDPP[k] = 2; 8620 } 8621 } 8622 #if defined(__DML_VBA_DEBUG__) 8623 DML_LOG_VERBOSE("DML::%s: k=%d, NoOfDPP = %d\n", __func__, k, mode_lib->ms.NoOfDPP[k]); 8624 #endif 8625 } 8626 8627 mode_lib->ms.TotalNumberOfActiveDPP = 0; 8628 mode_lib->ms.TotalNumberOfActiveOPP = 0; 8629 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { 8630 mode_lib->ms.TotalNumberOfActiveDPP += mode_lib->ms.NoOfDPP[k]; 8631 mode_lib->ms.TotalNumberOfActiveOPP += mode_lib->ms.NoOfOPP[k]; 8632 } 8633 if (mode_lib->ms.TotalNumberOfActiveDPP > (unsigned int)mode_lib->ip.max_num_dpp) 8634 mode_lib->ms.support.TotalAvailablePipesSupport = false; 8635 if (mode_lib->ms.TotalNumberOfActiveOPP > (unsigned int)mode_lib->ip.max_num_opp) 8636 mode_lib->ms.support.TotalAvailablePipesSupport = false; 8637 8638 8639 mode_lib->ms.TotalNumberOfSingleDPPSurfaces = 0; 8640 for (k = 0; k < (unsigned int)mode_lib->ms.num_active_planes; ++k) { 8641 if (mode_lib->ms.NoOfDPP[k] == 1) 8642 mode_lib->ms.TotalNumberOfSingleDPPSurfaces = mode_lib->ms.TotalNumberOfSingleDPPSurfaces + 1; 8643 } 8644 8645 //DISPCLK/DPPCLK 8646 mode_lib->ms.WritebackRequiredDISPCLK = 0; 8647 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { 8648 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { 8649 mode_lib->ms.WritebackRequiredDISPCLK = math_max2(mode_lib->ms.WritebackRequiredDISPCLK, 8650 
CalculateWriteBackDISPCLK(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format, 8651 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), 8652 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio, 8653 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio, 8654 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps, 8655 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps, 8656 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_width, 8657 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width, 8658 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, 8659 mode_lib->ip.writeback_line_buffer_buffer_size)); 8660 } 8661 } 8662 8663 mode_lib->ms.RequiredDISPCLK = mode_lib->ms.WritebackRequiredDISPCLK; 8664 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { 8665 mode_lib->ms.RequiredDISPCLK = math_max2(mode_lib->ms.RequiredDISPCLK, mode_lib->ms.RequiredDISPCLKPerSurface[k]); 8666 } 8667 8668 mode_lib->ms.GlobalDPPCLK = 0; 8669 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { 8670 mode_lib->ms.RequiredDPPCLK[k] = mode_lib->ms.MinDPPCLKUsingSingleDPP[k] / mode_lib->ms.NoOfDPP[k]; 8671 mode_lib->ms.GlobalDPPCLK = math_max2(mode_lib->ms.GlobalDPPCLK, mode_lib->ms.RequiredDPPCLK[k]); 8672 } 8673 8674 mode_lib->ms.support.DISPCLK_DPPCLK_Support = !((mode_lib->ms.RequiredDISPCLK > mode_lib->ms.max_dispclk_freq_mhz) || (mode_lib->ms.GlobalDPPCLK > mode_lib->ms.max_dppclk_freq_mhz)); 8675 8676 /* Total 
Available OTG, Writeback, HDMIFRL, DP Support Check */ 8677 s->TotalNumberOfActiveOTG = 0; 8678 s->TotalNumberOfActiveHDMIFRL = 0; 8679 s->TotalNumberOfActiveDP2p0 = 0; 8680 s->TotalNumberOfActiveDP2p0Outputs = 0; 8681 s->TotalNumberOfActiveWriteback = 0; 8682 memset(s->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool)); 8683 8684 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { 8685 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) { 8686 if (!s->stream_visited[display_cfg->plane_descriptors[k].stream_index]) { 8687 s->stream_visited[display_cfg->plane_descriptors[k].stream_index] = 1; 8688 8689 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) 8690 s->TotalNumberOfActiveWriteback = s->TotalNumberOfActiveWriteback + 1; 8691 8692 s->TotalNumberOfActiveOTG = s->TotalNumberOfActiveOTG + 1; 8693 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) 8694 s->TotalNumberOfActiveHDMIFRL = s->TotalNumberOfActiveHDMIFRL + 1; 8695 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0) { 8696 s->TotalNumberOfActiveDP2p0 = s->TotalNumberOfActiveDP2p0 + 1; 8697 // FIXME_STAGE2: SW not using backend related stuff, need mapping for mst setup 8698 //if (display_cfg->output.OutputMultistreamId[k] == k || display_cfg->output.OutputMultistreamEn[k] == false) { 8699 s->TotalNumberOfActiveDP2p0Outputs = s->TotalNumberOfActiveDP2p0Outputs + 1; 8700 //} 8701 } 8702 } 8703 } 8704 } 8705 8706 /* Writeback Mode Support Check */ 8707 mode_lib->ms.support.EnoughWritebackUnits = 1; 8708 if (s->TotalNumberOfActiveWriteback > (unsigned int)mode_lib->ip.max_num_wb) { 8709 mode_lib->ms.support.EnoughWritebackUnits = false; 8710 } 8711 mode_lib->ms.support.NumberOfOTGSupport = (s->TotalNumberOfActiveOTG <= (unsigned int)mode_lib->ip.max_num_otg); 8712 
mode_lib->ms.support.NumberOfHDMIFRLSupport = (s->TotalNumberOfActiveHDMIFRL <= (unsigned int)mode_lib->ip.max_num_hdmi_frl_outputs); 8713 mode_lib->ms.support.NumberOfDP2p0Support = (s->TotalNumberOfActiveDP2p0 <= (unsigned int)mode_lib->ip.max_num_dp2p0_streams && s->TotalNumberOfActiveDP2p0Outputs <= (unsigned int)mode_lib->ip.max_num_dp2p0_outputs); 8714 8715 8716 mode_lib->ms.support.ExceededMultistreamSlots = false; 8717 mode_lib->ms.support.LinkCapacitySupport = true; 8718 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { 8719 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_disabled == false && 8720 (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp || 8721 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) && mode_lib->ms.OutputBpp[k] == 0) { 8722 mode_lib->ms.support.LinkCapacitySupport = false; 8723 } 8724 } 8725 8726 mode_lib->ms.support.P2IWith420 = false; 8727 mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = false; 8728 mode_lib->ms.support.DSC422NativeNotSupported = false; 8729 mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = false; 8730 mode_lib->ms.support.LinkRateForMultistreamNotIndicated = false; 8731 mode_lib->ms.support.BPPForMultistreamNotIndicated = false; 8732 mode_lib->ms.support.MultistreamWithHDMIOreDP = false; 8733 mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = false; 8734 mode_lib->ms.support.NotEnoughLanesForMSO = false; 8735 8736 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { 8737 if 
(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp || 8738 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) { 8739 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && mode_lib->ip.ptoi_supported == true) 8740 mode_lib->ms.support.P2IWith420 = true; 8741 8742 if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 && !mode_lib->ip.dsc422_native_support) 8743 mode_lib->ms.support.DSC422NativeNotSupported = true; 8744 8745 if (((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr2 || 8746 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr3) && 8747 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_edp) || 8748 ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr10 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr13p5 || 8749 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr20) && 8750 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp2p0)) 8751 mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = true; 8752 8753 // FIXME_STAGE2 8754 //if (display_cfg->output.OutputMultistreamEn[k] == 1) { 8755 // if (display_cfg->output.OutputMultistreamId[k] == k && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_na) 8756 // mode_lib->ms.support.LinkRateForMultistreamNotIndicated = true; 8757 // if (display_cfg->output.OutputMultistreamId[k] == k && s->OutputBpp[k] == 0) 8758 // mode_lib->ms.support.BPPForMultistreamNotIndicated = true; 8759 // for (n = 0; n < mode_lib->ms.num_active_planes; ++n) { 8760 // if (display_cfg->output.OutputMultistreamId[k] == n && s->OutputBpp[k] == 0) 8761 // mode_lib->ms.support.BPPForMultistreamNotIndicated = true; 8762 // } 8763 //} 8764 8765 if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp || 8766 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || 8767 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl)) { 8768 // FIXME_STAGE2 8769 //if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == k) 8770 // 
mode_lib->ms.support.MultistreamWithHDMIOreDP = true; 8771 //for (n = 0; n < mode_lib->ms.num_active_planes; ++n) { 8772 // if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == n) 8773 // mode_lib->ms.support.MultistreamWithHDMIOreDP = true; 8774 //} 8775 } 8776 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_split_1to2 || 8777 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4)) 8778 mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = true; 8779 8780 if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 2) || 8781 (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 4)) 8782 mode_lib->ms.support.NotEnoughLanesForMSO = true; 8783 } 8784 } 8785 8786 mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = false; 8787 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { 8788 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl && 8789 !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) { 8790 mode_lib->ms.RequiredDTBCLK[k] = RequiredDTBCLK( 8791 mode_lib->ms.RequiresDSC[k], 8792 s->PixelClockBackEnd[k], 8793 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format, 8794 mode_lib->ms.OutputBpp[k], 8795 mode_lib->ms.support.NumberOfDSCSlices[k], 8796 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, 8797 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, 8798 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate, 8799 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout); 8800 8801 if (mode_lib->ms.RequiredDTBCLK[k] > ((double)min_clk_table->max_ss_clocks_khz.dtbclk / 1000)) { 8802 mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = true; 8803 } 8804 } else { 8805 /* Phantom DTBCLK can be calculated different from main because phantom has no DSC and thus 8806 * will have a different output BPP. Ignore phantom DTBCLK requirement and only consider 8807 * non-phantom DTBCLK requirements. In map_mode_to_soc_dpm we choose the highest DTBCLK 8808 * required - by setting phantom dtbclk to 0 we ignore it. 
8809 */ 8810 mode_lib->ms.RequiredDTBCLK[k] = 0; 8811 } 8812 } 8813 8814 mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = false; 8815 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { 8816 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || 8817 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || 8818 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp || 8819 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) { 8820 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420) { 8821 s->DSCFormatFactor = 2; 8822 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_444) { 8823 s->DSCFormatFactor = 1; 8824 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) { 8825 s->DSCFormatFactor = 2; 8826 } else { 8827 s->DSCFormatFactor = 1; 8828 } 8829 #ifdef __DML_VBA_DEBUG__ 8830 DML_LOG_VERBOSE("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]); 8831 #endif 8832 if (mode_lib->ms.RequiresDSC[k] == true) { 8833 s->PixelClockBackEndFactor = 3.0; 8834 8835 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) 8836 s->PixelClockBackEndFactor = 12.0; 8837 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) 8838 s->PixelClockBackEndFactor = 9.0; 8839 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) 8840 s->PixelClockBackEndFactor = 6.0; 8841 8842 mode_lib->ms.required_dscclk_freq_mhz[k] = s->PixelClockBackEnd[k] / 
s->PixelClockBackEndFactor / (double)s->DSCFormatFactor; 8843 if (mode_lib->ms.required_dscclk_freq_mhz[k] > mode_lib->ms.max_dscclk_freq_mhz) { 8844 mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true; 8845 } 8846 8847 #ifdef __DML_VBA_DEBUG__ 8848 DML_LOG_VERBOSE("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, s->PixelClockBackEnd[k]); 8849 DML_LOG_VERBOSE("DML::%s: k=%u, required_dscclk_freq_mhz = %f\n", __func__, k, mode_lib->ms.required_dscclk_freq_mhz[k]); 8850 DML_LOG_VERBOSE("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor); 8851 DML_LOG_VERBOSE("DML::%s: k=%u, DSCCLKRequiredMoreThanSupported = %u\n", __func__, k, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported); 8852 #endif 8853 } 8854 } 8855 } 8856 8857 /* Check DSC Unit and Slices Support */ 8858 mode_lib->ms.support.NotEnoughDSCSlices = false; 8859 s->TotalDSCUnitsRequired = 0; 8860 mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = true; 8861 memset(s->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool)); 8862 8863 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { 8864 if (mode_lib->ms.RequiresDSC[k] == true && !s->stream_visited[display_cfg->plane_descriptors[k].stream_index]) { 8865 s->NumDSCUnitRequired = 1; 8866 8867 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) 8868 s->NumDSCUnitRequired = 4; 8869 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) 8870 s->NumDSCUnitRequired = 3; 8871 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) 8872 s->NumDSCUnitRequired = 2; 8873 8874 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active > s->NumDSCUnitRequired * (unsigned int)mode_lib->ip.maximum_pixels_per_line_per_dsc_unit) 8875 mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false; 8876 s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + s->NumDSCUnitRequired; 8877 8878 if (mode_lib->ms.support.NumberOfDSCSlices[k] > 4 * s->NumDSCUnitRequired) 8879 
mode_lib->ms.support.NotEnoughDSCSlices = true; 8880 } 8881 s->stream_visited[display_cfg->plane_descriptors[k].stream_index] = 1; 8882 } 8883 8884 mode_lib->ms.support.NotEnoughDSCUnits = false; 8885 if (s->TotalDSCUnitsRequired > (unsigned int)mode_lib->ip.num_dsc) { 8886 mode_lib->ms.support.NotEnoughDSCUnits = true; 8887 } 8888 8889 /*DSC Delay per state*/ 8890 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { 8891 mode_lib->ms.DSCDelay[k] = DSCDelayRequirement(mode_lib->ms.RequiresDSC[k], 8892 mode_lib->ms.ODMMode[k], 8893 mode_lib->ip.maximum_dsc_bits_per_component, 8894 s->OutputBpp[k], 8895 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, 8896 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, 8897 mode_lib->ms.support.NumberOfDSCSlices[k], 8898 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format, 8899 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder, 8900 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), 8901 s->PixelClockBackEnd[k]); 8902 } 8903 8904 // Figure out the swath and DET configuration after the num dpp per plane is figured out 8905 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false; 8906 CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.ODMMode; 8907 CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.NoOfDPP; 8908 8909 // output 8910 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0]; 8911 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1]; 8912 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub; 8913 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub; 8914 
CalculateSwathAndDETConfiguration_params->SwathWidth = mode_lib->ms.SwathWidthY; 8915 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = mode_lib->ms.SwathWidthC; 8916 CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->ms.SwathHeightY; 8917 CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->ms.SwathHeightC; 8918 CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->ms.support.request_size_bytes_luma; 8919 CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->ms.support.request_size_bytes_chroma; 8920 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->ms.DETBufferSizeInKByte; // FIXME: This is per pipe but the pipes in plane will use that 8921 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY; 8922 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC; 8923 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->ms.UnboundedRequestEnabled; 8924 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = s->dummy_integer_array[3]; 8925 CalculateSwathAndDETConfiguration_params->hw_debug5 = s->dummy_boolean_array[1]; 8926 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->ms.CompressedBufferSizeInkByte; 8927 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = s->dummy_boolean_array[0]; 8928 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &mode_lib->ms.support.ViewportSizeSupport; 8929 8930 CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params); 8931 8932 if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) { 8933 for (k = 0; k < mode_lib->ms.num_active_planes; k++) 8934 mode_lib->ms.SurfaceSizeInMALL[k] = 0; 8935 mode_lib->ms.support.ExceededMALLSize = 0; 8936 } else { 8937 CalculateSurfaceSizeInMall( 8938 display_cfg, 8939 
mode_lib->ms.num_active_planes, 8940 mode_lib->soc.mall_allocated_for_dcn_mbytes, 8941 8942 mode_lib->ms.BytePerPixelY, 8943 mode_lib->ms.BytePerPixelC, 8944 mode_lib->ms.Read256BlockWidthY, 8945 mode_lib->ms.Read256BlockWidthC, 8946 mode_lib->ms.Read256BlockHeightY, 8947 mode_lib->ms.Read256BlockHeightC, 8948 mode_lib->ms.MacroTileWidthY, 8949 mode_lib->ms.MacroTileWidthC, 8950 mode_lib->ms.MacroTileHeightY, 8951 mode_lib->ms.MacroTileHeightC, 8952 8953 /* Output */ 8954 mode_lib->ms.SurfaceSizeInMALL, 8955 &mode_lib->ms.support.ExceededMALLSize); 8956 } 8957 8958 mode_lib->ms.TotalNumberOfDCCActiveDPP = 0; 8959 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { 8960 if (display_cfg->plane_descriptors[k].surface.dcc.enable == true) { 8961 mode_lib->ms.TotalNumberOfDCCActiveDPP = mode_lib->ms.TotalNumberOfDCCActiveDPP + mode_lib->ms.NoOfDPP[k]; 8962 } 8963 } 8964 8965 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { 8966 s->SurfParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); 8967 s->SurfParameters[k].DPPPerSurface = mode_lib->ms.NoOfDPP[k]; 8968 s->SurfParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle; 8969 s->SurfParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height; 8970 s->SurfParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height; 8971 s->SurfParameters[k].BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k]; 8972 s->SurfParameters[k].BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k]; 8973 s->SurfParameters[k].BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k]; 8974 s->SurfParameters[k].BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k]; 8975 s->SurfParameters[k].BlockWidthY = mode_lib->ms.MacroTileWidthY[k]; 8976 s->SurfParameters[k].BlockHeightY = mode_lib->ms.MacroTileHeightY[k]; 8977 
s->SurfParameters[k].BlockWidthC = mode_lib->ms.MacroTileWidthC[k]; 8978 s->SurfParameters[k].BlockHeightC = mode_lib->ms.MacroTileHeightC[k]; 8979 s->SurfParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced; 8980 s->SurfParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total; 8981 s->SurfParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable; 8982 s->SurfParameters[k].SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format; 8983 s->SurfParameters[k].SurfaceTiling = display_cfg->plane_descriptors[k].surface.tiling; 8984 s->SurfParameters[k].BytePerPixelY = mode_lib->ms.BytePerPixelY[k]; 8985 s->SurfParameters[k].BytePerPixelC = mode_lib->ms.BytePerPixelC[k]; 8986 s->SurfParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; 8987 s->SurfParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; 8988 s->SurfParameters[k].VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; 8989 s->SurfParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; 8990 s->SurfParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; 8991 s->SurfParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch; 8992 s->SurfParameters[k].PitchC = display_cfg->plane_descriptors[k].surface.plane1.pitch; 8993 s->SurfParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary; 8994 s->SurfParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start; 8995 s->SurfParameters[k].ViewportYStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start; 8996 s->SurfParameters[k].ViewportXStartC = 
display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start; 8997 s->SurfParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start; 8998 s->SurfParameters[k].FORCE_ONE_ROW_FOR_FRAME = display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame; 8999 s->SurfParameters[k].SwathHeightY = mode_lib->ms.SwathHeightY[k]; 9000 s->SurfParameters[k].SwathHeightC = mode_lib->ms.SwathHeightC[k]; 9001 9002 s->SurfParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch; 9003 s->SurfParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch; 9004 } 9005 9006 CalculateVMRowAndSwath_params->display_cfg = display_cfg; 9007 CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; 9008 CalculateVMRowAndSwath_params->myPipe = s->SurfParameters; 9009 CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->ms.SurfaceSizeInMALL; 9010 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma; 9011 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma; 9012 CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes; 9013 CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->ms.SwathWidthY; 9014 CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->ms.SwathWidthC; 9015 CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes * 1024; 9016 CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes; 9017 CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present; 9018 9019 // output 9020 CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceeded; 9021 CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = s->dummy_integer_array[12]; 9022 
CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = s->dummy_integer_array[13]; 9023 CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->ms.dpte_row_height; 9024 CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->ms.dpte_row_height_chroma; 9025 CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = s->dummy_integer_array[14]; // VBA_DELTA 9026 CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = s->dummy_integer_array[15]; // VBA_DELTA 9027 CalculateVMRowAndSwath_params->vm_group_bytes = s->dummy_integer_array[16]; 9028 CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes; 9029 CalculateVMRowAndSwath_params->PixelPTEReqWidthY = s->dummy_integer_array[17]; 9030 CalculateVMRowAndSwath_params->PixelPTEReqHeightY = s->dummy_integer_array[18]; 9031 CalculateVMRowAndSwath_params->PTERequestSizeY = s->dummy_integer_array[19]; 9032 CalculateVMRowAndSwath_params->PixelPTEReqWidthC = s->dummy_integer_array[20]; 9033 CalculateVMRowAndSwath_params->PixelPTEReqHeightC = s->dummy_integer_array[21]; 9034 CalculateVMRowAndSwath_params->PTERequestSizeC = s->dummy_integer_array[22]; 9035 CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y; 9036 CalculateVMRowAndSwath_params->vmpg_height_y = s->vmpg_height_y; 9037 CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c; 9038 CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c; 9039 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = s->dummy_integer_array[23]; 9040 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = s->dummy_integer_array[24]; 9041 CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY; 9042 CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC; 9043 CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->ms.PrefillY; 9044 CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->ms.PrefillC; 9045 CalculateVMRowAndSwath_params->MaxNumSwathY = 
mode_lib->ms.MaxNumSwathY; 9046 CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC; 9047 CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->ms.dpte_row_bw; 9048 CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow; 9049 CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_l = s->dpte_row_bytes_per_row_l; 9050 CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_c = s->dpte_row_bytes_per_row_c; 9051 CalculateVMRowAndSwath_params->vm_bytes = mode_lib->ms.vm_bytes; 9052 CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->ms.use_one_row_for_frame; 9053 CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->ms.use_one_row_for_frame_flip; 9054 CalculateVMRowAndSwath_params->is_using_mall_for_ss = s->dummy_boolean_array[0]; 9055 CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = s->dummy_boolean_array[1]; 9056 CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = s->dummy_integer_array[25]; 9057 CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = mode_lib->ms.DCCMetaBufferSizeNotExceeded; 9058 CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->ms.meta_row_bw; 9059 CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->ms.meta_row_bytes; 9060 CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l; 9061 CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c; 9062 CalculateVMRowAndSwath_params->meta_req_width_luma = s->dummy_integer_array[26]; 9063 CalculateVMRowAndSwath_params->meta_req_height_luma = s->dummy_integer_array[27]; 9064 CalculateVMRowAndSwath_params->meta_row_width_luma = s->dummy_integer_array[28]; 9065 CalculateVMRowAndSwath_params->meta_row_height_luma = s->meta_row_height_luma; 9066 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = s->dummy_integer_array[29]; 9067 CalculateVMRowAndSwath_params->meta_req_width_chroma = s->dummy_integer_array[30]; 9068 
CalculateVMRowAndSwath_params->meta_req_height_chroma = s->dummy_integer_array[31]; 9069 CalculateVMRowAndSwath_params->meta_row_width_chroma = s->dummy_integer_array[32]; 9070 CalculateVMRowAndSwath_params->meta_row_height_chroma = s->meta_row_height_chroma; 9071 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = s->dummy_integer_array[33]; 9072 9073 CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params); 9074 9075 mode_lib->ms.support.PTEBufferSizeNotExceeded = true; 9076 mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = true; 9077 9078 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { 9079 if (mode_lib->ms.PTEBufferSizeNotExceeded[k] == false) 9080 mode_lib->ms.support.PTEBufferSizeNotExceeded = false; 9081 9082 if (mode_lib->ms.DCCMetaBufferSizeNotExceeded[k] == false) 9083 mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = false; 9084 9085 #ifdef __DML_VBA_DEBUG__ 9086 DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.PTEBufferSizeNotExceeded[k]); 9087 DML_LOG_VERBOSE("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.DCCMetaBufferSizeNotExceeded[k]); 9088 #endif 9089 } 9090 #ifdef __DML_VBA_DEBUG__ 9091 DML_LOG_VERBOSE("DML::%s: PTEBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.PTEBufferSizeNotExceeded); 9092 DML_LOG_VERBOSE("DML::%s: DCCMetaBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.DCCMetaBufferSizeNotExceeded); 9093 #endif 9094 9095 /* VActive bytes to fetch for UCLK P-State */ 9096 calculate_bytes_to_fetch_required_to_hide_latency_params->display_cfg = display_cfg; 9097 calculate_bytes_to_fetch_required_to_hide_latency_params->mrq_present = mode_lib->ip.dcn_mrq_present; 9098 9099 calculate_bytes_to_fetch_required_to_hide_latency_params->num_active_planes = mode_lib->ms.num_active_planes; 9100 calculate_bytes_to_fetch_required_to_hide_latency_params->num_of_dpp = mode_lib->ms.NoOfDPP; 9101 
calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_l = s->meta_row_height_luma; 9102 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_c = s->meta_row_height_chroma; 9103 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l; 9104 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c; 9105 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_l = mode_lib->ms.dpte_row_height; 9106 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_c = mode_lib->ms.dpte_row_height_chroma; 9107 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_l = s->dpte_row_bytes_per_row_l; 9108 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_c = s->dpte_row_bytes_per_row_c; 9109 calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_l = mode_lib->ms.BytePerPixelY; 9110 calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_c = mode_lib->ms.BytePerPixelC; 9111 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_l = mode_lib->ms.SwathWidthY; 9112 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_c = mode_lib->ms.SwathWidthC; 9113 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_l = mode_lib->ms.SwathHeightY; 9114 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_c = mode_lib->ms.SwathHeightC; 9115 calculate_bytes_to_fetch_required_to_hide_latency_params->latency_to_hide_us[0] = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us; 9116 9117 /* outputs */ 9118 calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_l = s->pstate_bytes_required_l[dml2_pstate_type_uclk]; 9119 calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_c = 
s->pstate_bytes_required_c[dml2_pstate_type_uclk]; 9120 9121 calculate_bytes_to_fetch_required_to_hide_latency(calculate_bytes_to_fetch_required_to_hide_latency_params); 9122 9123 /* Excess VActive bandwidth required to fill DET */ 9124 calculate_excess_vactive_bandwidth_required( 9125 display_cfg, 9126 mode_lib->ms.num_active_planes, 9127 s->pstate_bytes_required_l[dml2_pstate_type_uclk], 9128 s->pstate_bytes_required_c[dml2_pstate_type_uclk], 9129 /* outputs */ 9130 mode_lib->ms.excess_vactive_fill_bw_l, 9131 mode_lib->ms.excess_vactive_fill_bw_c); 9132 9133 mode_lib->ms.UrgLatency = CalculateUrgentLatency( 9134 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us, 9135 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us, 9136 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us, 9137 mode_lib->soc.do_urgent_latency_adjustment, 9138 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us, 9139 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz, 9140 mode_lib->ms.FabricClock, 9141 mode_lib->ms.uclk_freq_mhz, 9142 mode_lib->soc.qos_parameters.qos_type, 9143 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles, 9144 mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles, 9145 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, 9146 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, 9147 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin, 9148 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); 9149 9150 mode_lib->ms.TripToMemory = CalculateTripToMemory( 9151 mode_lib->ms.UrgLatency, 9152 mode_lib->ms.FabricClock, 9153 mode_lib->ms.uclk_freq_mhz, 9154 mode_lib->soc.qos_parameters.qos_type, 9155 
mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles, 9156 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, 9157 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, 9158 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, 9159 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); 9160 9161 mode_lib->ms.TripToMemory = math_max2(mode_lib->ms.UrgLatency, mode_lib->ms.TripToMemory); 9162 9163 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { 9164 double line_time_us = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); 9165 bool cursor_not_enough_urgent_latency_hiding = false; 9166 9167 if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) { 9168 calculate_cursor_req_attributes( 9169 display_cfg->plane_descriptors[k].cursor.cursor_width, 9170 display_cfg->plane_descriptors[k].cursor.cursor_bpp, 9171 9172 // output 9173 &s->cursor_lines_per_chunk[k], 9174 &s->cursor_bytes_per_line[k], 9175 &s->cursor_bytes_per_chunk[k], 9176 &s->cursor_bytes[k]); 9177 9178 calculate_cursor_urgent_burst_factor( 9179 mode_lib->ip.cursor_buffer_size, 9180 display_cfg->plane_descriptors[k].cursor.cursor_width, 9181 s->cursor_bytes_per_chunk[k], 9182 s->cursor_lines_per_chunk[k], 9183 line_time_us, 9184 mode_lib->ms.UrgLatency, 9185 9186 // output 9187 &mode_lib->ms.UrgentBurstFactorCursor[k], 9188 &cursor_not_enough_urgent_latency_hiding); 9189 } 9190 9191 mode_lib->ms.UrgentBurstFactorCursorPre[k] = mode_lib->ms.UrgentBurstFactorCursor[k]; 9192 9193 #ifdef __DML_VBA_DEBUG__ 9194 DML_LOG_VERBOSE("DML::%s: k=%d, Calling CalculateUrgentBurstFactor\n", __func__, k); 9195 DML_LOG_VERBOSE("DML::%s: k=%d, VRatio=%f\n", __func__, k, 
display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); 9196 DML_LOG_VERBOSE("DML::%s: k=%d, VRatioChroma=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio); 9197 #endif 9198 9199 CalculateUrgentBurstFactor( 9200 &display_cfg->plane_descriptors[k], 9201 mode_lib->ms.swath_width_luma_ub[k], 9202 mode_lib->ms.swath_width_chroma_ub[k], 9203 mode_lib->ms.SwathHeightY[k], 9204 mode_lib->ms.SwathHeightC[k], 9205 line_time_us, 9206 mode_lib->ms.UrgLatency, 9207 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, 9208 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, 9209 mode_lib->ms.BytePerPixelInDETY[k], 9210 mode_lib->ms.BytePerPixelInDETC[k], 9211 mode_lib->ms.DETBufferSizeY[k], 9212 mode_lib->ms.DETBufferSizeC[k], 9213 9214 // Output 9215 &mode_lib->ms.UrgentBurstFactorLuma[k], 9216 &mode_lib->ms.UrgentBurstFactorChroma[k], 9217 &mode_lib->ms.NotEnoughUrgentLatencyHiding[k]); 9218 9219 mode_lib->ms.NotEnoughUrgentLatencyHiding[k] = mode_lib->ms.NotEnoughUrgentLatencyHiding[k] || cursor_not_enough_urgent_latency_hiding; 9220 } 9221 9222 CalculateDCFCLKDeepSleep( 9223 display_cfg, 9224 mode_lib->ms.num_active_planes, 9225 mode_lib->ms.BytePerPixelY, 9226 mode_lib->ms.BytePerPixelC, 9227 mode_lib->ms.SwathWidthY, 9228 mode_lib->ms.SwathWidthC, 9229 mode_lib->ms.NoOfDPP, 9230 mode_lib->ms.PSCL_FACTOR, 9231 mode_lib->ms.PSCL_FACTOR_CHROMA, 9232 mode_lib->ms.RequiredDPPCLK, 9233 mode_lib->ms.vactive_sw_bw_l, 9234 mode_lib->ms.vactive_sw_bw_c, 9235 mode_lib->soc.return_bus_width_bytes, 9236 9237 /* Output */ 9238 &mode_lib->ms.dcfclk_deepsleep); 9239 9240 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { 9241 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { 9242 mode_lib->ms.WritebackDelayTime[k] = mode_lib->soc.qos_parameters.writeback.base_latency_us + CalculateWriteBackDelay( 
9243 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format, 9244 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio, 9245 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio, 9246 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps, 9247 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width, 9248 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height, 9249 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height, 9250 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->ms.RequiredDISPCLK; 9251 } else { 9252 mode_lib->ms.WritebackDelayTime[k] = 0.0; 9253 } 9254 } 9255 9256 // MaximumVStartup is actually Tvstartup_min in DCN4 programming guide 9257 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { 9258 bool isInterlaceTiming = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !mode_lib->ip.ptoi_supported); 9259 s->MaximumVStartup[k] = CalculateMaxVStartup( 9260 mode_lib->ip.ptoi_supported, 9261 mode_lib->ip.vblank_nom_default_us, 9262 &display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing, 9263 mode_lib->ms.WritebackDelayTime[k]); 9264 mode_lib->ms.MaxVStartupLines[k] = (isInterlaceTiming ? 
(2 * s->MaximumVStartup[k]) : s->MaximumVStartup[k]); 9265 } 9266 9267 #ifdef __DML_VBA_DEBUG__ 9268 DML_LOG_VERBOSE("DML::%s: k=%u, MaximumVStartup = %u\n", __func__, k, s->MaximumVStartup[k]); 9269 #endif 9270 9271 /* Immediate Flip and MALL parameters */ 9272 s->ImmediateFlipRequired = false; 9273 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { 9274 s->ImmediateFlipRequired = s->ImmediateFlipRequired || display_cfg->plane_descriptors[k].immediate_flip; 9275 } 9276 9277 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = false; 9278 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { 9279 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = 9280 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe || 9281 ((display_cfg->hostvm_enable == true || display_cfg->plane_descriptors[k].immediate_flip == true) && 9282 (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame || dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))); 9283 } 9284 9285 mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = false; 9286 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { 9287 mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen || 9288 ((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable || display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))) || 9289 ((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_disable || display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && 
(display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame)); 9290 } 9291 9292 s->FullFrameMALLPStateMethod = false; 9293 s->SubViewportMALLPStateMethod = false; 9294 s->PhantomPipeMALLPStateMethod = false; 9295 s->SubViewportMALLRefreshGreaterThan120Hz = false; 9296 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { 9297 if (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame) 9298 s->FullFrameMALLPStateMethod = true; 9299 if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) { 9300 s->SubViewportMALLPStateMethod = true; 9301 if (!display_cfg->overrides.enable_subvp_implicit_pmo) { 9302 // For dv, small frame tests will have very high refresh rate 9303 unsigned long long refresh_rate = (unsigned long long) ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz * 1000 / 9304 (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / 9305 (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total); 9306 if (refresh_rate > 120) 9307 s->SubViewportMALLRefreshGreaterThan120Hz = true; 9308 } 9309 } 9310 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) 9311 s->PhantomPipeMALLPStateMethod = true; 9312 } 9313 mode_lib->ms.support.InvalidCombinationOfMALLUseForPState = (s->SubViewportMALLPStateMethod != s->PhantomPipeMALLPStateMethod) || 9314 (s->SubViewportMALLPStateMethod && s->FullFrameMALLPStateMethod) || s->SubViewportMALLRefreshGreaterThan120Hz; 9315 9316 #ifdef __DML_VBA_DEBUG__ 9317 DML_LOG_VERBOSE("DML::%s: SubViewportMALLPStateMethod = %u\n", __func__, s->SubViewportMALLPStateMethod); 9318 DML_LOG_VERBOSE("DML::%s: PhantomPipeMALLPStateMethod = %u\n", __func__, s->PhantomPipeMALLPStateMethod); 9319 
DML_LOG_VERBOSE("DML::%s: FullFrameMALLPStateMethod = %u\n", __func__, s->FullFrameMALLPStateMethod); 9320 DML_LOG_VERBOSE("DML::%s: SubViewportMALLRefreshGreaterThan120Hz = %u\n", __func__, s->SubViewportMALLRefreshGreaterThan120Hz); 9321 DML_LOG_VERBOSE("DML::%s: InvalidCombinationOfMALLUseForPState = %u\n", __func__, mode_lib->ms.support.InvalidCombinationOfMALLUseForPState); 9322 DML_LOG_VERBOSE("DML::%s: in_out_params->min_clk_index = %u\n", __func__, in_out_params->min_clk_index); 9323 DML_LOG_VERBOSE("DML::%s: mode_lib->ms.DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK); 9324 DML_LOG_VERBOSE("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock); 9325 DML_LOG_VERBOSE("DML::%s: mode_lib->ms.uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz); 9326 #endif 9327 9328 mode_lib->ms.support.OutstandingRequestsSupport = true; 9329 mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = true; 9330 9331 mode_lib->ms.support.avg_urgent_latency_us 9332 = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz 9333 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0) 9334 + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) 9335 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0); 9336 9337 mode_lib->ms.support.avg_non_urgent_latency_us 9338 = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz 9339 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0) 9340 + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) 9341 * (1 + 
mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0); 9342 9343 mode_lib->ms.support.max_non_urgent_latency_us 9344 = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles 9345 / mode_lib->ms.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0) 9346 + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock 9347 + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock 9348 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0); 9349 9350 for (k = 0; k < mode_lib->ms.num_active_planes; k++) { 9351 9352 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { 9353 outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_luma[k] 9354 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)); 9355 9356 if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) { 9357 mode_lib->ms.support.OutstandingRequestsSupport = false; 9358 } 9359 9360 if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) { 9361 mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false; 9362 } 9363 9364 #ifdef __DML_VBA_DEBUG__ 9365 DML_LOG_VERBOSE("DML::%s: avg_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_urgent_latency_us); 9366 DML_LOG_VERBOSE("DML::%s: avg_non_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_non_urgent_latency_us); 9367 DML_LOG_VERBOSE("DML::%s: k=%d, request_size_bytes_luma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_luma[k]); 9368 DML_LOG_VERBOSE("DML::%s: k=%d, outstanding_latency_us = %f (luma)\n", __func__, k, outstanding_latency_us); 9369 #endif 9370 } 9371 9372 if 
(mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x && mode_lib->ms.BytePerPixelC[k] > 0) { 9373 outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_chroma[k] 9374 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)); 9375 9376 if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) { 9377 mode_lib->ms.support.OutstandingRequestsSupport = false; 9378 } 9379 9380 if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) { 9381 mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false; 9382 } 9383 #ifdef __DML_VBA_DEBUG__ 9384 DML_LOG_VERBOSE("DML::%s: k=%d, request_size_bytes_chroma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_chroma[k]); 9385 DML_LOG_VERBOSE("DML::%s: k=%d, outstanding_latency_us = %f (chroma)\n", __func__, k, outstanding_latency_us); 9386 #endif 9387 } 9388 } 9389 9390 memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params)); 9391 if (mode_lib->soc.mcache_size_bytes == 0 || mode_lib->ip.dcn_mrq_present) { 9392 for (k = 0; k < mode_lib->ms.num_active_planes; k++) { 9393 mode_lib->ms.mall_prefetch_sdp_overhead_factor[k] = 1.0; 9394 mode_lib->ms.mall_prefetch_dram_overhead_factor[k] = 1.0; 9395 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0; 9396 mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0; 9397 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0; 9398 mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0; 9399 } 9400 } else { 9401 for (k = 0; k < mode_lib->ms.num_active_planes; k++) { 9402 calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable; 9403 calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count; 9404 calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes; 9405 calculate_mcache_setting_params->mcache_size_bytes = 
mode_lib->soc.mcache_size_bytes; 9406 calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes; 9407 calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable; 9408 calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; 9409 9410 calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format; 9411 calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle); 9412 calculate_mcache_setting_params->vp_stationary = display_cfg->plane_descriptors[k].composition.viewport.stationary; 9413 calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling; 9414 calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall; 9415 9416 calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start; 9417 calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start; 9418 calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width; 9419 calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height; 9420 calculate_mcache_setting_params->blk_width_l = mode_lib->ms.MacroTileWidthY[k]; 9421 calculate_mcache_setting_params->blk_height_l = mode_lib->ms.MacroTileHeightY[k]; 9422 calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k]; 9423 calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k]; 9424 calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k]; 9425 calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->ms.BytePerPixelY[k]; 
9426 9427 calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start; 9428 calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start; 9429 calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width; 9430 calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height; 9431 calculate_mcache_setting_params->blk_width_c = mode_lib->ms.MacroTileWidthC[k]; 9432 calculate_mcache_setting_params->blk_height_c = mode_lib->ms.MacroTileHeightC[k]; 9433 calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k]; 9434 calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k]; 9435 calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k]; 9436 calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->ms.BytePerPixelC[k]; 9437 9438 // output 9439 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k]; 9440 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k]; 9441 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k]; 9442 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k]; 9443 9444 calculate_mcache_setting_params->num_mcaches_l = &mode_lib->ms.num_mcaches_l[k]; 9445 calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->ms.mcache_row_bytes_l[k]; 9446 calculate_mcache_setting_params->mcache_row_bytes_per_channel_l = &mode_lib->ms.mcache_row_bytes_per_channel_l[k]; 9447 calculate_mcache_setting_params->mcache_offsets_l = mode_lib->ms.mcache_offsets_l[k]; 9448 calculate_mcache_setting_params->mcache_shift_granularity_l = 
&mode_lib->ms.mcache_shift_granularity_l[k]; 9449 9450 calculate_mcache_setting_params->num_mcaches_c = &mode_lib->ms.num_mcaches_c[k]; 9451 calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->ms.mcache_row_bytes_c[k]; 9452 calculate_mcache_setting_params->mcache_row_bytes_per_channel_c = &mode_lib->ms.mcache_row_bytes_per_channel_c[k]; 9453 calculate_mcache_setting_params->mcache_offsets_c = mode_lib->ms.mcache_offsets_c[k]; 9454 calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->ms.mcache_shift_granularity_c[k]; 9455 9456 calculate_mcache_setting_params->mall_comb_mcache_l = &mode_lib->ms.mall_comb_mcache_l[k]; 9457 calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->ms.mall_comb_mcache_c[k]; 9458 calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->ms.lc_comb_mcache[k]; 9459 9460 calculate_mcache_setting(&mode_lib->scratch, calculate_mcache_setting_params); 9461 } 9462 9463 calculate_mall_bw_overhead_factor( 9464 mode_lib->ms.mall_prefetch_sdp_overhead_factor, 9465 mode_lib->ms.mall_prefetch_dram_overhead_factor, 9466 9467 // input 9468 display_cfg, 9469 mode_lib->ms.num_active_planes); 9470 } 9471 9472 // Calculate all the bandwidth available 9473 // Need anothe bw for latency evaluation 9474 calculate_bandwidth_available( 9475 mode_lib->ms.support.avg_bandwidth_available_min, // not used 9476 mode_lib->ms.support.avg_bandwidth_available, // not used 9477 mode_lib->ms.support.urg_bandwidth_available_min_latency, 9478 mode_lib->ms.support.urg_bandwidth_available, // not used 9479 mode_lib->ms.support.urg_bandwidth_available_vm_only, // not used 9480 mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm, // not used 9481 9482 &mode_lib->soc, 9483 display_cfg->hostvm_enable, 9484 mode_lib->ms.DCFCLK, 9485 mode_lib->ms.FabricClock, 9486 mode_lib->ms.dram_bw_mbps); 9487 9488 calculate_bandwidth_available( 9489 mode_lib->ms.support.avg_bandwidth_available_min, 9490 
mode_lib->ms.support.avg_bandwidth_available, 9491 mode_lib->ms.support.urg_bandwidth_available_min, 9492 mode_lib->ms.support.urg_bandwidth_available, 9493 mode_lib->ms.support.urg_bandwidth_available_vm_only, 9494 mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm, 9495 9496 &mode_lib->soc, 9497 display_cfg->hostvm_enable, 9498 mode_lib->ms.MaxDCFCLK, 9499 mode_lib->ms.MaxFabricClock, 9500 #ifdef DML_MODE_SUPPORT_USE_DPM_DRAM_BW 9501 mode_lib->ms.dram_bw_mbps); 9502 #else 9503 mode_lib->ms.max_dram_bw_mbps); 9504 #endif 9505 9506 // Average BW support check 9507 calculate_avg_bandwidth_required( 9508 mode_lib->ms.support.avg_bandwidth_required, 9509 // input 9510 display_cfg, 9511 mode_lib->ms.num_active_planes, 9512 mode_lib->ms.vactive_sw_bw_l, 9513 mode_lib->ms.vactive_sw_bw_c, 9514 mode_lib->ms.cursor_bw, 9515 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0, 9516 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1, 9517 mode_lib->ms.mall_prefetch_dram_overhead_factor, 9518 mode_lib->ms.mall_prefetch_sdp_overhead_factor); 9519 9520 for (m = 0; m < dml2_core_internal_bw_max; m++) { // check sdp and dram 9521 mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_idle][m] = 1; 9522 mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_active][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][m]); 9523 mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_svp_prefetch][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][m]); 9524 } 9525 9526 mode_lib->ms.support.AvgBandwidthSupport = true; 9527 mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = true; 9528 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { 9529 if 
(mode_lib->ms.NotEnoughUrgentLatencyHiding[k]) { 9530 mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = false; 9531 DML_LOG_VERBOSE("DML::%s: k=%u NotEnoughUrgentLatencyHiding set\n", __func__, k); 9532 9533 } 9534 } 9535 for (m = 0; m < dml2_core_internal_soc_state_max; m++) { 9536 for (n = 0; n < dml2_core_internal_bw_max; n++) { // check sdp and dram 9537 if (!mode_lib->ms.support.avg_bandwidth_support_ok[m][n] && (m == dml2_core_internal_soc_state_sys_active || mode_lib->soc.mall_allocated_for_dcn_mbytes > 0)) { 9538 mode_lib->ms.support.AvgBandwidthSupport = false; 9539 #ifdef __DML_VBA_DEBUG__ 9540 DML_LOG_VERBOSE("DML::%s: avg_bandwidth_support_ok[%s][%s] not ok\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n)); 9541 #endif 9542 } 9543 } 9544 } 9545 9546 dml_core_ms_prefetch_check(mode_lib, display_cfg); 9547 9548 mode_lib->ms.support.max_urgent_latency_us = s->mSOCParameters.max_urgent_latency_us; 9549 9550 //Re-ordering Buffer Support Check 9551 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { 9552 if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 9553 / mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= s->mSOCParameters.max_urgent_latency_us) { 9554 mode_lib->ms.support.ROBSupport = true; 9555 } else { 9556 mode_lib->ms.support.ROBSupport = false; 9557 } 9558 } else { 9559 if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) { 9560 mode_lib->ms.support.ROBSupport = true; 9561 } else { 9562 mode_lib->ms.support.ROBSupport = false; 9563 } 9564 } 9565 9566 /* VActive fill time calculations (informative) */ 9567 calculate_vactive_det_fill_latency( 9568 display_cfg, 9569 mode_lib->ms.num_active_planes, 9570 
s->pstate_bytes_required_l[dml2_pstate_type_uclk], 9571 s->pstate_bytes_required_c[dml2_pstate_type_uclk], 9572 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0, 9573 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1, 9574 mode_lib->ms.vactive_sw_bw_l, 9575 mode_lib->ms.vactive_sw_bw_c, 9576 mode_lib->ms.surface_avg_vactive_required_bw, 9577 mode_lib->ms.surface_peak_required_bw, 9578 /* outputs */ 9579 mode_lib->ms.pstate_vactive_det_fill_delay_us[dml2_pstate_type_uclk]); 9580 9581 #ifdef __DML_VBA_DEBUG__ 9582 DML_LOG_VERBOSE("DML::%s: max_urgent_latency_us = %f\n", __func__, s->mSOCParameters.max_urgent_latency_us); 9583 DML_LOG_VERBOSE("DML::%s: ROBSupport = %u\n", __func__, mode_lib->ms.support.ROBSupport); 9584 #endif 9585 9586 /*Mode Support, Voltage State and SOC Configuration*/ 9587 { 9588 if (mode_lib->ms.support.ScaleRatioAndTapsSupport 9589 && mode_lib->ms.support.SourceFormatPixelAndScanSupport 9590 && mode_lib->ms.support.ViewportSizeSupport 9591 && !mode_lib->ms.support.LinkRateDoesNotMatchDPVersion 9592 && !mode_lib->ms.support.LinkRateForMultistreamNotIndicated 9593 && !mode_lib->ms.support.BPPForMultistreamNotIndicated 9594 && !mode_lib->ms.support.MultistreamWithHDMIOreDP 9595 && !mode_lib->ms.support.ExceededMultistreamSlots 9596 && !mode_lib->ms.support.MSOOrODMSplitWithNonDPLink 9597 && !mode_lib->ms.support.NotEnoughLanesForMSO 9598 && !mode_lib->ms.support.P2IWith420 9599 && !mode_lib->ms.support.DSC422NativeNotSupported 9600 && mode_lib->ms.support.DSCSlicesODMModeSupported 9601 && !mode_lib->ms.support.NotEnoughDSCUnits 9602 && !mode_lib->ms.support.NotEnoughDSCSlices 9603 && !mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe 9604 && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen 9605 && !mode_lib->ms.support.DSCCLKRequiredMoreThanSupported 9606 && mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport 9607 && !mode_lib->ms.support.DTBCLKRequiredMoreThanSupported 9608 && 
!mode_lib->ms.support.InvalidCombinationOfMALLUseForPState 9609 && mode_lib->ms.support.ROBSupport 9610 && mode_lib->ms.support.OutstandingRequestsSupport 9611 && mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance 9612 && mode_lib->ms.support.DISPCLK_DPPCLK_Support 9613 && mode_lib->ms.support.TotalAvailablePipesSupport 9614 && mode_lib->ms.support.NumberOfOTGSupport 9615 && mode_lib->ms.support.NumberOfHDMIFRLSupport 9616 && mode_lib->ms.support.NumberOfDP2p0Support 9617 && mode_lib->ms.support.EnoughWritebackUnits 9618 && mode_lib->ms.support.WritebackLatencySupport 9619 && mode_lib->ms.support.WritebackScaleRatioAndTapsSupport 9620 && mode_lib->ms.support.CursorSupport 9621 && mode_lib->ms.support.PitchSupport 9622 && !mode_lib->ms.support.ViewportExceedsSurface 9623 && mode_lib->ms.support.PrefetchSupported 9624 && mode_lib->ms.support.EnoughUrgentLatencyHidingSupport 9625 && mode_lib->ms.support.AvgBandwidthSupport 9626 && mode_lib->ms.support.DynamicMetadataSupported 9627 && mode_lib->ms.support.VRatioInPrefetchSupported 9628 && mode_lib->ms.support.PTEBufferSizeNotExceeded 9629 && mode_lib->ms.support.DCCMetaBufferSizeNotExceeded 9630 && !mode_lib->ms.support.ExceededMALLSize 9631 && mode_lib->ms.support.g6_temp_read_support 9632 && ((!display_cfg->hostvm_enable && !s->ImmediateFlipRequired) || mode_lib->ms.support.ImmediateFlipSupport)) { 9633 DML_LOG_VERBOSE("DML::%s: mode is supported\n", __func__); 9634 mode_lib->ms.support.ModeSupport = true; 9635 } else { 9636 DML_LOG_VERBOSE("DML::%s: mode is NOT supported\n", __func__); 9637 mode_lib->ms.support.ModeSupport = false; 9638 } 9639 } 9640 9641 // Since now the mode_support work on 1 particular power state, so there is only 1 state idx (index 0). 
9642 DML_LOG_VERBOSE("DML::%s: ModeSupport = %u\n", __func__, mode_lib->ms.support.ModeSupport); 9643 DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport); 9644 9645 for (k = 0; k < mode_lib->ms.num_active_planes; k++) { 9646 mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[k]; 9647 mode_lib->ms.support.DPPPerSurface[k] = mode_lib->ms.NoOfDPP[k]; 9648 } 9649 9650 for (k = 0; k < mode_lib->ms.num_active_planes; k++) { 9651 mode_lib->ms.support.ODMMode[k] = mode_lib->ms.ODMMode[k]; 9652 mode_lib->ms.support.DSCEnabled[k] = mode_lib->ms.RequiresDSC[k]; 9653 mode_lib->ms.support.FECEnabled[k] = mode_lib->ms.RequiresFEC[k]; 9654 mode_lib->ms.support.OutputBpp[k] = mode_lib->ms.OutputBpp[k]; 9655 mode_lib->ms.support.OutputType[k] = mode_lib->ms.OutputType[k]; 9656 mode_lib->ms.support.OutputRate[k] = mode_lib->ms.OutputRate[k]; 9657 9658 #if defined(__DML_VBA_DEBUG__) 9659 DML_LOG_VERBOSE("DML::%s: k=%d, ODMMode = %u\n", __func__, k, mode_lib->ms.support.ODMMode[k]); 9660 DML_LOG_VERBOSE("DML::%s: k=%d, DSCEnabled = %u\n", __func__, k, mode_lib->ms.support.DSCEnabled[k]); 9661 #endif 9662 } 9663 9664 #if defined(__DML_VBA_DEBUG__) 9665 if (!mode_lib->ms.support.ModeSupport) 9666 dml2_print_mode_support_info(&mode_lib->ms.support, true); 9667 9668 DML_LOG_VERBOSE("DML::%s: --- DONE --- \n", __func__); 9669 #endif 9670 9671 return mode_lib->ms.support.ModeSupport; 9672 } 9673 9674 unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support_ex *in_out_params) 9675 { 9676 unsigned int result; 9677 9678 DML_LOG_VERBOSE("DML::%s: ------------- START ----------\n", __func__); 9679 result = dml_core_mode_support(in_out_params); 9680 9681 if (result) 9682 *in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support; 9683 9684 DML_LOG_VERBOSE("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, result, in_out_params->min_clk_index); 9685 9686 for (unsigned int k 
= 0; k < in_out_params->in_display_cfg->num_planes; k++) 9687 DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, in_out_params->in_display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); 9688 9689 DML_LOG_VERBOSE("DML::%s: ------------- DONE ----------\n", __func__); 9690 9691 return result; 9692 } 9693 9694 static void CalculatePixelDeliveryTimes( 9695 const struct dml2_display_cfg *display_cfg, 9696 const struct core_display_cfg_support_info *cfg_support_info, 9697 unsigned int NumberOfActiveSurfaces, 9698 double VRatioPrefetchY[], 9699 double VRatioPrefetchC[], 9700 unsigned int swath_width_luma_ub[], 9701 unsigned int swath_width_chroma_ub[], 9702 double PSCL_THROUGHPUT[], 9703 double PSCL_THROUGHPUT_CHROMA[], 9704 double Dppclk[], 9705 unsigned int BytePerPixelC[], 9706 unsigned int req_per_swath_ub_l[], 9707 unsigned int req_per_swath_ub_c[], 9708 9709 // Output 9710 double DisplayPipeLineDeliveryTimeLuma[], 9711 double DisplayPipeLineDeliveryTimeChroma[], 9712 double DisplayPipeLineDeliveryTimeLumaPrefetch[], 9713 double DisplayPipeLineDeliveryTimeChromaPrefetch[], 9714 double DisplayPipeRequestDeliveryTimeLuma[], 9715 double DisplayPipeRequestDeliveryTimeChroma[], 9716 double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 9717 double DisplayPipeRequestDeliveryTimeChromaPrefetch[]) 9718 { 9719 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { 9720 double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); 9721 9722 #ifdef __DML_VBA_DEBUG__ 9723 DML_LOG_VERBOSE("DML::%s: k=%u : HRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); 9724 DML_LOG_VERBOSE("DML::%s: k=%u : VRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); 9725 DML_LOG_VERBOSE("DML::%s: k=%u : HRatioChroma = %f\n", __func__, k, 
display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio); 9726 DML_LOG_VERBOSE("DML::%s: k=%u : VRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio); 9727 DML_LOG_VERBOSE("DML::%s: k=%u : VRatioPrefetchY = %f\n", __func__, k, VRatioPrefetchY[k]); 9728 DML_LOG_VERBOSE("DML::%s: k=%u : VRatioPrefetchC = %f\n", __func__, k, VRatioPrefetchC[k]); 9729 DML_LOG_VERBOSE("DML::%s: k=%u : swath_width_luma_ub = %u\n", __func__, k, swath_width_luma_ub[k]); 9730 DML_LOG_VERBOSE("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, k, swath_width_chroma_ub[k]); 9731 DML_LOG_VERBOSE("DML::%s: k=%u : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]); 9732 DML_LOG_VERBOSE("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]); 9733 DML_LOG_VERBOSE("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, k, cfg_support_info->plane_support_info[k].dpps_used); 9734 DML_LOG_VERBOSE("DML::%s: k=%u : pixel_clock_mhz = %f\n", __func__, k, pixel_clock_mhz); 9735 DML_LOG_VERBOSE("DML::%s: k=%u : Dppclk = %f\n", __func__, k, Dppclk[k]); 9736 #endif 9737 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) { 9738 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz; 9739 } else { 9740 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; 9741 } 9742 9743 if (BytePerPixelC[k] == 0) { 9744 DisplayPipeLineDeliveryTimeChroma[k] = 0; 9745 } else { 9746 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio <= 1) { 9747 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz; 9748 } else { 9749 
DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; 9750 } 9751 } 9752 9753 if (VRatioPrefetchY[k] <= 1) { 9754 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz; 9755 } else { 9756 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; 9757 } 9758 9759 if (BytePerPixelC[k] == 0) { 9760 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; 9761 } else { 9762 if (VRatioPrefetchC[k] <= 1) { 9763 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz; 9764 } else { 9765 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; 9766 } 9767 } 9768 #ifdef __DML_VBA_DEBUG__ 9769 DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); 9770 DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); 9771 DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); 9772 DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); 9773 #endif 9774 } 9775 9776 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { 9777 9778 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub_l[k]; 9779 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub_l[k]; 9780 if (BytePerPixelC[k] == 0) { 9781 
DisplayPipeRequestDeliveryTimeChroma[k] = 0; 9782 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; 9783 } else { 9784 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub_c[k]; 9785 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub_c[k]; 9786 } 9787 #ifdef __DML_VBA_DEBUG__ 9788 DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); 9789 DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); 9790 DML_LOG_VERBOSE("DML::%s: k=%u : req_per_swath_ub_l = %d\n", __func__, k, req_per_swath_ub_l[k]); 9791 DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); 9792 DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); 9793 DML_LOG_VERBOSE("DML::%s: k=%u : req_per_swath_ub_c = %d\n", __func__, k, req_per_swath_ub_c[k]); 9794 #endif 9795 } 9796 } 9797 9798 static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTETimes_params *p) 9799 { 9800 unsigned int meta_chunk_width; 9801 unsigned int min_meta_chunk_width; 9802 unsigned int meta_chunk_per_row_int; 9803 unsigned int meta_row_remainder; 9804 unsigned int meta_chunk_threshold; 9805 unsigned int meta_chunks_per_row_ub; 9806 unsigned int meta_chunk_width_chroma; 9807 unsigned int min_meta_chunk_width_chroma; 9808 unsigned int meta_chunk_per_row_int_chroma; 9809 unsigned int meta_row_remainder_chroma; 9810 unsigned int meta_chunk_threshold_chroma; 9811 unsigned int meta_chunks_per_row_ub_chroma; 9812 unsigned int dpte_group_width_luma; 9813 unsigned int dpte_groups_per_row_luma_ub; 9814 unsigned int dpte_group_width_chroma; 9815 unsigned int 
dpte_groups_per_row_chroma_ub; 9816 double pixel_clock_mhz; 9817 9818 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { 9819 p->DST_Y_PER_PTE_ROW_NOM_L[k] = p->dpte_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; 9820 if (p->BytePerPixelC[k] == 0) { 9821 p->DST_Y_PER_PTE_ROW_NOM_C[k] = 0; 9822 } else { 9823 p->DST_Y_PER_PTE_ROW_NOM_C[k] = p->dpte_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; 9824 } 9825 p->DST_Y_PER_META_ROW_NOM_L[k] = p->meta_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; 9826 if (p->BytePerPixelC[k] == 0) { 9827 p->DST_Y_PER_META_ROW_NOM_C[k] = 0; 9828 } else { 9829 p->DST_Y_PER_META_ROW_NOM_C[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; 9830 } 9831 } 9832 9833 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { 9834 if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true && p->mrq_present) { 9835 meta_chunk_width = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelY[k] / p->meta_row_height[k]; 9836 min_meta_chunk_width = p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelY[k] / p->meta_row_height[k]; 9837 meta_chunk_per_row_int = p->meta_row_width[k] / meta_chunk_width; 9838 meta_row_remainder = p->meta_row_width[k] % meta_chunk_width; 9839 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) { 9840 meta_chunk_threshold = 2 * min_meta_chunk_width - p->meta_req_width[k]; 9841 } else { 9842 meta_chunk_threshold = 2 * min_meta_chunk_width - p->meta_req_height[k]; 9843 } 9844 if (meta_row_remainder <= meta_chunk_threshold) { 9845 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; 9846 } else { 9847 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; 9848 } 9849 p->TimePerMetaChunkNominal[k] = p->meta_row_height[k] / 
p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio * 9850 p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / 9851 (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub; 9852 p->TimePerMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / 9853 (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub; 9854 p->TimePerMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / 9855 (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub; 9856 if (p->BytePerPixelC[k] == 0) { 9857 p->TimePerChromaMetaChunkNominal[k] = 0; 9858 p->TimePerChromaMetaChunkVBlank[k] = 0; 9859 p->TimePerChromaMetaChunkFlip[k] = 0; 9860 } else { 9861 meta_chunk_width_chroma = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k]; 9862 min_meta_chunk_width_chroma = p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k]; 9863 meta_chunk_per_row_int_chroma = (unsigned int)((double)p->meta_row_width_chroma[k] / meta_chunk_width_chroma); 9864 meta_row_remainder_chroma = p->meta_row_width_chroma[k] % meta_chunk_width_chroma; 9865 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) { 9866 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_width_chroma[k]; 9867 } else { 9868 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_height_chroma[k]; 9869 } 9870 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) { 9871 
meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1; 9872 } else { 9873 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2; 9874 } 9875 p->TimePerChromaMetaChunkNominal[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma; 9876 p->TimePerChromaMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma; 9877 p->TimePerChromaMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma; 9878 } 9879 } else { 9880 p->TimePerMetaChunkNominal[k] = 0; 9881 p->TimePerMetaChunkVBlank[k] = 0; 9882 p->TimePerMetaChunkFlip[k] = 0; 9883 p->TimePerChromaMetaChunkNominal[k] = 0; 9884 p->TimePerChromaMetaChunkVBlank[k] = 0; 9885 p->TimePerChromaMetaChunkFlip[k] = 0; 9886 } 9887 9888 #ifdef __DML_VBA_DEBUG__ 9889 DML_LOG_VERBOSE("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_L[k]); 9890 DML_LOG_VERBOSE("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_C[k]); 9891 DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkNominal = %f\n", __func__, k, p->TimePerMetaChunkNominal[k]); 9892 DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkVBlank = %f\n", __func__, k, p->TimePerMetaChunkVBlank[k]); 9893 
DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkFlip = %f\n", __func__, k, p->TimePerMetaChunkFlip[k]); 9894 DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkNominal = %f\n", __func__, k, p->TimePerChromaMetaChunkNominal[k]); 9895 DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, p->TimePerChromaMetaChunkVBlank[k]); 9896 DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkFlip = %f\n", __func__, k, p->TimePerChromaMetaChunkFlip[k]); 9897 #endif 9898 } 9899 9900 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { 9901 p->DST_Y_PER_PTE_ROW_NOM_L[k] = p->dpte_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; 9902 if (p->BytePerPixelC[k] == 0) { 9903 p->DST_Y_PER_PTE_ROW_NOM_C[k] = 0; 9904 } else { 9905 p->DST_Y_PER_PTE_ROW_NOM_C[k] = p->dpte_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; 9906 } 9907 } 9908 9909 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { 9910 pixel_clock_mhz = ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); 9911 9912 if (p->display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut) 9913 p->time_per_tdlut_group[k] = 2 * p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / p->tdlut_groups_per_2row_ub[k]; 9914 else 9915 p->time_per_tdlut_group[k] = 0; 9916 9917 DML_LOG_VERBOSE("DML::%s: k=%u, time_per_tdlut_group = %f\n", __func__, k, p->time_per_tdlut_group[k]); 9918 9919 if (p->display_cfg->gpuvm_enable == true) { 9920 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) { 9921 dpte_group_width_luma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqWidthY[k]); 9922 } else { 9923 dpte_group_width_luma = (unsigned 
int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqHeightY[k]); 9924 } 9925 if (p->use_one_row_for_frame[k]) { 9926 dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma / 2.0, 1.0)); 9927 } else { 9928 dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma, 1.0)); 9929 } 9930 if (dpte_groups_per_row_luma_ub <= 2) { 9931 dpte_groups_per_row_luma_ub = dpte_groups_per_row_luma_ub + 1; 9932 } 9933 DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); 9934 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]); 9935 DML_LOG_VERBOSE("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]); 9936 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, p->PixelPTEReqWidthY[k]); 9937 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", __func__, k, p->PixelPTEReqHeightY[k]); 9938 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]); 9939 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma); 9940 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub); 9941 9942 p->time_per_pte_group_nom_luma[k] = p->DST_Y_PER_PTE_ROW_NOM_L[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub; 9943 p->time_per_pte_group_vblank_luma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub; 9944 p->time_per_pte_group_flip_luma[k] = p->dst_y_per_row_flip[k] * 
p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub; 9945 if (p->BytePerPixelC[k] == 0) { 9946 p->time_per_pte_group_nom_chroma[k] = 0; 9947 p->time_per_pte_group_vblank_chroma[k] = 0; 9948 p->time_per_pte_group_flip_chroma[k] = 0; 9949 } else { 9950 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) { 9951 dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqWidthC[k]); 9952 } else { 9953 dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqHeightC[k]); 9954 } 9955 9956 if (p->use_one_row_for_frame[k]) { 9957 dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma / 2.0, 1.0)); 9958 } else { 9959 dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma, 1.0)); 9960 } 9961 if (dpte_groups_per_row_chroma_ub <= 2) { 9962 dpte_groups_per_row_chroma_ub = dpte_groups_per_row_chroma_ub + 1; 9963 } 9964 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); 9965 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma); 9966 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub); 9967 9968 p->time_per_pte_group_nom_chroma[k] = p->DST_Y_PER_PTE_ROW_NOM_C[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub; 9969 p->time_per_pte_group_vblank_chroma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / 
pixel_clock_mhz / dpte_groups_per_row_chroma_ub; 9970 p->time_per_pte_group_flip_chroma[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub; 9971 } 9972 } else { 9973 p->time_per_pte_group_nom_luma[k] = 0; 9974 p->time_per_pte_group_vblank_luma[k] = 0; 9975 p->time_per_pte_group_flip_luma[k] = 0; 9976 p->time_per_pte_group_nom_chroma[k] = 0; 9977 p->time_per_pte_group_vblank_chroma[k] = 0; 9978 p->time_per_pte_group_flip_chroma[k] = 0; 9979 } 9980 #ifdef __DML_VBA_DEBUG__ 9981 DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_row_vblank = %f\n", __func__, k, p->dst_y_per_row_vblank[k]); 9982 DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_row_flip = %f\n", __func__, k, p->dst_y_per_row_flip[k]); 9983 9984 DML_LOG_VERBOSE("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_L[k]); 9985 DML_LOG_VERBOSE("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_C[k]); 9986 DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_nom_luma = %f\n", __func__, k, p->time_per_pte_group_nom_luma[k]); 9987 DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, p->time_per_pte_group_vblank_luma[k]); 9988 DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, p->time_per_pte_group_flip_luma[k]); 9989 DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, p->time_per_pte_group_nom_chroma[k]); 9990 DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, p->time_per_pte_group_vblank_chroma[k]); 9991 DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, p->time_per_pte_group_flip_chroma[k]); 9992 #endif 9993 } 9994 } // CalculateMetaAndPTETimes 9995 9996 static void CalculateVMGroupAndRequestTimes( 9997 const struct dml2_display_cfg *display_cfg, 9998 unsigned int 
NumberOfActiveSurfaces, 9999 unsigned int BytePerPixelC[], 10000 double dst_y_per_vm_vblank[], 10001 double dst_y_per_vm_flip[], 10002 unsigned int dpte_row_width_luma_ub[], 10003 unsigned int dpte_row_width_chroma_ub[], 10004 unsigned int vm_group_bytes[], 10005 unsigned int dpde0_bytes_per_frame_ub_l[], 10006 unsigned int dpde0_bytes_per_frame_ub_c[], 10007 unsigned int tdlut_pte_bytes_per_frame[], 10008 unsigned int meta_pte_bytes_per_frame_ub_l[], 10009 unsigned int meta_pte_bytes_per_frame_ub_c[], 10010 bool mrq_present, 10011 10012 // Output 10013 double TimePerVMGroupVBlank[], 10014 double TimePerVMGroupFlip[], 10015 double TimePerVMRequestVBlank[], 10016 double TimePerVMRequestFlip[]) 10017 { 10018 (void)dpte_row_width_luma_ub; 10019 (void)dpte_row_width_chroma_ub; 10020 unsigned int num_group_per_lower_vm_stage = 0; 10021 unsigned int num_req_per_lower_vm_stage = 0; 10022 unsigned int num_group_per_lower_vm_stage_flip; 10023 unsigned int num_group_per_lower_vm_stage_pref; 10024 unsigned int num_req_per_lower_vm_stage_flip; 10025 unsigned int num_req_per_lower_vm_stage_pref; 10026 double line_time; 10027 10028 #ifdef __DML_VBA_DEBUG__ 10029 DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); 10030 #endif 10031 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { 10032 double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); 10033 bool dcc_mrq_enable = display_cfg->plane_descriptors[k].surface.dcc.enable && mrq_present; 10034 #ifdef __DML_VBA_DEBUG__ 10035 DML_LOG_VERBOSE("DML::%s: k=%u, dcc_mrq_enable = %u\n", __func__, k, dcc_mrq_enable); 10036 DML_LOG_VERBOSE("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]); 10037 DML_LOG_VERBOSE("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]); 10038 DML_LOG_VERBOSE("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", 
__func__, k, dpde0_bytes_per_frame_ub_c[k]); 10039 DML_LOG_VERBOSE("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_l = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]); 10040 DML_LOG_VERBOSE("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_c = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]); 10041 #endif 10042 10043 if (display_cfg->gpuvm_enable) { 10044 if (display_cfg->gpuvm_max_page_table_levels >= 2) { 10045 num_group_per_lower_vm_stage += (unsigned int) math_ceil2((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); 10046 10047 if (BytePerPixelC[k] > 0) 10048 num_group_per_lower_vm_stage += (unsigned int) math_ceil2((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); 10049 } 10050 10051 if (dcc_mrq_enable) { 10052 if (BytePerPixelC[k] > 0) { 10053 num_group_per_lower_vm_stage += (unsigned int)(2.0 /*for each mpde0 group*/ + math_ceil2((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) + 10054 math_ceil2((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)); 10055 } else { 10056 num_group_per_lower_vm_stage += (unsigned int)(1.0 + math_ceil2((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)); 10057 } 10058 } 10059 10060 num_group_per_lower_vm_stage_flip = num_group_per_lower_vm_stage; 10061 num_group_per_lower_vm_stage_pref = num_group_per_lower_vm_stage; 10062 10063 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && display_cfg->gpuvm_enable) { 10064 num_group_per_lower_vm_stage_pref += (unsigned int) math_ceil2(tdlut_pte_bytes_per_frame[k] / vm_group_bytes[k], 1); 10065 if (display_cfg->gpuvm_max_page_table_levels >= 2) 10066 num_group_per_lower_vm_stage_pref += 1; // tdpe0 group 10067 } 10068 10069 if (display_cfg->gpuvm_max_page_table_levels >= 2) { 10070 num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_l[k] / 64; 10071 if (BytePerPixelC[k] > 0) 10072 num_req_per_lower_vm_stage += 
dpde0_bytes_per_frame_ub_c[k]; 10073 } 10074 10075 if (dcc_mrq_enable) { 10076 num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_l[k] / 64; 10077 if (BytePerPixelC[k] > 0) 10078 num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_c[k] / 64; 10079 } 10080 10081 num_req_per_lower_vm_stage_flip = num_req_per_lower_vm_stage; 10082 num_req_per_lower_vm_stage_pref = num_req_per_lower_vm_stage; 10083 10084 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && display_cfg->gpuvm_enable) { 10085 num_req_per_lower_vm_stage_pref += tdlut_pte_bytes_per_frame[k] / 64; 10086 } 10087 10088 line_time = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz; 10089 10090 if (num_group_per_lower_vm_stage_pref > 0) 10091 TimePerVMGroupVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_group_per_lower_vm_stage_pref; 10092 else 10093 TimePerVMGroupVBlank[k] = 0; 10094 10095 if (num_group_per_lower_vm_stage_flip > 0) 10096 TimePerVMGroupFlip[k] = dst_y_per_vm_flip[k] * line_time / num_group_per_lower_vm_stage_flip; 10097 else 10098 TimePerVMGroupFlip[k] = 0; 10099 10100 if (num_req_per_lower_vm_stage_pref > 0) 10101 TimePerVMRequestVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_req_per_lower_vm_stage_pref; 10102 else 10103 TimePerVMRequestVBlank[k] = 0.0; 10104 if (num_req_per_lower_vm_stage_flip > 0) 10105 TimePerVMRequestFlip[k] = dst_y_per_vm_flip[k] * line_time / num_req_per_lower_vm_stage_flip; 10106 else 10107 TimePerVMRequestFlip[k] = 0.0; 10108 10109 DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_vm_vblank = %f\n", __func__, k, dst_y_per_vm_vblank[k]); 10110 DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_vm_flip = %f\n", __func__, k, dst_y_per_vm_flip[k]); 10111 DML_LOG_VERBOSE("DML::%s: k=%u, line_time = %f\n", __func__, k, line_time); 10112 DML_LOG_VERBOSE("DML::%s: k=%u, num_group_per_lower_vm_stage_pref = %d\n", __func__, k, num_group_per_lower_vm_stage_pref); 10113 DML_LOG_VERBOSE("DML::%s: k=%u, 
num_group_per_lower_vm_stage_flip = %d\n", __func__, k, num_group_per_lower_vm_stage_flip); 10114 DML_LOG_VERBOSE("DML::%s: k=%u, num_req_per_lower_vm_stage_pref = %d\n", __func__, k, num_req_per_lower_vm_stage_pref); 10115 DML_LOG_VERBOSE("DML::%s: k=%u, num_req_per_lower_vm_stage_flip = %d\n", __func__, k, num_req_per_lower_vm_stage_flip); 10116 10117 if (display_cfg->gpuvm_max_page_table_levels > 2) { 10118 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; 10119 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2; 10120 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2; 10121 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2; 10122 } 10123 10124 } else { 10125 TimePerVMGroupVBlank[k] = 0; 10126 TimePerVMGroupFlip[k] = 0; 10127 TimePerVMRequestVBlank[k] = 0; 10128 TimePerVMRequestFlip[k] = 0; 10129 } 10130 10131 #ifdef __DML_VBA_DEBUG__ 10132 DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]); 10133 DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]); 10134 DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]); 10135 DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]); 10136 #endif 10137 } 10138 } 10139 10140 static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratch, 10141 struct dml2_core_calcs_CalculateStutterEfficiency_params *p) 10142 { 10143 struct dml2_core_calcs_CalculateStutterEfficiency_locals *l = &scratch->CalculateStutterEfficiency_locals; 10144 10145 unsigned int TotalNumberOfActiveOTG = 0; 10146 double SinglePixelClock = 0; 10147 unsigned int SingleHTotal = 0; 10148 unsigned int SingleVTotal = 0; 10149 bool SameTiming = true; 10150 bool FoundCriticalSurface = false; 10151 10152 memset(l, 0, sizeof(struct dml2_core_calcs_CalculateStutterEfficiency_locals)); 10153 10154 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; 
++k) { 10155 if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) { 10156 if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true) { 10157 if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockWidth256BytesY[k] > p->SwathHeightY[k]) || (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesY[k] > p->SwathHeightY[k]) || p->DCCYMaxUncompressedBlock[k] < 256) { 10158 l->MaximumEffectiveCompressionLuma = 2; 10159 } else { 10160 l->MaximumEffectiveCompressionLuma = 4; 10161 } 10162 l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0, l->MaximumEffectiveCompressionLuma); 10163 #ifdef __DML_VBA_DEBUG__ 10164 DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); 10165 DML_LOG_VERBOSE("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0); 10166 DML_LOG_VERBOSE("DML::%s: k=%u, MaximumEffectiveCompressionLuma = %f\n", __func__, k, l->MaximumEffectiveCompressionLuma); 10167 #endif 10168 l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0; 10169 l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0 / l->MaximumEffectiveCompressionLuma; 10170 10171 if (p->ReadBandwidthSurfaceChroma[k] > 0) { 10172 if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockWidth256BytesC[k] > p->SwathHeightC[k]) || 
(!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesC[k] > p->SwathHeightC[k]) || p->DCCCMaxUncompressedBlock[k] < 256) { 10173 l->MaximumEffectiveCompressionChroma = 2; 10174 } else { 10175 l->MaximumEffectiveCompressionChroma = 4; 10176 } 10177 l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1, l->MaximumEffectiveCompressionChroma); 10178 #ifdef __DML_VBA_DEBUG__ 10179 DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]); 10180 DML_LOG_VERBOSE("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1); 10181 DML_LOG_VERBOSE("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, l->MaximumEffectiveCompressionChroma); 10182 #endif 10183 l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1; 10184 l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1 / l->MaximumEffectiveCompressionChroma; 10185 } 10186 } else { 10187 l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] + p->ReadBandwidthSurfaceChroma[k]; 10188 } 10189 l->TotalRowReadBandwidth = l->TotalRowReadBandwidth + p->DPPPerSurface[k] * (p->meta_row_bw[k] + p->dpte_row_bw[k]); 10190 } 10191 } 10192 10193 l->AverageDCCCompressionRate = p->TotalDataReadBandwidth / l->TotalCompressedReadBandwidth; 10194 l->AverageDCCZeroSizeFraction = l->TotalZeroSizeRequestReadBandwidth / p->TotalDataReadBandwidth; 
10195 10196 #ifdef __DML_VBA_DEBUG__ 10197 DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled); 10198 DML_LOG_VERBOSE("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, l->TotalCompressedReadBandwidth); 10199 DML_LOG_VERBOSE("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, l->TotalZeroSizeRequestReadBandwidth); 10200 DML_LOG_VERBOSE("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, l->TotalZeroSizeCompressedReadBandwidth); 10201 DML_LOG_VERBOSE("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, l->MaximumEffectiveCompressionLuma); 10202 DML_LOG_VERBOSE("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, l->MaximumEffectiveCompressionChroma); 10203 DML_LOG_VERBOSE("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate); 10204 DML_LOG_VERBOSE("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, l->AverageDCCZeroSizeFraction); 10205 10206 DML_LOG_VERBOSE("DML::%s: CompbufReservedSpace64B = %u (%f kbytes)\n", __func__, p->CompbufReservedSpace64B, p->CompbufReservedSpace64B * 64 / 1024.0); 10207 DML_LOG_VERBOSE("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs); 10208 DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u kbytes\n", __func__, p->CompressedBufferSizeInkByte); 10209 DML_LOG_VERBOSE("DML::%s: ROBBufferSizeInKByte = %u kbytes\n", __func__, p->ROBBufferSizeInKByte); 10210 #endif 10211 if (l->AverageDCCZeroSizeFraction == 1) { 10212 l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth; 10213 l->EffectiveCompressedBufferSize = (double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageZeroSizeCompressionRate + ((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 * l->AverageZeroSizeCompressionRate; 10214 10215 10216 } else if (l->AverageDCCZeroSizeFraction > 0) { 10217 l->AverageZeroSizeCompressionRate = 
l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth; 10218 l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate, 10219 (double)p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate)) + 10220 (p->rob_alloc_compressed ? math_min2(((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * l->AverageDCCCompressionRate, 10221 ((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate)) 10222 : ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64)); 10223 10224 10225 #ifdef __DML_VBA_DEBUG__ 10226 DML_LOG_VERBOSE("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate); 10227 DML_LOG_VERBOSE("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate)); 10228 DML_LOG_VERBOSE("DML::%s: min 3 = %d\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64)); 10229 DML_LOG_VERBOSE("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate)); 10230 #endif 10231 } else { 10232 l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate, 10233 (double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate) + 10234 ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * (p->rob_alloc_compressed ? 
l->AverageDCCCompressionRate : 1.0); 10235 10236 #ifdef __DML_VBA_DEBUG__ 10237 DML_LOG_VERBOSE("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate); 10238 DML_LOG_VERBOSE("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate); 10239 #endif 10240 } 10241 10242 #ifdef __DML_VBA_DEBUG__ 10243 DML_LOG_VERBOSE("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries); 10244 DML_LOG_VERBOSE("DML::%s: ZeroSizeBufferEntries = %u\n", __func__, p->ZeroSizeBufferEntries); 10245 DML_LOG_VERBOSE("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, l->AverageZeroSizeCompressionRate); 10246 DML_LOG_VERBOSE("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0); 10247 #endif 10248 10249 *p->StutterPeriod = 0; 10250 10251 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { 10252 if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) { 10253 l->LinesInDETY = ((double)p->DETBufferSizeY[k] + (p->UnboundedRequestEnabled == true ? 
l->EffectiveCompressedBufferSize : 0) * p->ReadBandwidthSurfaceLuma[k] / p->TotalDataReadBandwidth) / p->BytePerPixelDETY[k] / p->SwathWidthY[k]; 10254 l->LinesInDETYRoundedDownToSwath = math_floor2(l->LinesInDETY, p->SwathHeightY[k]); 10255 l->DETBufferingTimeY = l->LinesInDETYRoundedDownToSwath * ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; 10256 #ifdef __DML_VBA_DEBUG__ 10257 DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferSizeY = %u (%u kbytes)\n", __func__, k, p->DETBufferSizeY[k], p->DETBufferSizeY[k] / 1024); 10258 DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]); 10259 DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]); 10260 DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); 10261 DML_LOG_VERBOSE("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth); 10262 DML_LOG_VERBOSE("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, l->LinesInDETY); 10263 DML_LOG_VERBOSE("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, l->LinesInDETYRoundedDownToSwath); 10264 DML_LOG_VERBOSE("DML::%s: k=%u, VRatio = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); 10265 DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, l->DETBufferingTimeY); 10266 #endif 10267 10268 if (!FoundCriticalSurface || l->DETBufferingTimeY < *p->StutterPeriod) { 10269 bool isInterlaceTiming = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !p->ProgressiveToInterlaceUnitInOPP; 10270 10271 FoundCriticalSurface = true; 10272 
*p->StutterPeriod = l->DETBufferingTimeY; 10273 l->FrameTimeCriticalSurface = (isInterlaceTiming ? math_floor2((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total / 2.0, 1.0) : p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total) * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); 10274 l->VActiveTimeCriticalSurface = (isInterlaceTiming ? math_floor2((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_active / 2.0, 1.0) : p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_active) * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); 10275 l->BytePerPixelYCriticalSurface = p->BytePerPixelY[k]; 10276 l->SwathWidthYCriticalSurface = p->SwathWidthY[k]; 10277 l->SwathHeightYCriticalSurface = p->SwathHeightY[k]; 10278 l->BlockWidth256BytesYCriticalSurface = p->BlockWidth256BytesY[k]; 10279 l->DETBufferSizeYCriticalSurface = p->DETBufferSizeY[k]; 10280 l->MinTTUVBlankCriticalSurface = p->MinTTUVBlank[k]; 10281 l->SinglePlaneCriticalSurface = (p->ReadBandwidthSurfaceChroma[k] == 0); 10282 l->SinglePipeCriticalSurface = (p->DPPPerSurface[k] == 1); 10283 10284 #ifdef __DML_VBA_DEBUG__ 10285 DML_LOG_VERBOSE("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface); 10286 DML_LOG_VERBOSE("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod); 10287 DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, l->MinTTUVBlankCriticalSurface); 10288 
DML_LOG_VERBOSE("DML::%s: k=%u, FrameTimeCriticalSurface= %f\n", __func__, k, l->FrameTimeCriticalSurface); 10289 DML_LOG_VERBOSE("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, l->VActiveTimeCriticalSurface); 10290 DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, l->BytePerPixelYCriticalSurface); 10291 DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, l->SwathWidthYCriticalSurface); 10292 DML_LOG_VERBOSE("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, l->SwathHeightYCriticalSurface); 10293 DML_LOG_VERBOSE("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, l->BlockWidth256BytesYCriticalSurface); 10294 DML_LOG_VERBOSE("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, l->SinglePlaneCriticalSurface); 10295 DML_LOG_VERBOSE("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, l->SinglePipeCriticalSurface); 10296 #endif 10297 } 10298 } 10299 } 10300 10301 // for bounded req, the stutter period is calculated only based on DET size, but during burst there can be some return inside ROB/compressed buffer 10302 // stutter period is calculated only on the det sizing 10303 // if (cdb + rob >= det) the stutter burst will be absorbed by the cdb + rob which is before decompress 10304 // else 10305 // the cdb + rob part will be in compressed rate with urg bw (idea bw) 10306 // the det part will be return at uncompressed rate with 64B/dcfclk 10307 // 10308 // for unbounded req, the stutter period should be calculated as total of CDB+ROB+DET, so the term "PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer" 10309 // should be == EffectiveCompressedBufferSize which will returned a compressed rate, the rest of stutter period is from the DET will be returned at uncompressed rate with 64B/dcfclk 10310 10311 l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = math_min2(*p->StutterPeriod * p->TotalDataReadBandwidth, 
l->EffectiveCompressedBufferSize); 10312 #ifdef __DML_VBA_DEBUG__ 10313 DML_LOG_VERBOSE("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate); 10314 DML_LOG_VERBOSE("DML::%s: StutterPeriod*TotalDataReadBandwidth = %f (%f kbytes)\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth, (*p->StutterPeriod * p->TotalDataReadBandwidth) / 1024.0); 10315 DML_LOG_VERBOSE("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0); 10316 DML_LOG_VERBOSE("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f (%f kbytes)\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 1024); 10317 DML_LOG_VERBOSE("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW); 10318 DML_LOG_VERBOSE("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth); 10319 DML_LOG_VERBOSE("DML::%s: TotalRowReadBandwidth = %f\n", __func__, l->TotalRowReadBandwidth); 10320 DML_LOG_VERBOSE("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK); 10321 #endif 10322 10323 l->StutterBurstTime = l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer 10324 / (p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) + 10325 (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) 10326 / math_min2(p->DCFCLK * 64, p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) + 10327 *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW; 10328 #ifdef __DML_VBA_DEBUG__ 10329 DML_LOG_VERBOSE("DML::%s: Part 1 = %f\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / p->ReturnBW / (p->hw_debug5 ? 
1 : l->AverageDCCCompressionRate)); 10330 DML_LOG_VERBOSE("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64)); 10331 DML_LOG_VERBOSE("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW); 10332 DML_LOG_VERBOSE("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime); 10333 #endif 10334 l->TotalActiveWriteback = 0; 10335 memset(l->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool)); 10336 10337 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { 10338 if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) { 10339 if (!l->stream_visited[p->display_cfg->plane_descriptors[k].stream_index]) { 10340 10341 if (p->display_cfg->stream_descriptors[k].writeback.active_writebacks_per_stream > 0) 10342 l->TotalActiveWriteback = l->TotalActiveWriteback + 1; 10343 10344 if (TotalNumberOfActiveOTG == 0) { // first otg 10345 SinglePixelClock = ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); 10346 SingleHTotal = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total; 10347 SingleVTotal = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total; 10348 } else if (SinglePixelClock != ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) || 10349 SingleHTotal != p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total || 10350 SingleVTotal != p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total) { 10351 SameTiming = false; 10352 } 10353 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; 10354 l->stream_visited[p->display_cfg->plane_descriptors[k].stream_index] = 1; 10355 
} 10356 } 10357 } 10358 10359 if (l->TotalActiveWriteback == 0) { 10360 #ifdef __DML_VBA_DEBUG__ 10361 DML_LOG_VERBOSE("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime); 10362 DML_LOG_VERBOSE("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time); 10363 DML_LOG_VERBOSE("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); 10364 #endif 10365 *p->StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitTime + l->StutterBurstTime) / *p->StutterPeriod) * 100; 10366 *p->Z8StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitZ8Time + l->StutterBurstTime) / *p->StutterPeriod) * 100; 10367 *p->NumberOfStutterBurstsPerFrame = (*p->StutterEfficiencyNotIncludingVBlank > 0 ? (unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0); 10368 *p->Z8NumberOfStutterBurstsPerFrame = (*p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? (unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0); 10369 } else { 10370 *p->StutterEfficiencyNotIncludingVBlank = 0.; 10371 *p->Z8StutterEfficiencyNotIncludingVBlank = 0.; 10372 *p->NumberOfStutterBurstsPerFrame = 0; 10373 *p->Z8NumberOfStutterBurstsPerFrame = 0; 10374 } 10375 #ifdef __DML_VBA_DEBUG__ 10376 DML_LOG_VERBOSE("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, l->VActiveTimeCriticalSurface); 10377 DML_LOG_VERBOSE("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); 10378 DML_LOG_VERBOSE("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank); 10379 DML_LOG_VERBOSE("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame); 10380 DML_LOG_VERBOSE("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); 10381 #endif 10382 10383 if (*p->StutterEfficiencyNotIncludingVBlank > 0) { 10384 if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) 
{ 10385 *p->StutterEfficiency = *p->StutterEfficiencyNotIncludingVBlank; 10386 } else { 10387 *p->StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitTime + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100; 10388 } 10389 } else { 10390 *p->StutterEfficiency = 0; 10391 *p->NumberOfStutterBurstsPerFrame = 0; 10392 } 10393 10394 if (*p->Z8StutterEfficiencyNotIncludingVBlank > 0) { 10395 //LastZ8StutterPeriod = l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod; 10396 if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) { 10397 *p->Z8StutterEfficiency = *p->Z8StutterEfficiencyNotIncludingVBlank; 10398 } else { 10399 *p->Z8StutterEfficiency = (1 - (*p->Z8NumberOfStutterBurstsPerFrame * p->SRExitZ8Time + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100; 10400 } 10401 } else { 10402 *p->Z8StutterEfficiency = 0.; 10403 *p->Z8NumberOfStutterBurstsPerFrame = 0; 10404 } 10405 10406 #ifdef __DML_VBA_DEBUG__ 10407 DML_LOG_VERBOSE("DML::%s: TotalNumberOfActiveOTG = %u\n", __func__, TotalNumberOfActiveOTG); 10408 DML_LOG_VERBOSE("DML::%s: SameTiming = %u\n", __func__, SameTiming); 10409 DML_LOG_VERBOSE("DML::%s: SynchronizeTimings = %u\n", __func__, p->SynchronizeTimings); 10410 DML_LOG_VERBOSE("DML::%s: LastZ8StutterPeriod = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? 
l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod : 0); 10411 DML_LOG_VERBOSE("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Z8StutterEnterPlusExitWatermark); 10412 DML_LOG_VERBOSE("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime); 10413 DML_LOG_VERBOSE("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); 10414 DML_LOG_VERBOSE("DML::%s: StutterEfficiency = %f\n", __func__, *p->StutterEfficiency); 10415 DML_LOG_VERBOSE("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency); 10416 DML_LOG_VERBOSE("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); 10417 DML_LOG_VERBOSE("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); 10418 #endif 10419 10420 *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && l->SinglePlaneCriticalSurface && l->SinglePipeCriticalSurface); 10421 10422 #ifdef __DML_VBA_DEBUG__ 10423 DML_LOG_VERBOSE("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, l->DETBufferSizeYCriticalSurface); 10424 DML_LOG_VERBOSE("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte); 10425 DML_LOG_VERBOSE("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); 10426 #endif 10427 } 10428 10429 static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex *in_out_params) 10430 { 10431 const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg; 10432 const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table; 10433 const struct core_display_cfg_support_info *cfg_support_info = in_out_params->cfg_support_info; 10434 struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib; 10435 struct dml2_display_cfg_programming *programming = in_out_params->programming; 10436 
10437 struct dml2_core_calcs_mode_programming_locals *s = &mode_lib->scratch.dml_core_mode_programming_locals; 10438 struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params; 10439 struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params; 10440 struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params; 10441 struct dml2_core_calcs_CalculateStutterEfficiency_params *CalculateStutterEfficiency_params = &mode_lib->scratch.CalculateStutterEfficiency_params; 10442 struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; 10443 struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params; 10444 struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params; 10445 struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params; 10446 struct dml2_core_shared_CalculateMetaAndPTETimes_params *CalculateMetaAndPTETimes_params = &mode_lib->scratch.CalculateMetaAndPTETimes_params; 10447 struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params; 10448 struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *calculate_bytes_to_fetch_required_to_hide_latency_params = &mode_lib->scratch.calculate_bytes_to_fetch_required_to_hide_latency_params; 10449 10450 unsigned int k; 10451 bool must_support_iflip; 10452 const long 
min_return_uclk_cycles = 83; 10453 const long min_return_fclk_cycles = 75; 10454 const double max_fclk_mhz = min_clk_table->max_clocks_khz.fclk / 1000.0; 10455 double hard_minimum_dcfclk_mhz = (double)min_clk_table->dram_bw_table.entries[0].min_dcfclk_khz / 1000.0; 10456 double max_uclk_mhz = 0; 10457 double min_return_latency_in_DCFCLK_cycles = 0; 10458 10459 DML_LOG_VERBOSE("DML::%s: --- START --- \n", __func__); 10460 10461 memset(&mode_lib->scratch, 0, sizeof(struct dml2_core_internal_scratch)); 10462 memset(&mode_lib->mp, 0, sizeof(struct dml2_core_internal_mode_program)); 10463 10464 s->num_active_planes = display_cfg->num_planes; 10465 get_stream_output_bpp(s->OutputBpp, display_cfg); 10466 10467 mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(display_cfg->num_planes, cfg_support_info); 10468 dml_calc_pipe_plane_mapping(cfg_support_info, mode_lib->mp.pipe_plane); 10469 10470 mode_lib->mp.Dcfclk = programming->min_clocks.dcn4x.active.dcfclk_khz / 1000.0; 10471 mode_lib->mp.FabricClock = programming->min_clocks.dcn4x.active.fclk_khz / 1000.0; 10472 mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table.dram_config, &min_clk_table->dram_bw_table); 10473 mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4x.active.uclk_khz / 1000.0; 10474 mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4x.dpprefclk_khz / 1000.0; 10475 s->SOCCLK = (double)programming->min_clocks.dcn4x.socclk_khz / 1000; 10476 mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4x.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params); 10477 mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table); 10478 10479 for (k = 0; k < s->num_active_planes; ++k) { 10480 unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; 10481 
DML_ASSERT(cfg_support_info->stream_support_info[stream_index].odms_used <= 4); 10482 DML_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4 || 10483 cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2 || 10484 cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1); 10485 10486 if (cfg_support_info->stream_support_info[stream_index].odms_used > 1) 10487 DML_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1); 10488 10489 switch (cfg_support_info->stream_support_info[stream_index].odms_used) { 10490 case (4): 10491 mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_4to1; 10492 break; 10493 case (3): 10494 mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_3to1; 10495 break; 10496 case (2): 10497 mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_2to1; 10498 break; 10499 default: 10500 if (cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4) 10501 mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to4; 10502 else if (cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2) 10503 mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to2; 10504 else 10505 mode_lib->mp.ODMMode[k] = dml2_odm_mode_bypass; 10506 break; 10507 } 10508 } 10509 10510 for (k = 0; k < s->num_active_planes; ++k) { 10511 mode_lib->mp.NoOfDPP[k] = cfg_support_info->plane_support_info[k].dpps_used; 10512 mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4x.dppclk_khz / 1000.0; 10513 DML_ASSERT(mode_lib->mp.Dppclk[k] > 0); 10514 } 10515 10516 for (k = 0; k < s->num_active_planes; ++k) { 10517 unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; 10518 mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4x.dscclk_khz / 1000.0; 10519 DML_LOG_VERBOSE("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]); 10520 } 
10521 10522 mode_lib->mp.Dispclk = programming->min_clocks.dcn4x.dispclk_khz / 1000.0; 10523 mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4x.deepsleep_dcfclk_khz / 1000.0; 10524 10525 DML_ASSERT(mode_lib->mp.Dcfclk > 0); 10526 DML_ASSERT(mode_lib->mp.FabricClock > 0); 10527 DML_ASSERT(mode_lib->mp.dram_bw_mbps > 0); 10528 DML_ASSERT(mode_lib->mp.uclk_freq_mhz > 0); 10529 DML_ASSERT(mode_lib->mp.GlobalDPPCLK > 0); 10530 DML_ASSERT(mode_lib->mp.Dispclk > 0); 10531 DML_ASSERT(mode_lib->mp.DCFCLKDeepSleep > 0); 10532 DML_ASSERT(s->SOCCLK > 0); 10533 10534 #ifdef __DML_VBA_DEBUG__ 10535 DML_LOG_VERBOSE("DML::%s: num_active_planes = %u\n", __func__, s->num_active_planes); 10536 DML_LOG_VERBOSE("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes); 10537 DML_LOG_VERBOSE("DML::%s: Dcfclk = %f\n", __func__, mode_lib->mp.Dcfclk); 10538 DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, mode_lib->mp.FabricClock); 10539 DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->mp.dram_bw_mbps); 10540 DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->mp.uclk_freq_mhz); 10541 DML_LOG_VERBOSE("DML::%s: Dispclk = %f\n", __func__, mode_lib->mp.Dispclk); 10542 for (k = 0; k < s->num_active_planes; ++k) { 10543 DML_LOG_VERBOSE("DML::%s: Dppclk[%0d] = %f\n", __func__, k, mode_lib->mp.Dppclk[k]); 10544 } 10545 DML_LOG_VERBOSE("DML::%s: GlobalDPPCLK = %f\n", __func__, mode_lib->mp.GlobalDPPCLK); 10546 DML_LOG_VERBOSE("DML::%s: DCFCLKDeepSleep = %f\n", __func__, mode_lib->mp.DCFCLKDeepSleep); 10547 DML_LOG_VERBOSE("DML::%s: SOCCLK = %f\n", __func__, s->SOCCLK); 10548 DML_LOG_VERBOSE("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index); 10549 DML_LOG_VERBOSE("DML::%s: min_clk_table min_fclk_khz = %ld\n", __func__, min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz); 10550 if (min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_uclk_khz) 10551 
DML_LOG_VERBOSE("DML::%s: min_clk_table uclk_mhz = %f\n", __func__, min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_uclk_khz / 1000.0); 10552 else 10553 DML_LOG_VERBOSE("DML::%s: min_clk_table uclk_mhz = %f\n", __func__, dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config)); 10554 for (k = 0; k < mode_lib->mp.num_active_pipes; ++k) { 10555 DML_LOG_VERBOSE("DML::%s: pipe=%d is in plane=%d\n", __func__, k, mode_lib->mp.pipe_plane[k]); 10556 DML_LOG_VERBOSE("DML::%s: Per-plane DPPPerSurface[%0d] = %d\n", __func__, k, mode_lib->mp.NoOfDPP[k]); 10557 } 10558 10559 for (k = 0; k < s->num_active_planes; k++) 10560 DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); 10561 #endif 10562 10563 CalculateMaxDETAndMinCompressedBufferSize( 10564 mode_lib->ip.config_return_buffer_size_in_kbytes, 10565 mode_lib->ip.config_return_buffer_segment_size_in_kbytes, 10566 mode_lib->ip.rob_buffer_size_kbytes, 10567 mode_lib->ip.max_num_dpp, 10568 display_cfg->overrides.hw.force_nom_det_size_kbytes.enable, 10569 display_cfg->overrides.hw.force_nom_det_size_kbytes.value, 10570 mode_lib->ip.dcn_mrq_present, 10571 10572 /* Output */ 10573 &s->MaxTotalDETInKByte, 10574 &s->NomDETInKByte, 10575 &s->MinCompressedBufferSizeInKByte); 10576 10577 10578 PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd); 10579 10580 for (k = 0; k < s->num_active_planes; ++k) { 10581 CalculateSinglePipeDPPCLKAndSCLThroughput( 10582 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 10583 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 10584 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, 10585 
display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, 10586 mode_lib->ip.max_dchub_pscl_bw_pix_per_clk, 10587 mode_lib->ip.max_pscl_lb_bw_pix_per_clk, 10588 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), 10589 display_cfg->plane_descriptors[k].pixel_format, 10590 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps, 10591 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps, 10592 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps, 10593 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps, 10594 10595 /* Output */ 10596 &mode_lib->mp.PSCL_THROUGHPUT[k], 10597 &mode_lib->mp.PSCL_THROUGHPUT_CHROMA[k], 10598 &mode_lib->mp.DPPCLKUsingSingleDPP[k]); 10599 } 10600 10601 for (k = 0; k < s->num_active_planes; ++k) { 10602 CalculateBytePerPixelAndBlockSizes( 10603 display_cfg->plane_descriptors[k].pixel_format, 10604 display_cfg->plane_descriptors[k].surface.tiling, 10605 display_cfg->plane_descriptors[k].surface.plane0.pitch, 10606 display_cfg->plane_descriptors[k].surface.plane1.pitch, 10607 10608 // Output 10609 &mode_lib->mp.BytePerPixelY[k], 10610 &mode_lib->mp.BytePerPixelC[k], 10611 &mode_lib->mp.BytePerPixelInDETY[k], 10612 &mode_lib->mp.BytePerPixelInDETC[k], 10613 &mode_lib->mp.Read256BlockHeightY[k], 10614 &mode_lib->mp.Read256BlockHeightC[k], 10615 &mode_lib->mp.Read256BlockWidthY[k], 10616 &mode_lib->mp.Read256BlockWidthC[k], 10617 &mode_lib->mp.MacroTileHeightY[k], 10618 &mode_lib->mp.MacroTileHeightC[k], 10619 &mode_lib->mp.MacroTileWidthY[k], 10620 &mode_lib->mp.MacroTileWidthC[k], 10621 &mode_lib->mp.surf_linear128_l[k], 10622 &mode_lib->mp.surf_linear128_c[k]); 10623 } 10624 10625 CalculateSwathWidth( 10626 display_cfg, 10627 false, // ForceSingleDPP 10628 s->num_active_planes, 10629 mode_lib->mp.ODMMode, 10630 mode_lib->mp.BytePerPixelY, 10631 mode_lib->mp.BytePerPixelC, 10632 
mode_lib->mp.Read256BlockHeightY, 10633 mode_lib->mp.Read256BlockHeightC, 10634 mode_lib->mp.Read256BlockWidthY, 10635 mode_lib->mp.Read256BlockWidthC, 10636 mode_lib->mp.surf_linear128_l, 10637 mode_lib->mp.surf_linear128_c, 10638 mode_lib->mp.NoOfDPP, 10639 10640 /* Output */ 10641 mode_lib->mp.req_per_swath_ub_l, 10642 mode_lib->mp.req_per_swath_ub_c, 10643 mode_lib->mp.SwathWidthSingleDPPY, 10644 mode_lib->mp.SwathWidthSingleDPPC, 10645 mode_lib->mp.SwathWidthY, 10646 mode_lib->mp.SwathWidthC, 10647 s->dummy_integer_array[0], // unsigned int MaximumSwathHeightY[] 10648 s->dummy_integer_array[1], // unsigned int MaximumSwathHeightC[] 10649 mode_lib->mp.swath_width_luma_ub, 10650 mode_lib->mp.swath_width_chroma_ub); 10651 10652 for (k = 0; k < s->num_active_planes; ++k) { 10653 mode_lib->mp.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width * display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / 10654 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)); 10655 mode_lib->mp.vactive_sw_bw_l[k] = mode_lib->mp.SwathWidthSingleDPPY[k] * mode_lib->mp.BytePerPixelY[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; 10656 mode_lib->mp.vactive_sw_bw_c[k] = mode_lib->mp.SwathWidthSingleDPPC[k] * mode_lib->mp.BytePerPixelC[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 
display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; 10657 DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_l[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); 10658 DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_c[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); 10659 } 10660 10661 CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg; 10662 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes; 10663 CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = s->MaxTotalDETInKByte; 10664 CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = s->MinCompressedBufferSizeInKByte; 10665 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; 10666 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes; 10667 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; 10668 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes; 10669 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false; 10670 CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = s->num_active_planes; 10671 CalculateSwathAndDETConfiguration_params->nomDETInKByte = s->NomDETInKByte; 10672 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes; 10673 CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes; 10674 CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->mp.vactive_sw_bw_l; 10675 CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->mp.vactive_sw_bw_c; 10676 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = s->dummy_single_array[0]; 
10677 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = s->dummy_single_array[1]; 10678 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->mp.Read256BlockHeightY; 10679 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->mp.Read256BlockHeightC; 10680 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->mp.Read256BlockWidthY; 10681 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->mp.Read256BlockWidthC; 10682 CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->mp.surf_linear128_l; 10683 CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->mp.surf_linear128_c; 10684 CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->mp.ODMMode; 10685 CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->mp.NoOfDPP; 10686 CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->mp.BytePerPixelY; 10687 CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->mp.BytePerPixelC; 10688 CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->mp.BytePerPixelInDETY; 10689 CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->mp.BytePerPixelInDETC; 10690 CalculateSwathAndDETConfiguration_params->mrq_present = mode_lib->ip.dcn_mrq_present; 10691 10692 // output 10693 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = mode_lib->mp.req_per_swath_ub_l; 10694 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = mode_lib->mp.req_per_swath_ub_c; 10695 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_long_array[0]; 10696 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_long_array[1]; 10697 CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_long_array[2]; 10698 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_long_array[3]; 10699 
CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->mp.SwathHeightY; 10700 CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->mp.SwathHeightC; 10701 CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->mp.request_size_bytes_luma; 10702 CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->mp.request_size_bytes_chroma; 10703 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->mp.DETBufferSizeInKByte; 10704 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY; 10705 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC; 10706 CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l; 10707 CalculateSwathAndDETConfiguration_params->full_swath_bytes_c = s->full_swath_bytes_c; 10708 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->mp.UnboundedRequestEnabled; 10709 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &mode_lib->mp.compbuf_reserved_space_64b; 10710 CalculateSwathAndDETConfiguration_params->hw_debug5 = &mode_lib->mp.hw_debug5; 10711 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->mp.CompressedBufferSizeInkByte; 10712 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = &s->dummy_boolean_array[0][0]; 10713 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[0]; 10714 10715 // Calculate DET size, swath height here. 
10716 CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params); 10717 10718 // DSC Delay 10719 for (k = 0; k < s->num_active_planes; ++k) { 10720 mode_lib->mp.DSCDelay[k] = DSCDelayRequirement(cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].dsc_enable, 10721 mode_lib->mp.ODMMode[k], 10722 mode_lib->ip.maximum_dsc_bits_per_component, 10723 s->OutputBpp[k], 10724 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, 10725 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, 10726 cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].num_dsc_slices, 10727 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format, 10728 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder, 10729 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), 10730 s->PixelClockBackEnd[k]); 10731 } 10732 10733 // Prefetch 10734 if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) { 10735 for (k = 0; k < s->num_active_planes; ++k) 10736 mode_lib->mp.SurfaceSizeInTheMALL[k] = 0; 10737 } else { 10738 CalculateSurfaceSizeInMall( 10739 display_cfg, 10740 s->num_active_planes, 10741 mode_lib->soc.mall_allocated_for_dcn_mbytes, 10742 mode_lib->mp.BytePerPixelY, 10743 mode_lib->mp.BytePerPixelC, 10744 mode_lib->mp.Read256BlockWidthY, 10745 mode_lib->mp.Read256BlockWidthC, 10746 mode_lib->mp.Read256BlockHeightY, 10747 mode_lib->mp.Read256BlockHeightC, 10748 mode_lib->mp.MacroTileWidthY, 10749 mode_lib->mp.MacroTileWidthC, 10750 mode_lib->mp.MacroTileHeightY, 10751 mode_lib->mp.MacroTileHeightC, 10752 10753 /* Output */ 10754 mode_lib->mp.SurfaceSizeInTheMALL, 10755 &s->dummy_boolean[0]); /* bool *ExceededMALLSize */ 10756 } 10757 10758 for (k = 0; k < 
s->num_active_planes; ++k) { 10759 s->SurfaceParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); 10760 s->SurfaceParameters[k].DPPPerSurface = mode_lib->mp.NoOfDPP[k]; 10761 s->SurfaceParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle; 10762 s->SurfaceParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height; 10763 s->SurfaceParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height; 10764 s->SurfaceParameters[k].BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY[k]; 10765 s->SurfaceParameters[k].BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY[k]; 10766 s->SurfaceParameters[k].BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC[k]; 10767 s->SurfaceParameters[k].BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC[k]; 10768 s->SurfaceParameters[k].BlockWidthY = mode_lib->mp.MacroTileWidthY[k]; 10769 s->SurfaceParameters[k].BlockHeightY = mode_lib->mp.MacroTileHeightY[k]; 10770 s->SurfaceParameters[k].BlockWidthC = mode_lib->mp.MacroTileWidthC[k]; 10771 s->SurfaceParameters[k].BlockHeightC = mode_lib->mp.MacroTileHeightC[k]; 10772 s->SurfaceParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced; 10773 s->SurfaceParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total; 10774 s->SurfaceParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable; 10775 s->SurfaceParameters[k].SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format; 10776 s->SurfaceParameters[k].SurfaceTiling = display_cfg->plane_descriptors[k].surface.tiling; 10777 s->SurfaceParameters[k].BytePerPixelY = mode_lib->mp.BytePerPixelY[k]; 10778 s->SurfaceParameters[k].BytePerPixelC = 
mode_lib->mp.BytePerPixelC[k]; 10779 s->SurfaceParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; 10780 s->SurfaceParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; 10781 s->SurfaceParameters[k].VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; 10782 s->SurfaceParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; 10783 s->SurfaceParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; 10784 s->SurfaceParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch; 10785 s->SurfaceParameters[k].PitchC = display_cfg->plane_descriptors[k].surface.plane1.pitch; 10786 s->SurfaceParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary; 10787 s->SurfaceParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start; 10788 s->SurfaceParameters[k].ViewportYStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start; 10789 s->SurfaceParameters[k].ViewportXStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start; 10790 s->SurfaceParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start; 10791 s->SurfaceParameters[k].FORCE_ONE_ROW_FOR_FRAME = display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame; 10792 s->SurfaceParameters[k].SwathHeightY = mode_lib->mp.SwathHeightY[k]; 10793 s->SurfaceParameters[k].SwathHeightC = mode_lib->mp.SwathHeightC[k]; 10794 s->SurfaceParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch; 10795 s->SurfaceParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch; 10796 } 10797 10798 CalculateVMRowAndSwath_params->display_cfg = display_cfg; 10799 CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = 
s->num_active_planes; 10800 CalculateVMRowAndSwath_params->myPipe = s->SurfaceParameters; 10801 CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->mp.SurfaceSizeInTheMALL; 10802 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma; 10803 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma; 10804 CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes; 10805 CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->mp.SwathWidthY; 10806 CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->mp.SwathWidthC; 10807 CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes * 1024; 10808 CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes; 10809 CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present; 10810 10811 // output 10812 CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0]; 10813 CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = mode_lib->mp.dpte_row_width_luma_ub; 10814 CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = mode_lib->mp.dpte_row_width_chroma_ub; 10815 CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->mp.dpte_row_height; 10816 CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->mp.dpte_row_height_chroma; 10817 CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = mode_lib->mp.dpte_row_height_linear; 10818 CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = mode_lib->mp.dpte_row_height_linear_chroma; 10819 CalculateVMRowAndSwath_params->vm_group_bytes = mode_lib->mp.vm_group_bytes; 10820 CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes; 10821 CalculateVMRowAndSwath_params->PixelPTEReqWidthY = mode_lib->mp.PixelPTEReqWidthY; 10822 
CalculateVMRowAndSwath_params->PixelPTEReqHeightY = mode_lib->mp.PixelPTEReqHeightY; 10823 CalculateVMRowAndSwath_params->PTERequestSizeY = mode_lib->mp.PTERequestSizeY; 10824 CalculateVMRowAndSwath_params->PixelPTEReqWidthC = mode_lib->mp.PixelPTEReqWidthC; 10825 CalculateVMRowAndSwath_params->PixelPTEReqHeightC = mode_lib->mp.PixelPTEReqHeightC; 10826 CalculateVMRowAndSwath_params->PTERequestSizeC = mode_lib->mp.PTERequestSizeC; 10827 CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y; 10828 CalculateVMRowAndSwath_params->vmpg_height_y = s->vmpg_height_y; 10829 CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c; 10830 CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c; 10831 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = mode_lib->mp.dpde0_bytes_per_frame_ub_l; 10832 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = mode_lib->mp.dpde0_bytes_per_frame_ub_c; 10833 CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->mp.PrefetchSourceLinesY; 10834 CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->mp.PrefetchSourceLinesC; 10835 CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->mp.VInitPreFillY; 10836 CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->mp.VInitPreFillC; 10837 CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->mp.MaxNumSwathY; 10838 CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->mp.MaxNumSwathC; 10839 CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->mp.dpte_row_bw; 10840 CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->mp.PixelPTEBytesPerRow; 10841 CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_l = s->dpte_row_bytes_per_row_l; 10842 CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_c = s->dpte_row_bytes_per_row_c; 10843 CalculateVMRowAndSwath_params->vm_bytes = mode_lib->mp.vm_bytes; 10844 CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->mp.use_one_row_for_frame; 10845 
CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->mp.use_one_row_for_frame_flip; 10846 CalculateVMRowAndSwath_params->is_using_mall_for_ss = mode_lib->mp.is_using_mall_for_ss; 10847 CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = mode_lib->mp.PTE_BUFFER_MODE; 10848 CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = mode_lib->mp.BIGK_FRAGMENT_SIZE; 10849 CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = s->dummy_boolean_array[1]; 10850 CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->mp.meta_row_bw; 10851 CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->mp.meta_row_bytes; 10852 CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l; 10853 CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c; 10854 CalculateVMRowAndSwath_params->meta_req_width_luma = mode_lib->mp.meta_req_width; 10855 CalculateVMRowAndSwath_params->meta_req_height_luma = mode_lib->mp.meta_req_height; 10856 CalculateVMRowAndSwath_params->meta_row_width_luma = mode_lib->mp.meta_row_width; 10857 CalculateVMRowAndSwath_params->meta_row_height_luma = mode_lib->mp.meta_row_height; 10858 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = mode_lib->mp.meta_pte_bytes_per_frame_ub_l; 10859 CalculateVMRowAndSwath_params->meta_req_width_chroma = mode_lib->mp.meta_req_width_chroma; 10860 CalculateVMRowAndSwath_params->meta_row_height_chroma = mode_lib->mp.meta_row_height_chroma; 10861 CalculateVMRowAndSwath_params->meta_row_width_chroma = mode_lib->mp.meta_row_width_chroma; 10862 CalculateVMRowAndSwath_params->meta_req_height_chroma = mode_lib->mp.meta_req_height_chroma; 10863 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = mode_lib->mp.meta_pte_bytes_per_frame_ub_c; 10864 10865 CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params); 10866 10867 memset(calculate_mcache_setting_params, 0, sizeof(struct 
dml2_core_calcs_calculate_mcache_setting_params)); 10868 if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0 || mode_lib->ip.dcn_mrq_present) { 10869 for (k = 0; k < s->num_active_planes; k++) { 10870 mode_lib->mp.mall_prefetch_sdp_overhead_factor[k] = 1.0; 10871 mode_lib->mp.mall_prefetch_dram_overhead_factor[k] = 1.0; 10872 mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0; 10873 mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0; 10874 mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0; 10875 mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0; 10876 } 10877 } else { 10878 for (k = 0; k < s->num_active_planes; k++) { 10879 calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable; 10880 calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count; 10881 calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes; 10882 calculate_mcache_setting_params->mcache_size_bytes = mode_lib->soc.mcache_size_bytes; 10883 calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes; 10884 calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable; 10885 calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; 10886 10887 calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format; 10888 calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle); 10889 calculate_mcache_setting_params->vp_stationary = display_cfg->plane_descriptors[k].composition.viewport.stationary; 10890 calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling; 10891 calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == 
dml2_svp_mode_override_imall; 10892 10893 calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start; 10894 calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start; 10895 calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width; 10896 calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height; 10897 calculate_mcache_setting_params->blk_width_l = mode_lib->mp.MacroTileWidthY[k]; 10898 calculate_mcache_setting_params->blk_height_l = mode_lib->mp.MacroTileHeightY[k]; 10899 calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k]; 10900 calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k]; 10901 calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k]; 10902 calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->mp.BytePerPixelY[k]; 10903 10904 calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start; 10905 calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start; 10906 calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width; 10907 calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height; 10908 calculate_mcache_setting_params->blk_width_c = mode_lib->mp.MacroTileWidthC[k]; 10909 calculate_mcache_setting_params->blk_height_c = mode_lib->mp.MacroTileHeightC[k]; 10910 calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k]; 10911 calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k]; 10912 calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k]; 10913 
calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->mp.BytePerPixelC[k]; 10914 10915 // output 10916 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k]; 10917 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k]; 10918 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k]; 10919 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k]; 10920 10921 calculate_mcache_setting_params->num_mcaches_l = &mode_lib->mp.num_mcaches_l[k]; 10922 calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->mp.mcache_row_bytes_l[k]; 10923 calculate_mcache_setting_params->mcache_row_bytes_per_channel_l = &mode_lib->mp.mcache_row_bytes_per_channel_l[k]; 10924 calculate_mcache_setting_params->mcache_offsets_l = mode_lib->mp.mcache_offsets_l[k]; 10925 calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->mp.mcache_shift_granularity_l[k]; 10926 10927 calculate_mcache_setting_params->num_mcaches_c = &mode_lib->mp.num_mcaches_c[k]; 10928 calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->mp.mcache_row_bytes_c[k]; 10929 calculate_mcache_setting_params->mcache_row_bytes_per_channel_c = &mode_lib->mp.mcache_row_bytes_per_channel_c[k]; 10930 calculate_mcache_setting_params->mcache_offsets_c = mode_lib->mp.mcache_offsets_c[k]; 10931 calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->mp.mcache_shift_granularity_c[k]; 10932 10933 calculate_mcache_setting_params->mall_comb_mcache_l = &mode_lib->mp.mall_comb_mcache_l[k]; 10934 calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->mp.mall_comb_mcache_c[k]; 10935 calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->mp.lc_comb_mcache[k]; 10936 calculate_mcache_setting(&mode_lib->scratch, 
calculate_mcache_setting_params); 10937 } 10938 10939 calculate_mall_bw_overhead_factor( 10940 mode_lib->mp.mall_prefetch_sdp_overhead_factor, 10941 mode_lib->mp.mall_prefetch_dram_overhead_factor, 10942 10943 // input 10944 display_cfg, 10945 s->num_active_planes); 10946 } 10947 10948 // Calculate all the bandwidth availabe 10949 calculate_bandwidth_available( 10950 mode_lib->mp.avg_bandwidth_available_min, 10951 mode_lib->mp.avg_bandwidth_available, 10952 mode_lib->mp.urg_bandwidth_available_min, 10953 mode_lib->mp.urg_bandwidth_available, 10954 mode_lib->mp.urg_bandwidth_available_vm_only, 10955 mode_lib->mp.urg_bandwidth_available_pixel_and_vm, 10956 10957 &mode_lib->soc, 10958 display_cfg->hostvm_enable, 10959 mode_lib->mp.Dcfclk, 10960 mode_lib->mp.FabricClock, 10961 mode_lib->mp.dram_bw_mbps); 10962 10963 10964 calculate_hostvm_inefficiency_factor( 10965 &s->HostVMInefficiencyFactor, 10966 &s->HostVMInefficiencyFactorPrefetch, 10967 10968 display_cfg->gpuvm_enable, 10969 display_cfg->hostvm_enable, 10970 mode_lib->ip.remote_iommu_outstanding_translations, 10971 mode_lib->soc.max_outstanding_reqs, 10972 mode_lib->mp.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active], 10973 mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]); 10974 10975 s->TotalDCCActiveDPP = 0; 10976 s->TotalActiveDPP = 0; 10977 for (k = 0; k < s->num_active_planes; ++k) { 10978 s->TotalActiveDPP = s->TotalActiveDPP + mode_lib->mp.NoOfDPP[k]; 10979 if (display_cfg->plane_descriptors[k].surface.dcc.enable) 10980 s->TotalDCCActiveDPP = s->TotalDCCActiveDPP + mode_lib->mp.NoOfDPP[k]; 10981 } 10982 // Calculate tdlut schedule related terms 10983 for (k = 0; k <= s->num_active_planes - 1; k++) { 10984 calculate_tdlut_setting_params->dispclk_mhz = mode_lib->mp.Dispclk; 10985 calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; 10986 calculate_tdlut_setting_params->tdlut_width_mode = 
display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode; 10987 calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode; 10988 calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size; 10989 calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable; 10990 calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; 10991 10992 // output 10993 calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k]; 10994 calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k]; 10995 calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k]; 10996 calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k]; 10997 calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k]; 10998 calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k]; 10999 calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k]; 11000 calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params); 11001 } 11002 11003 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) 11004 s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes, 11005 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, 11006 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes)); 11007 11008 CalculateExtraLatency( 11009 display_cfg, 11010 mode_lib->ip.rob_buffer_size_kbytes, 11011 mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles, 11012 s->ReorderingBytes, 11013 mode_lib->mp.Dcfclk, 11014 
mode_lib->mp.FabricClock, 11015 mode_lib->ip.pixel_chunk_size_kbytes, 11016 mode_lib->mp.urg_bandwidth_available_min[dml2_core_internal_soc_state_sys_active], 11017 s->num_active_planes, 11018 mode_lib->mp.NoOfDPP, 11019 mode_lib->mp.dpte_group_bytes, 11020 s->tdlut_bytes_per_group, 11021 s->HostVMInefficiencyFactor, 11022 s->HostVMInefficiencyFactorPrefetch, 11023 mode_lib->soc.hostvm_min_page_size_kbytes * 1024, 11024 mode_lib->soc.qos_parameters.qos_type, 11025 !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable), 11026 mode_lib->soc.max_outstanding_reqs, 11027 mode_lib->mp.request_size_bytes_luma, 11028 mode_lib->mp.request_size_bytes_chroma, 11029 mode_lib->ip.meta_chunk_size_kbytes, 11030 mode_lib->ip.dchub_arb_to_ret_delay, 11031 mode_lib->mp.TripToMemory, 11032 mode_lib->ip.hostvm_mode, 11033 11034 // output 11035 &mode_lib->mp.ExtraLatency, 11036 &mode_lib->mp.ExtraLatency_sr, 11037 &mode_lib->mp.ExtraLatencyPrefetch); 11038 11039 mode_lib->mp.TCalc = 24.0 / mode_lib->mp.DCFCLKDeepSleep; 11040 11041 for (k = 0; k < s->num_active_planes; ++k) { 11042 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { 11043 mode_lib->mp.WritebackDelay[k] = 11044 mode_lib->soc.qos_parameters.writeback.base_latency_us 11045 + CalculateWriteBackDelay( 11046 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format, 11047 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio, 11048 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio, 11049 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps, 11050 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width, 11051 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height, 11052 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height, 11053 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->mp.Dispclk; 11054 } else 11055 mode_lib->mp.WritebackDelay[k] = 0; 11056 } 11057 11058 /* VActive bytes to fetch for UCLK P-State */ 11059 calculate_bytes_to_fetch_required_to_hide_latency_params->display_cfg = display_cfg; 11060 calculate_bytes_to_fetch_required_to_hide_latency_params->mrq_present = mode_lib->ip.dcn_mrq_present; 11061 11062 calculate_bytes_to_fetch_required_to_hide_latency_params->num_active_planes = s->num_active_planes; 11063 calculate_bytes_to_fetch_required_to_hide_latency_params->num_of_dpp = mode_lib->mp.NoOfDPP; 11064 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_l = mode_lib->mp.meta_row_height; 11065 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_c = mode_lib->mp.meta_row_height_chroma; 11066 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l; 11067 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c; 11068 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_l = mode_lib->mp.dpte_row_height; 11069 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_c = mode_lib->mp.dpte_row_height_chroma; 11070 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_l = s->dpte_row_bytes_per_row_l; 11071 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_c = s->dpte_row_bytes_per_row_c; 11072 calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_l = mode_lib->mp.BytePerPixelY; 11073 
calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_c = mode_lib->mp.BytePerPixelC; 11074 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_l = mode_lib->mp.SwathWidthY; 11075 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_c = mode_lib->mp.SwathWidthC; 11076 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_l = mode_lib->mp.SwathHeightY; 11077 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_c = mode_lib->mp.SwathHeightC; 11078 calculate_bytes_to_fetch_required_to_hide_latency_params->latency_to_hide_us[0] = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us; 11079 11080 /* outputs */ 11081 calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_l = s->pstate_bytes_required_l[dml2_pstate_type_uclk]; 11082 calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_c = s->pstate_bytes_required_c[dml2_pstate_type_uclk]; 11083 11084 calculate_bytes_to_fetch_required_to_hide_latency(calculate_bytes_to_fetch_required_to_hide_latency_params); 11085 11086 /* Excess VActive bandwidth required to fill DET */ 11087 calculate_excess_vactive_bandwidth_required( 11088 display_cfg, 11089 s->num_active_planes, 11090 s->pstate_bytes_required_l[dml2_pstate_type_uclk], 11091 s->pstate_bytes_required_c[dml2_pstate_type_uclk], 11092 /* outputs */ 11093 mode_lib->mp.excess_vactive_fill_bw_l, 11094 mode_lib->mp.excess_vactive_fill_bw_c); 11095 11096 mode_lib->mp.UrgentLatency = CalculateUrgentLatency( 11097 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us, 11098 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us, 11099 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us, 11100 mode_lib->soc.do_urgent_latency_adjustment, 11101 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us, 11102 
mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz, 11103 mode_lib->mp.FabricClock, 11104 mode_lib->mp.uclk_freq_mhz, 11105 mode_lib->soc.qos_parameters.qos_type, 11106 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles, 11107 mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles, 11108 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, 11109 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, 11110 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin, 11111 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); 11112 11113 mode_lib->mp.TripToMemory = CalculateTripToMemory( 11114 mode_lib->mp.UrgentLatency, 11115 mode_lib->mp.FabricClock, 11116 mode_lib->mp.uclk_freq_mhz, 11117 mode_lib->soc.qos_parameters.qos_type, 11118 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles, 11119 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, 11120 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, 11121 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, 11122 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); 11123 11124 mode_lib->mp.TripToMemory = math_max2(mode_lib->mp.UrgentLatency, mode_lib->mp.TripToMemory); 11125 11126 mode_lib->mp.MetaTripToMemory = CalculateMetaTripToMemory( 11127 mode_lib->mp.UrgentLatency, 11128 mode_lib->mp.FabricClock, 11129 mode_lib->mp.uclk_freq_mhz, 11130 mode_lib->soc.qos_parameters.qos_type, 11131 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles, 11132 mode_lib->soc.qos_parameters.qos_params.dcn4x.meta_trip_adder_fclk_cycles, 11133 
mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, 11134 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); 11135 11136 for (k = 0; k < s->num_active_planes; ++k) { 11137 bool cursor_not_enough_urgent_latency_hiding = false; 11138 s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / 11139 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); 11140 11141 s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format; 11142 11143 s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, 11144 mode_lib->mp.NoOfDPP[k], 11145 display_cfg->plane_descriptors[k].composition.viewport.plane0.width, 11146 display_cfg->plane_descriptors[k].composition.viewport.plane0.height, 11147 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 11148 display_cfg->plane_descriptors[k].composition.rotation_angle); 11149 11150 s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, 11151 mode_lib->mp.NoOfDPP[k], 11152 display_cfg->plane_descriptors[k].composition.viewport.plane1.width, 11153 display_cfg->plane_descriptors[k].composition.viewport.plane1.height, 11154 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 11155 display_cfg->plane_descriptors[k].composition.rotation_angle); 11156 11157 if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) { 11158 calculate_cursor_req_attributes( 11159 display_cfg->plane_descriptors[k].cursor.cursor_width, 11160 display_cfg->plane_descriptors[k].cursor.cursor_bpp, 11161 11162 // output 11163 &s->cursor_lines_per_chunk[k], 11164 &s->cursor_bytes_per_line[k], 11165 &s->cursor_bytes_per_chunk[k], 11166 &s->cursor_bytes[k]); 11167 11168 calculate_cursor_urgent_burst_factor( 11169 
mode_lib->ip.cursor_buffer_size, 11170 display_cfg->plane_descriptors[k].cursor.cursor_width, 11171 s->cursor_bytes_per_chunk[k], 11172 s->cursor_lines_per_chunk[k], 11173 s->line_times[k], 11174 mode_lib->mp.UrgentLatency, 11175 11176 // output 11177 &mode_lib->mp.UrgentBurstFactorCursor[k], 11178 &cursor_not_enough_urgent_latency_hiding); 11179 } 11180 mode_lib->mp.UrgentBurstFactorCursorPre[k] = mode_lib->mp.UrgentBurstFactorCursor[k]; 11181 11182 CalculateUrgentBurstFactor( 11183 &display_cfg->plane_descriptors[k], 11184 mode_lib->mp.swath_width_luma_ub[k], 11185 mode_lib->mp.swath_width_chroma_ub[k], 11186 mode_lib->mp.SwathHeightY[k], 11187 mode_lib->mp.SwathHeightC[k], 11188 s->line_times[k], 11189 mode_lib->mp.UrgentLatency, 11190 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, 11191 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, 11192 mode_lib->mp.BytePerPixelInDETY[k], 11193 mode_lib->mp.BytePerPixelInDETC[k], 11194 mode_lib->mp.DETBufferSizeY[k], 11195 mode_lib->mp.DETBufferSizeC[k], 11196 11197 /* output */ 11198 &mode_lib->mp.UrgentBurstFactorLuma[k], 11199 &mode_lib->mp.UrgentBurstFactorChroma[k], 11200 &mode_lib->mp.NotEnoughUrgentLatencyHiding[k]); 11201 11202 mode_lib->mp.NotEnoughUrgentLatencyHiding[k] = mode_lib->mp.NotEnoughUrgentLatencyHiding[k] || cursor_not_enough_urgent_latency_hiding; 11203 } 11204 11205 for (k = 0; k < s->num_active_planes; ++k) { 11206 s->MaxVStartupLines[k] = CalculateMaxVStartup( 11207 mode_lib->ip.ptoi_supported, 11208 mode_lib->ip.vblank_nom_default_us, 11209 &display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing, 11210 mode_lib->mp.WritebackDelay[k]); 11211 11212 #ifdef __DML_VBA_DEBUG__ 11213 DML_LOG_VERBOSE("DML::%s: k=%u MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); 11214 DML_LOG_VERBOSE("DML::%s: k=%u WritebackDelay = %f\n", __func__, k, mode_lib->mp.WritebackDelay[k]); 11215 #endif 11216 } 11217 11218 
s->immediate_flip_required = false; 11219 for (k = 0; k < s->num_active_planes; ++k) { 11220 s->immediate_flip_required = s->immediate_flip_required || display_cfg->plane_descriptors[k].immediate_flip; 11221 } 11222 #ifdef __DML_VBA_DEBUG__ 11223 DML_LOG_VERBOSE("DML::%s: immediate_flip_required = %u\n", __func__, s->immediate_flip_required); 11224 #endif 11225 11226 if (s->num_active_planes > 1) { 11227 CheckGlobalPrefetchAdmissibility_params->num_active_planes = s->num_active_planes; 11228 CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format; 11229 CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024; 11230 CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024; 11231 CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l; 11232 CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c; 11233 CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->mp.SwathHeightY; 11234 CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->mp.SwathHeightC; 11235 CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; 11236 CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->mp.CompressedBufferSizeInkByte; 11237 CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->mp.DETBufferSizeY; 11238 CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->mp.DETBufferSizeC; 11239 CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l; 11240 CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c; 11241 CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes; 11242 CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = 0; // don't care 11243 CheckGlobalPrefetchAdmissibility_params->Tpre_oto = 0; // 
don't care 11244 CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; 11245 CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = mode_lib->mp.Dcfclk; 11246 CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times; 11247 CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->mp.dst_y_prefetch; 11248 11249 // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible 11250 CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->dummy_boolean[0]; 11251 CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre; 11252 CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params); // dont care about the check output for mode programming 11253 } 11254 11255 { 11256 s->DestinationLineTimesForPrefetchLessThan2 = false; 11257 s->VRatioPrefetchMoreThanMax = false; 11258 11259 DML_LOG_VERBOSE("DML::%s: Start one iteration of prefetch schedule evaluation\n", __func__); 11260 11261 for (k = 0; k < s->num_active_planes; ++k) { 11262 struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe; 11263 11264 DML_LOG_VERBOSE("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); 11265 mode_lib->mp.TWait[k] = CalculateTWait( 11266 display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, 11267 mode_lib->mp.UrgentLatency, 11268 mode_lib->mp.TripToMemory, 11269 !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ? 
11270 get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0); 11271 11272 myPipe->Dppclk = mode_lib->mp.Dppclk[k]; 11273 myPipe->Dispclk = mode_lib->mp.Dispclk; 11274 myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); 11275 myPipe->DCFClkDeepSleep = mode_lib->mp.DCFCLKDeepSleep; 11276 myPipe->DPPPerSurface = mode_lib->mp.NoOfDPP[k]; 11277 myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled; 11278 myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; 11279 myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; 11280 myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; 11281 myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; 11282 myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle; 11283 myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored; 11284 myPipe->BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY[k]; 11285 myPipe->BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY[k]; 11286 myPipe->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC[k]; 11287 myPipe->BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC[k]; 11288 myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced; 11289 myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors; 11290 myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active; 11291 myPipe->HTotal = 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total; 11292 myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active; 11293 myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable; 11294 myPipe->ODMMode = mode_lib->mp.ODMMode[k]; 11295 myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format; 11296 myPipe->BytePerPixelY = mode_lib->mp.BytePerPixelY[k]; 11297 myPipe->BytePerPixelC = mode_lib->mp.BytePerPixelC[k]; 11298 myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; 11299 11300 #ifdef __DML_VBA_DEBUG__ 11301 DML_LOG_VERBOSE("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k); 11302 #endif 11303 CalculatePrefetchSchedule_params->display_cfg = display_cfg; 11304 CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch; 11305 CalculatePrefetchSchedule_params->myPipe = myPipe; 11306 CalculatePrefetchSchedule_params->DSCDelay = mode_lib->mp.DSCDelay[k]; 11307 CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter; 11308 CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl; 11309 CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only; 11310 CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor; 11311 CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal; 11312 CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->mp.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); 11313 CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format; 11314 
CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters; 11315 CalculatePrefetchSchedule_params->VStartup = s->MaxVStartupLines[k]; 11316 CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes * 1024; 11317 CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable; 11318 CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled; 11319 CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required; 11320 CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes; 11321 CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->mp.ExtraLatencyPrefetch; 11322 CalculatePrefetchSchedule_params->TCalc = mode_lib->mp.TCalc; 11323 CalculatePrefetchSchedule_params->vm_bytes = mode_lib->mp.vm_bytes[k]; 11324 CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->mp.PixelPTEBytesPerRow[k]; 11325 CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->mp.PrefetchSourceLinesY[k]; 11326 CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->mp.VInitPreFillY[k]; 11327 CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->mp.MaxNumSwathY[k]; 11328 CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->mp.PrefetchSourceLinesC[k]; 11329 CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->mp.VInitPreFillC[k]; 11330 CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->mp.MaxNumSwathC[k]; 11331 CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->mp.swath_width_luma_ub[k]; 11332 CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->mp.swath_width_chroma_ub[k]; 11333 CalculatePrefetchSchedule_params->SwathHeightY = 
mode_lib->mp.SwathHeightY[k]; 11334 CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->mp.SwathHeightC[k]; 11335 CalculatePrefetchSchedule_params->TWait = mode_lib->mp.TWait[k]; 11336 CalculatePrefetchSchedule_params->Ttrip = mode_lib->mp.TripToMemory; 11337 CalculatePrefetchSchedule_params->Turg = mode_lib->mp.UrgentLatency; 11338 CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; 11339 CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k]; 11340 CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k]; 11341 CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k]; 11342 CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k]; 11343 CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0); 11344 CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k]; 11345 CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k]; 11346 CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable; 11347 CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present; 11348 CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->mp.meta_row_bytes[k]; 11349 CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor[k]; 11350 CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k]; 11351 CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->mp.vactive_sw_bw_l[k]; 11352 CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->mp.vactive_sw_bw_c[k]; 11353 11354 // output 11355 CalculatePrefetchSchedule_params->DSTXAfterScaler = &mode_lib->mp.DSTXAfterScaler[k]; 11356 CalculatePrefetchSchedule_params->DSTYAfterScaler = &mode_lib->mp.DSTYAfterScaler[k]; 11357 
CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->mp.dst_y_prefetch[k]; 11358 CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->mp.dst_y_per_vm_vblank[k]; 11359 CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->mp.dst_y_per_row_vblank[k]; 11360 CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->mp.VRatioPrefetchY[k]; 11361 CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->mp.VRatioPrefetchC[k]; 11362 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k]; 11363 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k]; 11364 CalculatePrefetchSchedule_params->RequiredPrefetchBWMax = &s->dummy_single_array[0][k]; 11365 CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->mp.NotEnoughTimeForDynamicMetadata[k]; 11366 CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->mp.Tno_bw[k]; 11367 CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->mp.Tno_bw_flip[k]; 11368 CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->mp.prefetch_vmrow_bw[k]; 11369 CalculatePrefetchSchedule_params->Tdmdl_vm = &mode_lib->mp.Tdmdl_vm[k]; 11370 CalculatePrefetchSchedule_params->Tdmdl = &mode_lib->mp.Tdmdl[k]; 11371 CalculatePrefetchSchedule_params->TSetup = &mode_lib->mp.TSetup[k]; 11372 CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k]; 11373 CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k]; 11374 CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k]; 11375 CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k]; 11376 CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k]; 11377 CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k]; 11378 CalculatePrefetchSchedule_params->VUpdateOffsetPix = 
&mode_lib->mp.VUpdateOffsetPix[k]; 11379 CalculatePrefetchSchedule_params->VUpdateWidthPix = &mode_lib->mp.VUpdateWidthPix[k]; 11380 CalculatePrefetchSchedule_params->VReadyOffsetPix = &mode_lib->mp.VReadyOffsetPix[k]; 11381 CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->mp.prefetch_cursor_bw[k]; 11382 CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k]; 11383 CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k]; 11384 CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k]; 11385 CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->dummy_single[0]; 11386 11387 mode_lib->mp.NoTimeToPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params); 11388 11389 if (s->impacted_dst_y_pre[k] > 0) 11390 mode_lib->mp.impacted_prefetch_margin_us[k] = (mode_lib->mp.dst_y_prefetch[k] - s->impacted_dst_y_pre[k]) * s->line_times[k]; 11391 else 11392 mode_lib->mp.impacted_prefetch_margin_us[k] = 0; 11393 11394 #ifdef __DML_VBA_DEBUG__ 11395 DML_LOG_VERBOSE("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]); 11396 #endif 11397 mode_lib->mp.VStartupMin[k] = s->MaxVStartupLines[k]; 11398 } // for k 11399 11400 mode_lib->mp.PrefetchModeSupported = true; 11401 for (k = 0; k < s->num_active_planes; ++k) { 11402 if (mode_lib->mp.NoTimeToPrefetch[k] == true || 11403 mode_lib->mp.NotEnoughTimeForDynamicMetadata[k] || 11404 mode_lib->mp.DSTYAfterScaler[k] > 8) { 11405 DML_LOG_VERBOSE("DML::%s: k=%u, NoTimeToPrefetch = %0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]); 11406 DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughTimeForDynamicMetadata=%u\n", __func__, k, mode_lib->mp.NotEnoughTimeForDynamicMetadata[k]); 11407 DML_LOG_VERBOSE("DML::%s: k=%u, DSTYAfterScaler=%u (should be <= 0)\n", __func__, k, mode_lib->mp.DSTYAfterScaler[k]); 11408 mode_lib->mp.PrefetchModeSupported = false; 11409 } 11410 if (mode_lib->mp.dst_y_prefetch[k] < 2) 11411 
s->DestinationLineTimesForPrefetchLessThan2 = true; 11412 11413 if (mode_lib->mp.VRatioPrefetchY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ || 11414 mode_lib->mp.VRatioPrefetchC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) { 11415 s->VRatioPrefetchMoreThanMax = true; 11416 DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPrefetchY=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k], __DML2_CALCS_MAX_VRATIO_PRE__); 11417 DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPrefetchC=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchC[k], __DML2_CALCS_MAX_VRATIO_PRE__); 11418 DML_LOG_VERBOSE("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); 11419 } 11420 11421 if (mode_lib->mp.NotEnoughUrgentLatencyHiding[k]) { 11422 DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughUrgentLatencyHiding = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHiding[k]); 11423 mode_lib->mp.PrefetchModeSupported = false; 11424 } 11425 } 11426 11427 if (s->VRatioPrefetchMoreThanMax == true || s->DestinationLineTimesForPrefetchLessThan2 == true) { 11428 DML_LOG_VERBOSE("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); 11429 DML_LOG_VERBOSE("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2); 11430 mode_lib->mp.PrefetchModeSupported = false; 11431 } 11432 11433 DML_LOG_VERBOSE("DML::%s: Prefetch schedule is %sOK at vstartup = %u\n", __func__, 11434 mode_lib->mp.PrefetchModeSupported ? 
"" : "NOT ", CalculatePrefetchSchedule_params->VStartup); 11435 11436 // Prefetch schedule OK, now check prefetch bw 11437 if (mode_lib->mp.PrefetchModeSupported == true) { 11438 for (k = 0; k < s->num_active_planes; ++k) { 11439 double line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / 11440 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); 11441 CalculateUrgentBurstFactor( 11442 &display_cfg->plane_descriptors[k], 11443 mode_lib->mp.swath_width_luma_ub[k], 11444 mode_lib->mp.swath_width_chroma_ub[k], 11445 mode_lib->mp.SwathHeightY[k], 11446 mode_lib->mp.SwathHeightC[k], 11447 line_time_us, 11448 mode_lib->mp.UrgentLatency, 11449 mode_lib->mp.VRatioPrefetchY[k], 11450 mode_lib->mp.VRatioPrefetchC[k], 11451 mode_lib->mp.BytePerPixelInDETY[k], 11452 mode_lib->mp.BytePerPixelInDETC[k], 11453 mode_lib->mp.DETBufferSizeY[k], 11454 mode_lib->mp.DETBufferSizeC[k], 11455 /* Output */ 11456 &mode_lib->mp.UrgentBurstFactorLumaPre[k], 11457 &mode_lib->mp.UrgentBurstFactorChromaPre[k], 11458 &mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]); 11459 11460 #ifdef __DML_VBA_DEBUG__ 11461 DML_LOG_VERBOSE("DML::%s: k=%0u DPPPerSurface=%u\n", __func__, k, mode_lib->mp.NoOfDPP[k]); 11462 DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorLuma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLuma[k]); 11463 DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorChroma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChroma[k]); 11464 DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorLumaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLumaPre[k]); 11465 DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorChromaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChromaPre[k]); 11466 11467 DML_LOG_VERBOSE("DML::%s: k=%0u VRatioPrefetchY=%f\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k]); 11468 DML_LOG_VERBOSE("DML::%s: k=%0u VRatioY=%f\n", __func__, k, 
display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); 11469 11470 DML_LOG_VERBOSE("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, mode_lib->mp.prefetch_vmrow_bw[k]); 11471 DML_LOG_VERBOSE("DML::%s: k=%0u vactive_sw_bw_l=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); 11472 DML_LOG_VERBOSE("DML::%s: k=%0u vactive_sw_bw_c=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); 11473 DML_LOG_VERBOSE("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, mode_lib->mp.cursor_bw[k]); 11474 DML_LOG_VERBOSE("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, mode_lib->mp.dpte_row_bw[k]); 11475 DML_LOG_VERBOSE("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, mode_lib->mp.meta_row_bw[k]); 11476 DML_LOG_VERBOSE("DML::%s: k=%0u RequiredPrefetchPixelDataBWLuma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k]); 11477 DML_LOG_VERBOSE("DML::%s: k=%0u RequiredPrefetchPixelDataBWChroma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k]); 11478 DML_LOG_VERBOSE("DML::%s: k=%0u prefetch_cursor_bw=%f\n", __func__, k, mode_lib->mp.prefetch_cursor_bw[k]); 11479 #endif 11480 } 11481 11482 for (k = 0; k <= s->num_active_planes - 1; k++) 11483 mode_lib->mp.final_flip_bw[k] = 0; 11484 11485 calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = mode_lib->mp.urg_vactive_bandwidth_required; 11486 calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->mp.urg_bandwidth_required; 11487 calculate_peak_bandwidth_params->urg_bandwidth_required_qual = mode_lib->mp.urg_bandwidth_required_qual; 11488 calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->mp.non_urg_bandwidth_required; 11489 calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw; 11490 calculate_peak_bandwidth_params->surface_peak_required_bw = s->surface_dummy_bw0; 11491 11492 calculate_peak_bandwidth_params->display_cfg = display_cfg; 11493 calculate_peak_bandwidth_params->inc_flip_bw = 0; 11494 
calculate_peak_bandwidth_params->num_active_planes = s->num_active_planes; 11495 calculate_peak_bandwidth_params->num_of_dpp = mode_lib->mp.NoOfDPP; 11496 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0; 11497 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1; 11498 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0; 11499 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1; 11500 calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor; 11501 calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->mp.mall_prefetch_dram_overhead_factor; 11502 11503 calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.vactive_sw_bw_l; 11504 calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c; 11505 calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma; 11506 calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma; 11507 calculate_peak_bandwidth_params->prefetch_bandwidth_max = s->dummy_single_array[0]; 11508 calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l; 11509 calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c; 11510 calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw; 11511 calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->mp.dpte_row_bw; 11512 calculate_peak_bandwidth_params->meta_row_bw = mode_lib->mp.meta_row_bw; 11513 calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->mp.prefetch_cursor_bw; 11514 calculate_peak_bandwidth_params->prefetch_vmrow_bw 
= mode_lib->mp.prefetch_vmrow_bw; 11515 calculate_peak_bandwidth_params->flip_bw = mode_lib->mp.final_flip_bw; 11516 calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->mp.UrgentBurstFactorLuma; 11517 calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->mp.UrgentBurstFactorChroma; 11518 calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->mp.UrgentBurstFactorCursor; 11519 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->mp.UrgentBurstFactorLumaPre; 11520 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->mp.UrgentBurstFactorChromaPre; 11521 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->mp.UrgentBurstFactorCursorPre; 11522 11523 calculate_peak_bandwidth_required( 11524 &mode_lib->scratch, 11525 calculate_peak_bandwidth_params); 11526 11527 // Check urg peak bandwidth against available urg bw 11528 // check at SDP and DRAM, for all soc states (SVP prefetch an Sys Active) 11529 check_urgent_bandwidth_support( 11530 &mode_lib->mp.FractionOfUrgentBandwidth, // double* frac_urg_bandwidth 11531 &mode_lib->mp.FractionOfUrgentBandwidthMALL, // double* frac_urg_bandwidth_mall 11532 &s->dummy_boolean[1], // vactive bw ok 11533 &mode_lib->mp.PrefetchModeSupported, // prefetch bw ok 11534 11535 mode_lib->soc.mall_allocated_for_dcn_mbytes, 11536 mode_lib->mp.non_urg_bandwidth_required, 11537 mode_lib->mp.urg_vactive_bandwidth_required, 11538 mode_lib->mp.urg_bandwidth_required, 11539 mode_lib->mp.urg_bandwidth_available); 11540 11541 if (!mode_lib->mp.PrefetchModeSupported) 11542 DML_LOG_VERBOSE("DML::%s: Bandwidth not sufficient for prefetch!\n", __func__); 11543 11544 for (k = 0; k < s->num_active_planes; ++k) { 11545 if (mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]) { 11546 DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughUrgentLatencyHidingPre = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]); 11547 
mode_lib->mp.PrefetchModeSupported = false; 11548 } 11549 } 11550 } // prefetch schedule ok 11551 11552 // Prefetch schedule and prefetch bw ok, now check flip bw 11553 if (mode_lib->mp.PrefetchModeSupported == true) { // prefetch schedule and prefetch bw ok, now check flip bw 11554 11555 mode_lib->mp.BandwidthAvailableForImmediateFlip = 11556 get_bandwidth_available_for_immediate_flip( 11557 dml2_core_internal_soc_state_sys_active, 11558 mode_lib->mp.urg_bandwidth_required_qual, // no flip 11559 mode_lib->mp.urg_bandwidth_available); 11560 mode_lib->mp.TotImmediateFlipBytes = 0; 11561 for (k = 0; k < s->num_active_planes; ++k) { 11562 if (display_cfg->plane_descriptors[k].immediate_flip) { 11563 s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes(s->HostVMInefficiencyFactor, 11564 mode_lib->mp.vm_bytes[k], 11565 mode_lib->mp.PixelPTEBytesPerRow[k], 11566 mode_lib->mp.meta_row_bytes[k]); 11567 } else { 11568 s->per_pipe_flip_bytes[k] = 0; 11569 } 11570 mode_lib->mp.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->mp.NoOfDPP[k]; 11571 #ifdef __DML_VBA_DEBUG__ 11572 DML_LOG_VERBOSE("DML::%s: k = %u\n", __func__, k); 11573 DML_LOG_VERBOSE("DML::%s: DPPPerSurface = %u\n", __func__, mode_lib->mp.NoOfDPP[k]); 11574 DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, mode_lib->mp.vm_bytes[k]); 11575 DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, mode_lib->mp.PixelPTEBytesPerRow[k]); 11576 DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %u\n", __func__, mode_lib->mp.meta_row_bytes[k]); 11577 DML_LOG_VERBOSE("DML::%s: TotImmediateFlipBytes = %u\n", __func__, mode_lib->mp.TotImmediateFlipBytes); 11578 #endif 11579 } 11580 for (k = 0; k < s->num_active_planes; ++k) { 11581 CalculateFlipSchedule( 11582 &mode_lib->scratch, 11583 display_cfg->plane_descriptors[k].immediate_flip, 11584 0, // use_lb_flip_bw 11585 s->HostVMInefficiencyFactor, 11586 s->Tvm_trips_flip[k], 11587 s->Tr0_trips_flip[k], 11588 s->Tvm_trips_flip_rounded[k], 11589 
s->Tr0_trips_flip_rounded[k], 11590 display_cfg->gpuvm_enable, 11591 mode_lib->mp.vm_bytes[k], 11592 mode_lib->mp.PixelPTEBytesPerRow[k], 11593 mode_lib->mp.BandwidthAvailableForImmediateFlip, 11594 mode_lib->mp.TotImmediateFlipBytes, 11595 display_cfg->plane_descriptors[k].pixel_format, 11596 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), 11597 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, 11598 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, 11599 mode_lib->mp.Tno_bw[k], 11600 mode_lib->mp.dpte_row_height[k], 11601 mode_lib->mp.dpte_row_height_chroma[k], 11602 mode_lib->mp.use_one_row_for_frame_flip[k], 11603 mode_lib->ip.max_flip_time_us, 11604 mode_lib->ip.max_flip_time_lines, 11605 s->per_pipe_flip_bytes[k], 11606 mode_lib->mp.meta_row_bytes[k], 11607 mode_lib->mp.meta_row_height[k], 11608 mode_lib->mp.meta_row_height_chroma[k], 11609 mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable, 11610 11611 // Output 11612 &mode_lib->mp.dst_y_per_vm_flip[k], 11613 &mode_lib->mp.dst_y_per_row_flip[k], 11614 &mode_lib->mp.final_flip_bw[k], 11615 &mode_lib->mp.ImmediateFlipSupportedForPipe[k]); 11616 } 11617 11618 calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw; 11619 calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->mp.urg_bandwidth_required_flip; 11620 calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw; 11621 calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->mp.non_urg_bandwidth_required_flip; 11622 calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw; 11623 calculate_peak_bandwidth_params->surface_peak_required_bw = s->surface_dummy_bw0; 11624 11625 
calculate_peak_bandwidth_params->display_cfg = display_cfg; 11626 calculate_peak_bandwidth_params->inc_flip_bw = 1; 11627 calculate_peak_bandwidth_params->num_active_planes = s->num_active_planes; 11628 calculate_peak_bandwidth_params->num_of_dpp = mode_lib->mp.NoOfDPP; 11629 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0; 11630 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1; 11631 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0; 11632 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1; 11633 calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor; 11634 calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->mp.mall_prefetch_dram_overhead_factor; 11635 11636 calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.vactive_sw_bw_l; 11637 calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c; 11638 calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma; 11639 calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma; 11640 calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l; 11641 calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c; 11642 calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw; 11643 calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->mp.dpte_row_bw; 11644 calculate_peak_bandwidth_params->meta_row_bw = mode_lib->mp.meta_row_bw; 11645 calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->mp.prefetch_cursor_bw; 11646 
calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->mp.prefetch_vmrow_bw; 11647 calculate_peak_bandwidth_params->prefetch_bandwidth_max = s->dummy_single_array[0]; 11648 calculate_peak_bandwidth_params->flip_bw = mode_lib->mp.final_flip_bw; 11649 calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->mp.UrgentBurstFactorLuma; 11650 calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->mp.UrgentBurstFactorChroma; 11651 calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->mp.UrgentBurstFactorCursor; 11652 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->mp.UrgentBurstFactorLumaPre; 11653 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->mp.UrgentBurstFactorChromaPre; 11654 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->mp.UrgentBurstFactorCursorPre; 11655 11656 calculate_peak_bandwidth_required( 11657 &mode_lib->scratch, 11658 calculate_peak_bandwidth_params); 11659 11660 calculate_immediate_flip_bandwidth_support( 11661 &mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip, // double* frac_urg_bandwidth_flip 11662 &mode_lib->mp.ImmediateFlipSupported, // bool* flip_bandwidth_support_ok 11663 11664 dml2_core_internal_soc_state_sys_active, 11665 mode_lib->mp.urg_bandwidth_required_flip, 11666 mode_lib->mp.non_urg_bandwidth_required_flip, 11667 mode_lib->mp.urg_bandwidth_available); 11668 11669 if (!mode_lib->mp.ImmediateFlipSupported) 11670 DML_LOG_VERBOSE("DML::%s: Bandwidth not sufficient for flip!", __func__); 11671 11672 for (k = 0; k < s->num_active_planes; ++k) { 11673 if (display_cfg->plane_descriptors[k].immediate_flip && mode_lib->mp.ImmediateFlipSupportedForPipe[k] == false) { 11674 mode_lib->mp.ImmediateFlipSupported = false; 11675 #ifdef __DML_VBA_DEBUG__ 11676 DML_LOG_VERBOSE("DML::%s: Pipe %0d not supporting iflip!\n", __func__, k); 11677 #endif 11678 } 11679 } 11680 } else { // flip or prefetch not support 11681 
mode_lib->mp.ImmediateFlipSupported = false; 11682 } 11683 11684 // consider flip support is okay if the flip bw is ok or (when user does't require a iflip and there is no host vm) 11685 must_support_iflip = display_cfg->hostvm_enable || s->immediate_flip_required; 11686 mode_lib->mp.PrefetchAndImmediateFlipSupported = (mode_lib->mp.PrefetchModeSupported == true && (!must_support_iflip || mode_lib->mp.ImmediateFlipSupported)); 11687 11688 #ifdef __DML_VBA_DEBUG__ 11689 DML_LOG_VERBOSE("DML::%s: PrefetchModeSupported = %u\n", __func__, mode_lib->mp.PrefetchModeSupported); 11690 for (k = 0; k < s->num_active_planes; ++k) 11691 DML_LOG_VERBOSE("DML::%s: immediate_flip_required[%u] = %u\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip); 11692 DML_LOG_VERBOSE("DML::%s: HostVMEnable = %u\n", __func__, display_cfg->hostvm_enable); 11693 DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupported = %u\n", __func__, mode_lib->mp.ImmediateFlipSupported); 11694 DML_LOG_VERBOSE("DML::%s: PrefetchAndImmediateFlipSupported = %u\n", __func__, mode_lib->mp.PrefetchAndImmediateFlipSupported); 11695 #endif 11696 DML_LOG_VERBOSE("DML::%s: Done one iteration: k=%d, MaxVStartupLines=%u\n", __func__, k, s->MaxVStartupLines[k]); 11697 } 11698 11699 for (k = 0; k < s->num_active_planes; ++k) 11700 DML_LOG_VERBOSE("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); 11701 11702 if (!mode_lib->mp.PrefetchAndImmediateFlipSupported) { 11703 DML_LOG_VERBOSE("DML::%s: Bad, Prefetch and flip scheduling solution NOT found!\n", __func__); 11704 } else { 11705 DML_LOG_VERBOSE("DML::%s: Good, Prefetch and flip scheduling solution found\n", __func__); 11706 11707 // DCC Configuration 11708 for (k = 0; k < s->num_active_planes; ++k) { 11709 #ifdef __DML_VBA_DEBUG__ 11710 DML_LOG_VERBOSE("DML::%s: Calculate DCC configuration for surface k=%u\n", __func__, k); 11711 #endif 11712 CalculateDCCConfiguration( 11713 display_cfg->plane_descriptors[k].surface.dcc.enable, 11714 
display_cfg->overrides.dcc_programming_assumes_scan_direction_unknown, 11715 display_cfg->plane_descriptors[k].pixel_format, 11716 display_cfg->plane_descriptors[k].surface.plane0.width, 11717 display_cfg->plane_descriptors[k].surface.plane1.width, 11718 display_cfg->plane_descriptors[k].surface.plane0.height, 11719 display_cfg->plane_descriptors[k].surface.plane1.height, 11720 s->NomDETInKByte, 11721 mode_lib->mp.Read256BlockHeightY[k], 11722 mode_lib->mp.Read256BlockHeightC[k], 11723 display_cfg->plane_descriptors[k].surface.tiling, 11724 mode_lib->mp.BytePerPixelY[k], 11725 mode_lib->mp.BytePerPixelC[k], 11726 mode_lib->mp.BytePerPixelInDETY[k], 11727 mode_lib->mp.BytePerPixelInDETC[k], 11728 display_cfg->plane_descriptors[k].composition.rotation_angle, 11729 11730 /* Output */ 11731 &mode_lib->mp.RequestLuma[k], 11732 &mode_lib->mp.RequestChroma[k], 11733 &mode_lib->mp.DCCYMaxUncompressedBlock[k], 11734 &mode_lib->mp.DCCCMaxUncompressedBlock[k], 11735 &mode_lib->mp.DCCYMaxCompressedBlock[k], 11736 &mode_lib->mp.DCCCMaxCompressedBlock[k], 11737 &mode_lib->mp.DCCYIndependentBlock[k], 11738 &mode_lib->mp.DCCCIndependentBlock[k]); 11739 } 11740 11741 //Watermarks and NB P-State/DRAM Clock Change Support 11742 s->mmSOCParameters.UrgentLatency = mode_lib->mp.UrgentLatency; 11743 s->mmSOCParameters.ExtraLatency = mode_lib->mp.ExtraLatency; 11744 s->mmSOCParameters.ExtraLatency_sr = mode_lib->mp.ExtraLatency_sr; 11745 s->mmSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us; 11746 s->mmSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us; 11747 s->mmSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us; 11748 s->mmSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us; 11749 s->mmSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us; 
11750 s->mmSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us; 11751 s->mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us; 11752 s->mmSOCParameters.USRRetrainingLatency = 0; 11753 s->mmSOCParameters.SMNLatency = 0; 11754 s->mmSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index); 11755 s->mmSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->mp.uclk_freq_mhz, mode_lib->mp.FabricClock, in_out_params->min_clk_index); 11756 s->mmSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->mp.FabricClock; 11757 s->mmSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type; 11758 11759 CalculateWatermarks_params->display_cfg = display_cfg; 11760 CalculateWatermarks_params->USRRetrainingRequired = false; 11761 CalculateWatermarks_params->NumberOfActiveSurfaces = s->num_active_planes; 11762 CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines; 11763 CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits; 11764 CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes; 11765 CalculateWatermarks_params->DCFCLK = mode_lib->mp.Dcfclk; 11766 CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings; 11767 CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change; 11768 CalculateWatermarks_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes; 11769 CalculateWatermarks_params->mmSOCParameters = s->mmSOCParameters; 11770 CalculateWatermarks_params->WritebackChunkSize = 
mode_lib->ip.writeback_chunk_size_kbytes; 11771 CalculateWatermarks_params->SOCCLK = s->SOCCLK; 11772 CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->mp.DCFCLKDeepSleep; 11773 CalculateWatermarks_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY; 11774 CalculateWatermarks_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC; 11775 CalculateWatermarks_params->SwathHeightY = mode_lib->mp.SwathHeightY; 11776 CalculateWatermarks_params->SwathHeightC = mode_lib->mp.SwathHeightC; 11777 CalculateWatermarks_params->SwathWidthY = mode_lib->mp.SwathWidthY; 11778 CalculateWatermarks_params->SwathWidthC = mode_lib->mp.SwathWidthC; 11779 CalculateWatermarks_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY; 11780 CalculateWatermarks_params->BytePerPixelDETC = mode_lib->mp.BytePerPixelInDETC; 11781 CalculateWatermarks_params->DSTXAfterScaler = mode_lib->mp.DSTXAfterScaler; 11782 CalculateWatermarks_params->DSTYAfterScaler = mode_lib->mp.DSTYAfterScaler; 11783 CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled; 11784 CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->mp.CompressedBufferSizeInkByte; 11785 CalculateWatermarks_params->meta_row_height_l = mode_lib->mp.meta_row_height; 11786 CalculateWatermarks_params->meta_row_height_c = mode_lib->mp.meta_row_height_chroma; 11787 CalculateWatermarks_params->DPPPerSurface = mode_lib->mp.NoOfDPP; 11788 11789 // Output 11790 CalculateWatermarks_params->Watermark = &mode_lib->mp.Watermark; 11791 CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->mp.DRAMClockChangeSupport; 11792 CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->mp.global_dram_clock_change_supported; 11793 CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported; 11794 CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->mp.SubViewportLinesNeededInMALL; 11795 
CalculateWatermarks_params->FCLKChangeSupport = mode_lib->mp.FCLKChangeSupport; 11796 CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->mp.global_fclk_change_supported; 11797 CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &mode_lib->mp.MaxActiveFCLKChangeLatencySupported; 11798 CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->mp.USRRetrainingSupport; 11799 CalculateWatermarks_params->g6_temp_read_support = &mode_lib->mp.g6_temp_read_support; 11800 CalculateWatermarks_params->VActiveLatencyHidingMargin = 0; 11801 CalculateWatermarks_params->VActiveLatencyHidingUs = 0; 11802 11803 CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params); 11804 11805 for (k = 0; k < s->num_active_planes; ++k) { 11806 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { 11807 mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / 11808 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark); 11809 mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / 11810 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackFCLKChangeWatermark); 11811 } else { 11812 mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = 0; 11813 mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = 0; 11814 } 11815 } 11816 11817 calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->mp.Watermark, mode_lib->mp.pstate_keepout_dst_lines); 11818 
11819 DML_LOG_VERBOSE("DML::%s: DEBUG stream_index = %0d\n", __func__, display_cfg->plane_descriptors[0].stream_index); 11820 DML_LOG_VERBOSE("DML::%s: DEBUG PixelClock = %ld kHz\n", __func__, (display_cfg->stream_descriptors[display_cfg->plane_descriptors[0].stream_index].timing.pixel_clock_khz)); 11821 11822 //Display Pipeline Delivery Time in Prefetch, Groups 11823 CalculatePixelDeliveryTimes( 11824 display_cfg, 11825 cfg_support_info, 11826 s->num_active_planes, 11827 mode_lib->mp.VRatioPrefetchY, 11828 mode_lib->mp.VRatioPrefetchC, 11829 mode_lib->mp.swath_width_luma_ub, 11830 mode_lib->mp.swath_width_chroma_ub, 11831 mode_lib->mp.PSCL_THROUGHPUT, 11832 mode_lib->mp.PSCL_THROUGHPUT_CHROMA, 11833 mode_lib->mp.Dppclk, 11834 mode_lib->mp.BytePerPixelC, 11835 mode_lib->mp.req_per_swath_ub_l, 11836 mode_lib->mp.req_per_swath_ub_c, 11837 11838 /* Output */ 11839 mode_lib->mp.DisplayPipeLineDeliveryTimeLuma, 11840 mode_lib->mp.DisplayPipeLineDeliveryTimeChroma, 11841 mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch, 11842 mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch, 11843 mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma, 11844 mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma, 11845 mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch, 11846 mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch); 11847 11848 CalculateMetaAndPTETimes_params->scratch = &mode_lib->scratch; 11849 CalculateMetaAndPTETimes_params->display_cfg = display_cfg; 11850 CalculateMetaAndPTETimes_params->NumberOfActiveSurfaces = s->num_active_planes; 11851 CalculateMetaAndPTETimes_params->use_one_row_for_frame = mode_lib->mp.use_one_row_for_frame; 11852 CalculateMetaAndPTETimes_params->dst_y_per_row_vblank = mode_lib->mp.dst_y_per_row_vblank; 11853 CalculateMetaAndPTETimes_params->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip; 11854 CalculateMetaAndPTETimes_params->BytePerPixelY = mode_lib->mp.BytePerPixelY; 11855 CalculateMetaAndPTETimes_params->BytePerPixelC = 
mode_lib->mp.BytePerPixelC; 11856 CalculateMetaAndPTETimes_params->dpte_row_height = mode_lib->mp.dpte_row_height; 11857 CalculateMetaAndPTETimes_params->dpte_row_height_chroma = mode_lib->mp.dpte_row_height_chroma; 11858 CalculateMetaAndPTETimes_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes; 11859 CalculateMetaAndPTETimes_params->PTERequestSizeY = mode_lib->mp.PTERequestSizeY; 11860 CalculateMetaAndPTETimes_params->PTERequestSizeC = mode_lib->mp.PTERequestSizeC; 11861 CalculateMetaAndPTETimes_params->PixelPTEReqWidthY = mode_lib->mp.PixelPTEReqWidthY; 11862 CalculateMetaAndPTETimes_params->PixelPTEReqHeightY = mode_lib->mp.PixelPTEReqHeightY; 11863 CalculateMetaAndPTETimes_params->PixelPTEReqWidthC = mode_lib->mp.PixelPTEReqWidthC; 11864 CalculateMetaAndPTETimes_params->PixelPTEReqHeightC = mode_lib->mp.PixelPTEReqHeightC; 11865 CalculateMetaAndPTETimes_params->dpte_row_width_luma_ub = mode_lib->mp.dpte_row_width_luma_ub; 11866 CalculateMetaAndPTETimes_params->dpte_row_width_chroma_ub = mode_lib->mp.dpte_row_width_chroma_ub; 11867 CalculateMetaAndPTETimes_params->tdlut_groups_per_2row_ub = s->tdlut_groups_per_2row_ub; 11868 CalculateMetaAndPTETimes_params->mrq_present = mode_lib->ip.dcn_mrq_present; 11869 11870 CalculateMetaAndPTETimes_params->MetaChunkSize = mode_lib->ip.meta_chunk_size_kbytes; 11871 CalculateMetaAndPTETimes_params->MinMetaChunkSizeBytes = mode_lib->ip.min_meta_chunk_size_bytes; 11872 CalculateMetaAndPTETimes_params->meta_row_width = mode_lib->mp.meta_row_width; 11873 CalculateMetaAndPTETimes_params->meta_row_width_chroma = mode_lib->mp.meta_row_width_chroma; 11874 CalculateMetaAndPTETimes_params->meta_row_height = mode_lib->mp.meta_row_height; 11875 CalculateMetaAndPTETimes_params->meta_row_height_chroma = mode_lib->mp.meta_row_height_chroma; 11876 CalculateMetaAndPTETimes_params->meta_req_width = mode_lib->mp.meta_req_width; 11877 CalculateMetaAndPTETimes_params->meta_req_width_chroma = mode_lib->mp.meta_req_width_chroma; 11878 
CalculateMetaAndPTETimes_params->meta_req_height = mode_lib->mp.meta_req_height; 11879 CalculateMetaAndPTETimes_params->meta_req_height_chroma = mode_lib->mp.meta_req_height_chroma; 11880 11881 CalculateMetaAndPTETimes_params->time_per_tdlut_group = mode_lib->mp.time_per_tdlut_group; 11882 CalculateMetaAndPTETimes_params->DST_Y_PER_PTE_ROW_NOM_L = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L; 11883 CalculateMetaAndPTETimes_params->DST_Y_PER_PTE_ROW_NOM_C = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C; 11884 CalculateMetaAndPTETimes_params->time_per_pte_group_nom_luma = mode_lib->mp.time_per_pte_group_nom_luma; 11885 CalculateMetaAndPTETimes_params->time_per_pte_group_vblank_luma = mode_lib->mp.time_per_pte_group_vblank_luma; 11886 CalculateMetaAndPTETimes_params->time_per_pte_group_flip_luma = mode_lib->mp.time_per_pte_group_flip_luma; 11887 CalculateMetaAndPTETimes_params->time_per_pte_group_nom_chroma = mode_lib->mp.time_per_pte_group_nom_chroma; 11888 CalculateMetaAndPTETimes_params->time_per_pte_group_vblank_chroma = mode_lib->mp.time_per_pte_group_vblank_chroma; 11889 CalculateMetaAndPTETimes_params->time_per_pte_group_flip_chroma = mode_lib->mp.time_per_pte_group_flip_chroma; 11890 CalculateMetaAndPTETimes_params->DST_Y_PER_META_ROW_NOM_L = mode_lib->mp.DST_Y_PER_META_ROW_NOM_L; 11891 CalculateMetaAndPTETimes_params->DST_Y_PER_META_ROW_NOM_C = mode_lib->mp.DST_Y_PER_META_ROW_NOM_C; 11892 CalculateMetaAndPTETimes_params->TimePerMetaChunkNominal = mode_lib->mp.TimePerMetaChunkNominal; 11893 CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkNominal = mode_lib->mp.TimePerChromaMetaChunkNominal; 11894 CalculateMetaAndPTETimes_params->TimePerMetaChunkVBlank = mode_lib->mp.TimePerMetaChunkVBlank; 11895 CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkVBlank = mode_lib->mp.TimePerChromaMetaChunkVBlank; 11896 CalculateMetaAndPTETimes_params->TimePerMetaChunkFlip = mode_lib->mp.TimePerMetaChunkFlip; 11897 CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkFlip = 
mode_lib->mp.TimePerChromaMetaChunkFlip; 11898 11899 CalculateMetaAndPTETimes(CalculateMetaAndPTETimes_params); 11900 11901 CalculateVMGroupAndRequestTimes( 11902 display_cfg, 11903 s->num_active_planes, 11904 mode_lib->mp.BytePerPixelC, 11905 mode_lib->mp.dst_y_per_vm_vblank, 11906 mode_lib->mp.dst_y_per_vm_flip, 11907 mode_lib->mp.dpte_row_width_luma_ub, 11908 mode_lib->mp.dpte_row_width_chroma_ub, 11909 mode_lib->mp.vm_group_bytes, 11910 mode_lib->mp.dpde0_bytes_per_frame_ub_l, 11911 mode_lib->mp.dpde0_bytes_per_frame_ub_c, 11912 s->tdlut_pte_bytes_per_frame, 11913 mode_lib->mp.meta_pte_bytes_per_frame_ub_l, 11914 mode_lib->mp.meta_pte_bytes_per_frame_ub_c, 11915 mode_lib->ip.dcn_mrq_present, 11916 11917 /* Output */ 11918 mode_lib->mp.TimePerVMGroupVBlank, 11919 mode_lib->mp.TimePerVMGroupFlip, 11920 mode_lib->mp.TimePerVMRequestVBlank, 11921 mode_lib->mp.TimePerVMRequestFlip); 11922 11923 // VStartup Adjustment 11924 for (k = 0; k < s->num_active_planes; ++k) { 11925 bool isInterlaceTiming; 11926 11927 mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TWait[k] + mode_lib->mp.ExtraLatency; 11928 if (!display_cfg->plane_descriptors[k].dynamic_meta_data.enable) 11929 mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TCalc + mode_lib->mp.MinTTUVBlank[k]; 11930 11931 #ifdef __DML_VBA_DEBUG__ 11932 DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]); 11933 #endif 11934 s->Tvstartup_margin = (s->MaxVStartupLines[k] - mode_lib->mp.VStartupMin[k]) * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); 11935 mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.MinTTUVBlank[k] + s->Tvstartup_margin; 11936 11937 #ifdef __DML_VBA_DEBUG__ 11938 DML_LOG_VERBOSE("DML::%s: k=%u, Tvstartup_margin = %f\n", __func__, k, s->Tvstartup_margin); 11939 
DML_LOG_VERBOSE("DML::%s: k=%u, MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); 11940 DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlank = %f\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]); 11941 #endif 11942 11943 mode_lib->mp.Tdmdl[k] = mode_lib->mp.Tdmdl[k] + s->Tvstartup_margin; 11944 if (display_cfg->plane_descriptors[k].dynamic_meta_data.enable && mode_lib->ip.dynamic_metadata_vm_enabled) { 11945 mode_lib->mp.Tdmdl_vm[k] = mode_lib->mp.Tdmdl_vm[k] + s->Tvstartup_margin; 11946 } 11947 11948 isInterlaceTiming = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !mode_lib->ip.ptoi_supported); 11949 11950 // The actual positioning of the vstartup 11951 mode_lib->mp.VStartup[k] = (isInterlaceTiming ? (2 * s->MaxVStartupLines[k]) : s->MaxVStartupLines[k]); 11952 11953 s->dlg_vblank_start = ((isInterlaceTiming ? math_floor2((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch) / 2.0, 1.0) : 11954 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total) - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch); 11955 s->LSetup = math_floor2(4.0 * mode_lib->mp.TSetup[k] / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)), 1.0) / 4.0; 11956 s->blank_lines_remaining = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active) - mode_lib->mp.VStartup[k]; 11957 11958 if (s->blank_lines_remaining < 0) { 11959 DML_LOG_VERBOSE("ERROR: Vstartup is larger than vblank!?\n"); 11960 
s->blank_lines_remaining = 0; 11961 DML_ASSERT(0); 11962 } 11963 mode_lib->mp.MIN_DST_Y_NEXT_START[k] = s->dlg_vblank_start + s->blank_lines_remaining + s->LSetup; 11964 11965 // debug only 11966 if (((mode_lib->mp.VUpdateOffsetPix[k] + mode_lib->mp.VUpdateWidthPix[k] + mode_lib->mp.VReadyOffsetPix[k]) / (double) display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) <= 11967 (isInterlaceTiming ? 11968 math_floor2((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch - mode_lib->mp.VStartup[k]) / 2.0, 1.0) : 11969 (int)(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch - mode_lib->mp.VStartup[k]))) { 11970 mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = true; 11971 } else { 11972 mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = false; 11973 } 11974 #ifdef __DML_VBA_DEBUG__ 11975 DML_LOG_VERBOSE("DML::%s: k=%u, VStartup = %u (max)\n", __func__, k, mode_lib->mp.VStartup[k]); 11976 DML_LOG_VERBOSE("DML::%s: k=%u, VStartupMin = %u (max)\n", __func__, k, mode_lib->mp.VStartupMin[k]); 11977 DML_LOG_VERBOSE("DML::%s: k=%u, VUpdateOffsetPix = %u\n", __func__, k, mode_lib->mp.VUpdateOffsetPix[k]); 11978 DML_LOG_VERBOSE("DML::%s: k=%u, VUpdateWidthPix = %u\n", __func__, k, mode_lib->mp.VUpdateWidthPix[k]); 11979 DML_LOG_VERBOSE("DML::%s: k=%u, VReadyOffsetPix = %u\n", __func__, k, mode_lib->mp.VReadyOffsetPix[k]); 11980 DML_LOG_VERBOSE("DML::%s: k=%u, HTotal = %lu\n", __func__, k, 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total); 11981 DML_LOG_VERBOSE("DML::%s: k=%u, VTotal = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total); 11982 DML_LOG_VERBOSE("DML::%s: k=%u, VActive = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active); 11983 DML_LOG_VERBOSE("DML::%s: k=%u, VFrontPorch = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch); 11984 DML_LOG_VERBOSE("DML::%s: k=%u, TSetup = %f\n", __func__, k, mode_lib->mp.TSetup[k]); 11985 DML_LOG_VERBOSE("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f\n", __func__, k, mode_lib->mp.MIN_DST_Y_NEXT_START[k]); 11986 DML_LOG_VERBOSE("DML::%s: k=%u, VREADY_AT_OR_AFTER_VSYNC = %u\n", __func__, k, mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k]); 11987 #endif 11988 } 11989 11990 //Maximum Bandwidth Used 11991 mode_lib->mp.TotalWRBandwidth = 0; 11992 for (k = 0; k < display_cfg->num_streams; ++k) { 11993 s->WRBandwidth = 0; 11994 if (display_cfg->stream_descriptors[k].writeback.active_writebacks_per_stream > 0) { 11995 s->WRBandwidth = display_cfg->stream_descriptors[k].writeback.writeback_stream[0].output_height 11996 * display_cfg->stream_descriptors[k].writeback.writeback_stream[0].output_width / 11997 (display_cfg->stream_descriptors[k].timing.h_total * display_cfg->stream_descriptors[k].writeback.writeback_stream[0].input_height 11998 / ((double)display_cfg->stream_descriptors[k].timing.pixel_clock_khz / 1000)) 11999 * (display_cfg->stream_descriptors[k].writeback.writeback_stream[0].pixel_format == dml2_444_32 ? 
4.0 : 8.0); 12000 mode_lib->mp.TotalWRBandwidth = mode_lib->mp.TotalWRBandwidth + s->WRBandwidth; 12001 } 12002 } 12003 12004 mode_lib->mp.TotalDataReadBandwidth = 0; 12005 for (k = 0; k < s->num_active_planes; ++k) { 12006 mode_lib->mp.TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth + mode_lib->mp.vactive_sw_bw_l[k] + mode_lib->mp.vactive_sw_bw_c[k]; 12007 #ifdef __DML_VBA_DEBUG__ 12008 DML_LOG_VERBOSE("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, mode_lib->mp.TotalDataReadBandwidth); 12009 DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); 12010 DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); 12011 #endif 12012 } 12013 12014 CalculateStutterEfficiency_params->display_cfg = display_cfg; 12015 CalculateStutterEfficiency_params->CompressedBufferSizeInkByte = mode_lib->mp.CompressedBufferSizeInkByte; 12016 CalculateStutterEfficiency_params->UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled; 12017 CalculateStutterEfficiency_params->MetaFIFOSizeInKEntries = mode_lib->ip.meta_fifo_size_in_kentries; 12018 CalculateStutterEfficiency_params->ZeroSizeBufferEntries = mode_lib->ip.zero_size_buffer_entries; 12019 CalculateStutterEfficiency_params->PixelChunkSizeInKByte = mode_lib->ip.pixel_chunk_size_kbytes; 12020 CalculateStutterEfficiency_params->NumberOfActiveSurfaces = s->num_active_planes; 12021 CalculateStutterEfficiency_params->ROBBufferSizeInKByte = mode_lib->ip.rob_buffer_size_kbytes; 12022 CalculateStutterEfficiency_params->TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth; 12023 CalculateStutterEfficiency_params->DCFCLK = mode_lib->mp.Dcfclk; 12024 CalculateStutterEfficiency_params->ReturnBW = mode_lib->mp.urg_bandwidth_available_min[dml2_core_internal_soc_state_sys_active]; 12025 CalculateStutterEfficiency_params->CompbufReservedSpace64B = mode_lib->mp.compbuf_reserved_space_64b; 12026 
CalculateStutterEfficiency_params->CompbufReservedSpaceZs = mode_lib->ip.compbuf_reserved_space_zs; 12027 CalculateStutterEfficiency_params->SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us; 12028 CalculateStutterEfficiency_params->SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us; 12029 CalculateStutterEfficiency_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings; 12030 CalculateStutterEfficiency_params->StutterEnterPlusExitWatermark = mode_lib->mp.Watermark.StutterEnterPlusExitWatermark; 12031 CalculateStutterEfficiency_params->Z8StutterEnterPlusExitWatermark = mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark; 12032 CalculateStutterEfficiency_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; 12033 CalculateStutterEfficiency_params->MinTTUVBlank = mode_lib->mp.MinTTUVBlank; 12034 CalculateStutterEfficiency_params->DPPPerSurface = mode_lib->mp.NoOfDPP; 12035 CalculateStutterEfficiency_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY; 12036 CalculateStutterEfficiency_params->BytePerPixelY = mode_lib->mp.BytePerPixelY; 12037 CalculateStutterEfficiency_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY; 12038 CalculateStutterEfficiency_params->SwathWidthY = mode_lib->mp.SwathWidthY; 12039 CalculateStutterEfficiency_params->SwathHeightY = mode_lib->mp.SwathHeightY; 12040 CalculateStutterEfficiency_params->SwathHeightC = mode_lib->mp.SwathHeightC; 12041 CalculateStutterEfficiency_params->BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY; 12042 CalculateStutterEfficiency_params->BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY; 12043 CalculateStutterEfficiency_params->BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC; 12044 CalculateStutterEfficiency_params->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC; 12045 CalculateStutterEfficiency_params->DCCYMaxUncompressedBlock = mode_lib->mp.DCCYMaxUncompressedBlock; 12046 
CalculateStutterEfficiency_params->DCCCMaxUncompressedBlock = mode_lib->mp.DCCCMaxUncompressedBlock; 12047 CalculateStutterEfficiency_params->ReadBandwidthSurfaceLuma = mode_lib->mp.vactive_sw_bw_l; 12048 CalculateStutterEfficiency_params->ReadBandwidthSurfaceChroma = mode_lib->mp.vactive_sw_bw_c; 12049 CalculateStutterEfficiency_params->dpte_row_bw = mode_lib->mp.dpte_row_bw; 12050 CalculateStutterEfficiency_params->meta_row_bw = mode_lib->mp.meta_row_bw; 12051 CalculateStutterEfficiency_params->rob_alloc_compressed = mode_lib->ip.dcn_mrq_present; 12052 12053 // output 12054 CalculateStutterEfficiency_params->StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.StutterEfficiencyNotIncludingVBlank; 12055 CalculateStutterEfficiency_params->StutterEfficiency = &mode_lib->mp.StutterEfficiency; 12056 CalculateStutterEfficiency_params->NumberOfStutterBurstsPerFrame = &mode_lib->mp.NumberOfStutterBurstsPerFrame; 12057 CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank; 12058 CalculateStutterEfficiency_params->Z8StutterEfficiency = &mode_lib->mp.Z8StutterEfficiency; 12059 CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &mode_lib->mp.Z8NumberOfStutterBurstsPerFrame; 12060 CalculateStutterEfficiency_params->StutterPeriod = &mode_lib->mp.StutterPeriod; 12061 CalculateStutterEfficiency_params->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = &mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE; 12062 12063 // Stutter Efficiency 12064 CalculateStutterEfficiency(&mode_lib->scratch, CalculateStutterEfficiency_params); 12065 12066 #ifdef __DML_VBA_ALLOW_DELTA__ 12067 // Calculate z8 stutter eff assuming 0 reserved space 12068 CalculateStutterEfficiency_params->CompbufReservedSpace64B = 0; 12069 CalculateStutterEfficiency_params->CompbufReservedSpaceZs = 0; 12070 12071 CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlankBestCase; 
12072 CalculateStutterEfficiency_params->Z8StutterEfficiency = &mode_lib->mp.Z8StutterEfficiencyBestCase; 12073 CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase; 12074 CalculateStutterEfficiency_params->StutterPeriod = &mode_lib->mp.StutterPeriodBestCase; 12075 12076 // Stutter Efficiency 12077 CalculateStutterEfficiency(&mode_lib->scratch, CalculateStutterEfficiency_params); 12078 #else 12079 mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlankBestCase = mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank; 12080 mode_lib->mp.Z8StutterEfficiencyBestCase = mode_lib->mp.Z8StutterEfficiency; 12081 mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase = mode_lib->mp.Z8NumberOfStutterBurstsPerFrame; 12082 mode_lib->mp.StutterPeriodBestCase = mode_lib->mp.StutterPeriod; 12083 #endif 12084 } // PrefetchAndImmediateFlipSupported 12085 12086 max_uclk_mhz = mode_lib->soc.clk_table.uclk.clk_values_khz[mode_lib->soc.clk_table.uclk.num_clk_values - 1] / 1000.0; 12087 min_return_latency_in_DCFCLK_cycles = (min_return_uclk_cycles / max_uclk_mhz + min_return_fclk_cycles / max_fclk_mhz) * hard_minimum_dcfclk_mhz; 12088 mode_lib->mp.min_return_latency_in_dcfclk = (unsigned int)min_return_latency_in_DCFCLK_cycles; 12089 mode_lib->mp.dcfclk_deep_sleep_hysteresis = (unsigned int)math_max2(32, (double)mode_lib->ip.pixel_chunk_size_kbytes * 1024 * 3 / 4 / 64 - min_return_latency_in_DCFCLK_cycles); 12090 DML_ASSERT(mode_lib->mp.dcfclk_deep_sleep_hysteresis < 256); 12091 12092 #ifdef __DML_VBA_DEBUG__ 12093 DML_LOG_VERBOSE("DML::%s: max_fclk_mhz = %f\n", __func__, max_fclk_mhz); 12094 DML_LOG_VERBOSE("DML::%s: max_uclk_mhz = %f\n", __func__, max_uclk_mhz); 12095 DML_LOG_VERBOSE("DML::%s: hard_minimum_dcfclk_mhz = %f\n", __func__, hard_minimum_dcfclk_mhz); 12096 DML_LOG_VERBOSE("DML::%s: min_return_uclk_cycles = %ld\n", __func__, min_return_uclk_cycles); 12097 DML_LOG_VERBOSE("DML::%s: min_return_fclk_cycles = %ld\n", 
__func__, min_return_fclk_cycles); 12098 DML_LOG_VERBOSE("DML::%s: min_return_latency_in_DCFCLK_cycles = %f\n", __func__, min_return_latency_in_DCFCLK_cycles); 12099 DML_LOG_VERBOSE("DML::%s: dcfclk_deep_sleep_hysteresis = %d \n", __func__, mode_lib->mp.dcfclk_deep_sleep_hysteresis); 12100 DML_LOG_VERBOSE("DML::%s: --- END --- \n", __func__); 12101 #endif 12102 return (in_out_params->mode_lib->mp.PrefetchAndImmediateFlipSupported); 12103 } 12104 12105 bool dml2_core_calcs_mode_programming_ex(struct dml2_core_calcs_mode_programming_ex *in_out_params) 12106 { 12107 DML_LOG_VERBOSE("DML::%s: ------------- START ----------\n", __func__); 12108 bool result = dml_core_mode_programming(in_out_params); 12109 12110 DML_LOG_VERBOSE("DML::%s: result = %0d\n", __func__, result); 12111 DML_LOG_VERBOSE("DML::%s: ------------- DONE ----------\n", __func__); 12112 return result; 12113 } 12114 12115 void dml2_core_calcs_get_dpte_row_height( 12116 unsigned int *dpte_row_height, 12117 struct dml2_core_internal_display_mode_lib *mode_lib, 12118 bool is_plane1, 12119 enum dml2_source_format_class SourcePixelFormat, 12120 enum dml2_swizzle_mode SurfaceTiling, 12121 enum dml2_rotation_angle ScanDirection, 12122 unsigned int pitch, 12123 unsigned int GPUVMMinPageSizeKBytes) 12124 { 12125 unsigned int BytePerPixelY; 12126 unsigned int BytePerPixelC; 12127 double BytePerPixelInDETY; 12128 double BytePerPixelInDETC; 12129 unsigned int BlockHeight256BytesY; 12130 unsigned int BlockHeight256BytesC; 12131 unsigned int BlockWidth256BytesY; 12132 unsigned int BlockWidth256BytesC; 12133 unsigned int MacroTileWidthY; 12134 unsigned int MacroTileWidthC; 12135 unsigned int MacroTileHeightY; 12136 unsigned int MacroTileHeightC; 12137 bool surf_linear_128_l = false; 12138 bool surf_linear_128_c = false; 12139 12140 CalculateBytePerPixelAndBlockSizes( 12141 SourcePixelFormat, 12142 SurfaceTiling, 12143 pitch, 12144 pitch, 12145 12146 /* Output */ 12147 &BytePerPixelY, 12148 &BytePerPixelC, 12149 
&BytePerPixelInDETY, 12150 &BytePerPixelInDETC, 12151 &BlockHeight256BytesY, 12152 &BlockHeight256BytesC, 12153 &BlockWidth256BytesY, 12154 &BlockWidth256BytesC, 12155 &MacroTileHeightY, 12156 &MacroTileHeightC, 12157 &MacroTileWidthY, 12158 &MacroTileWidthC, 12159 &surf_linear_128_l, 12160 &surf_linear_128_c); 12161 12162 unsigned int BytePerPixel = is_plane1 ? BytePerPixelC : BytePerPixelY; 12163 unsigned int BlockHeight256Bytes = is_plane1 ? BlockHeight256BytesC : BlockHeight256BytesY; 12164 unsigned int BlockWidth256Bytes = is_plane1 ? BlockWidth256BytesC : BlockWidth256BytesY; 12165 unsigned int MacroTileWidth = is_plane1 ? MacroTileWidthC : MacroTileWidthY; 12166 unsigned int MacroTileHeight = is_plane1 ? MacroTileHeightC : MacroTileHeightY; 12167 unsigned int PTEBufferSizeInRequests = is_plane1 ? mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma : mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma; 12168 #ifdef __DML_VBA_DEBUG__ 12169 DML_LOG_VERBOSE("DML: %s: is_plane1 = %u\n", __func__, is_plane1); 12170 DML_LOG_VERBOSE("DML: %s: BytePerPixel = %u\n", __func__, BytePerPixel); 12171 DML_LOG_VERBOSE("DML: %s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes); 12172 DML_LOG_VERBOSE("DML: %s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes); 12173 DML_LOG_VERBOSE("DML: %s: MacroTileWidth = %u\n", __func__, MacroTileWidth); 12174 DML_LOG_VERBOSE("DML: %s: MacroTileHeight = %u\n", __func__, MacroTileHeight); 12175 DML_LOG_VERBOSE("DML: %s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests); 12176 DML_LOG_VERBOSE("DML: %s: dpte_buffer_size_in_pte_reqs_luma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma); 12177 DML_LOG_VERBOSE("DML: %s: dpte_buffer_size_in_pte_reqs_chroma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma); 12178 DML_LOG_VERBOSE("DML: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes); 12179 #endif 12180 unsigned int dummy_integer[21]; 12181 12182 
mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportStationary = 0; 12183 mode_lib->scratch.calculate_vm_and_row_bytes_params.DCCEnable = 0; 12184 mode_lib->scratch.calculate_vm_and_row_bytes_params.NumberOfDPPs = 1; 12185 mode_lib->scratch.calculate_vm_and_row_bytes_params.BlockHeight256Bytes = BlockHeight256Bytes; 12186 mode_lib->scratch.calculate_vm_and_row_bytes_params.BlockWidth256Bytes = BlockWidth256Bytes; 12187 mode_lib->scratch.calculate_vm_and_row_bytes_params.SourcePixelFormat = SourcePixelFormat; 12188 mode_lib->scratch.calculate_vm_and_row_bytes_params.SurfaceTiling = SurfaceTiling; 12189 mode_lib->scratch.calculate_vm_and_row_bytes_params.BytePerPixel = BytePerPixel; 12190 mode_lib->scratch.calculate_vm_and_row_bytes_params.RotationAngle = ScanDirection; 12191 mode_lib->scratch.calculate_vm_and_row_bytes_params.SwathWidth = 0; 12192 mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportHeight = 0; 12193 mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportXStart = 0; 12194 mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportYStart = 0; 12195 mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMEnable = 1; 12196 mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = 4; 12197 mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = GPUVMMinPageSizeKBytes; 12198 mode_lib->scratch.calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = PTEBufferSizeInRequests; 12199 mode_lib->scratch.calculate_vm_and_row_bytes_params.Pitch = pitch; 12200 mode_lib->scratch.calculate_vm_and_row_bytes_params.MacroTileWidth = MacroTileWidth; 12201 mode_lib->scratch.calculate_vm_and_row_bytes_params.MacroTileHeight = MacroTileHeight; 12202 mode_lib->scratch.calculate_vm_and_row_bytes_params.is_phantom = 0; 12203 mode_lib->scratch.calculate_vm_and_row_bytes_params.DCCMetaPitch = 0; 12204 mode_lib->scratch.calculate_vm_and_row_bytes_params.mrq_present = 0; 12205 12206 
mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &dummy_integer[1]; 12207 mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &dummy_integer[2]; 12208 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_width_ub = &dummy_integer[3]; 12209 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height = dpte_row_height; 12210 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height_linear = &dummy_integer[4]; 12211 mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &dummy_integer[5]; 12212 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &dummy_integer[6]; 12213 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &dummy_integer[7]; 12214 mode_lib->scratch.calculate_vm_and_row_bytes_params.vmpg_width = &dummy_integer[8]; 12215 mode_lib->scratch.calculate_vm_and_row_bytes_params.vmpg_height = &dummy_integer[9]; 12216 mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &dummy_integer[11]; 12217 mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &dummy_integer[12]; 12218 mode_lib->scratch.calculate_vm_and_row_bytes_params.PTERequestSize = &dummy_integer[13]; 12219 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &dummy_integer[14]; 12220 12221 mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_bytes = &dummy_integer[15]; 12222 mode_lib->scratch.calculate_vm_and_row_bytes_params.MetaRequestWidth = &dummy_integer[16]; 12223 mode_lib->scratch.calculate_vm_and_row_bytes_params.MetaRequestHeight = &dummy_integer[17]; 12224 mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_width = &dummy_integer[18]; 12225 mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_height = &dummy_integer[19]; 12226 mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = 
&dummy_integer[20]; 12227 12228 // just supply with enough parameters to calculate dpte 12229 CalculateVMAndRowBytes(&mode_lib->scratch.calculate_vm_and_row_bytes_params); 12230 12231 #ifdef __DML_VBA_DEBUG__ 12232 DML_LOG_VERBOSE("DML: %s: dpte_row_height = %u\n", __func__, *dpte_row_height); 12233 #endif 12234 } 12235 12236 static bool is_dual_plane(enum dml2_source_format_class source_format) 12237 { 12238 bool ret_val = false; 12239 12240 if ((source_format == dml2_420_12) || (source_format == dml2_420_8) || (source_format == dml2_420_10) || (source_format == dml2_rgbe_alpha)) 12241 ret_val = true; 12242 12243 return ret_val; 12244 } 12245 12246 static unsigned int dml_get_plane_idx(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx) 12247 { 12248 unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; 12249 return plane_idx; 12250 } 12251 12252 static void rq_dlg_get_wm_regs(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *wm_regs) 12253 { 12254 double refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? 
/*
 * Return math_log2_approx(@a) - @subtrahend, or 0 when @a is 0.
 *
 * The subtraction is not clamped: the result is returned as unsigned int, so
 * a @subtrahend larger than the log value yields a wrapped (very large)
 * number rather than 0.
 * NOTE(review): callers appear to rely on @a being at least 2^@subtrahend;
 * confirm against the IP parameter ranges.
 */
static unsigned int log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend)
{
	unsigned int encoded = 0;

	if (a != 0)
		encoded = math_log2_approx(a) - subtrahend;

	return encoded;
}
dst_x_offset = (int) ((p->cursor_x_position + (p->cursor_stereo_en == 0 ? 0 : math_max2(p->cursor_primary_offset, p->cursor_secondary_offset)) - 12283 (p->cursor_hotspot_x * (p->cursor_2x_magnify == 0 ? 1 : 2))) * p->dlg_refclk_mhz / p->pixel_rate_mhz / p->hratio); 12284 cursor_dlg_regs->dst_x_offset = (unsigned int) ((dst_x_offset > 0) ? dst_x_offset : 0); 12285 12286 #ifdef __DML_VBA_DEBUG__ 12287 DML_LOG_VERBOSE("DML_DLG::%s: cursor_x_position=%d\n", __func__, p->cursor_x_position); 12288 DML_LOG_VERBOSE("DML_DLG::%s: dlg_refclk_mhz=%f\n", __func__, p->dlg_refclk_mhz); 12289 DML_LOG_VERBOSE("DML_DLG::%s: pixel_rate_mhz=%f\n", __func__, p->pixel_rate_mhz); 12290 DML_LOG_VERBOSE("DML_DLG::%s: dst_x_offset=%d\n", __func__, dst_x_offset); 12291 DML_LOG_VERBOSE("DML_DLG::%s: dst_x_offset=%d (reg)\n", __func__, cursor_dlg_regs->dst_x_offset); 12292 #endif 12293 12294 cursor_dlg_regs->chunk_hdl_adjust = 3; 12295 cursor_dlg_regs->dst_y_offset = 0; 12296 12297 cursor_dlg_regs->qos_level_fixed = 8; 12298 cursor_dlg_regs->qos_ramp_disable = 0; 12299 } 12300 12301 static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs, 12302 const struct dml2_display_cfg *display_cfg, 12303 const struct dml2_core_internal_display_mode_lib *mode_lib, 12304 unsigned int pipe_idx) 12305 { 12306 unsigned int plane_idx = dml_get_plane_idx(mode_lib, pipe_idx); 12307 enum dml2_source_format_class source_format = display_cfg->plane_descriptors[plane_idx].pixel_format; 12308 enum dml2_swizzle_mode sw_mode = display_cfg->plane_descriptors[plane_idx].surface.tiling; 12309 bool dual_plane = is_dual_plane((enum dml2_source_format_class)(source_format)); 12310 12311 unsigned int pixel_chunk_bytes = 0; 12312 unsigned int min_pixel_chunk_bytes = 0; 12313 unsigned int meta_chunk_bytes = 0; 12314 unsigned int min_meta_chunk_bytes = 0; 12315 unsigned int dpte_group_bytes = 0; 12316 unsigned int mpte_group_bytes = 0; 12317 12318 unsigned int p1_pixel_chunk_bytes = 0; 12319 unsigned int 
p1_min_pixel_chunk_bytes = 0; 12320 unsigned int p1_meta_chunk_bytes = 0; 12321 unsigned int p1_min_meta_chunk_bytes = 0; 12322 unsigned int p1_dpte_group_bytes = 0; 12323 unsigned int p1_mpte_group_bytes = 0; 12324 12325 unsigned int detile_buf_plane1_addr = 0; 12326 unsigned int detile_buf_size_in_bytes; 12327 double stored_swath_l_bytes; 12328 double stored_swath_c_bytes; 12329 bool is_phantom_pipe; 12330 12331 DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe[%d] start\n", __func__, pipe_idx); 12332 12333 pixel_chunk_bytes = (unsigned int)(mode_lib->ip.pixel_chunk_size_kbytes * 1024); 12334 min_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.min_pixel_chunk_size_bytes); 12335 12336 if (pixel_chunk_bytes == 64 * 1024) 12337 min_pixel_chunk_bytes = 0; 12338 12339 dpte_group_bytes = (unsigned int)(dml_get_dpte_group_size_in_bytes(mode_lib, pipe_idx)); 12340 mpte_group_bytes = (unsigned int)(dml_get_vm_group_size_in_bytes(mode_lib, pipe_idx)); 12341 12342 meta_chunk_bytes = (unsigned int)(mode_lib->ip.meta_chunk_size_kbytes * 1024); 12343 min_meta_chunk_bytes = (unsigned int)(mode_lib->ip.min_meta_chunk_size_bytes); 12344 12345 p1_pixel_chunk_bytes = pixel_chunk_bytes; 12346 p1_min_pixel_chunk_bytes = min_pixel_chunk_bytes; 12347 p1_meta_chunk_bytes = meta_chunk_bytes; 12348 p1_min_meta_chunk_bytes = min_meta_chunk_bytes; 12349 p1_dpte_group_bytes = dpte_group_bytes; 12350 p1_mpte_group_bytes = mpte_group_bytes; 12351 12352 if (source_format == dml2_rgbe_alpha) 12353 p1_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.alpha_pixel_chunk_size_kbytes * 1024); 12354 12355 rq_regs->unbounded_request_enabled = dml_get_unbounded_request_enabled(mode_lib); 12356 rq_regs->rq_regs_l.chunk_size = log_and_substract_if_non_zero(pixel_chunk_bytes, 10); 12357 rq_regs->rq_regs_c.chunk_size = log_and_substract_if_non_zero(p1_pixel_chunk_bytes, 10); 12358 12359 if (min_pixel_chunk_bytes == 0) 12360 rq_regs->rq_regs_l.min_chunk_size = 0; 12361 else 12362 
rq_regs->rq_regs_l.min_chunk_size = log_and_substract_if_non_zero(min_pixel_chunk_bytes, 8 - 1); 12363 12364 if (p1_min_pixel_chunk_bytes == 0) 12365 rq_regs->rq_regs_c.min_chunk_size = 0; 12366 else 12367 rq_regs->rq_regs_c.min_chunk_size = log_and_substract_if_non_zero(p1_min_pixel_chunk_bytes, 8 - 1); 12368 12369 rq_regs->rq_regs_l.meta_chunk_size = log_and_substract_if_non_zero(meta_chunk_bytes, 10); 12370 rq_regs->rq_regs_c.meta_chunk_size = log_and_substract_if_non_zero(p1_meta_chunk_bytes, 10); 12371 12372 if (min_meta_chunk_bytes == 0) 12373 rq_regs->rq_regs_l.min_meta_chunk_size = 0; 12374 else 12375 rq_regs->rq_regs_l.min_meta_chunk_size = log_and_substract_if_non_zero(min_meta_chunk_bytes, 6 - 1); 12376 12377 if (min_meta_chunk_bytes == 0) 12378 rq_regs->rq_regs_c.min_meta_chunk_size = 0; 12379 else 12380 rq_regs->rq_regs_c.min_meta_chunk_size = log_and_substract_if_non_zero(p1_min_meta_chunk_bytes, 6 - 1); 12381 12382 rq_regs->rq_regs_l.dpte_group_size = log_and_substract_if_non_zero(dpte_group_bytes, 6); 12383 rq_regs->rq_regs_l.mpte_group_size = log_and_substract_if_non_zero(mpte_group_bytes, 6); 12384 rq_regs->rq_regs_c.dpte_group_size = log_and_substract_if_non_zero(p1_dpte_group_bytes, 6); 12385 rq_regs->rq_regs_c.mpte_group_size = log_and_substract_if_non_zero(p1_mpte_group_bytes, 6); 12386 12387 detile_buf_size_in_bytes = (unsigned int)(dml_get_det_buffer_size_kbytes(mode_lib, pipe_idx) * 1024); 12388 12389 if (sw_mode == dml2_sw_linear && display_cfg->gpuvm_enable) { 12390 unsigned int p0_pte_row_height_linear = (unsigned int)(dml_get_dpte_row_height_linear_l(mode_lib, pipe_idx)); 12391 #ifdef __DML_VBA_DEBUG__ 12392 DML_LOG_VERBOSE("DML_DLG: %s: p0_pte_row_height_linear = %u\n", __func__, p0_pte_row_height_linear); 12393 #endif 12394 DML_ASSERT(p0_pte_row_height_linear >= 8); 12395 12396 rq_regs->rq_regs_l.pte_row_height_linear = math_log2_approx(p0_pte_row_height_linear) - 3; 12397 if (dual_plane) { 12398 unsigned int p1_pte_row_height_linear 
= (unsigned int)(dml_get_dpte_row_height_linear_c(mode_lib, pipe_idx)); 12399 12400 #ifdef __DML_VBA_DEBUG__ 12401 DML_LOG_VERBOSE("DML_DLG: %s: p1_pte_row_height_linear = %u\n", __func__, p1_pte_row_height_linear); 12402 #endif 12403 if (sw_mode == dml2_sw_linear) { 12404 DML_ASSERT(p1_pte_row_height_linear >= 8); 12405 } 12406 rq_regs->rq_regs_c.pte_row_height_linear = math_log2_approx(p1_pte_row_height_linear) - 3; 12407 } 12408 } else { 12409 rq_regs->rq_regs_l.pte_row_height_linear = 0; 12410 rq_regs->rq_regs_c.pte_row_height_linear = 0; 12411 } 12412 12413 rq_regs->rq_regs_l.swath_height = log_and_substract_if_non_zero(dml_get_swath_height_l(mode_lib, pipe_idx), 0); 12414 rq_regs->rq_regs_c.swath_height = log_and_substract_if_non_zero(dml_get_swath_height_c(mode_lib, pipe_idx), 0); 12415 12416 // FIXME_DCN4, programming guide has dGPU condition 12417 if (pixel_chunk_bytes >= 32 * 1024 || (dual_plane && p1_pixel_chunk_bytes >= 32 * 1024)) { //32kb 12418 rq_regs->drq_expansion_mode = 0; 12419 } else { 12420 rq_regs->drq_expansion_mode = 2; 12421 } 12422 rq_regs->prq_expansion_mode = 1; 12423 rq_regs->crq_expansion_mode = 1; 12424 rq_regs->mrq_expansion_mode = 1; 12425 12426 stored_swath_l_bytes = dml_get_det_stored_buffer_size_l_bytes(mode_lib, pipe_idx); 12427 stored_swath_c_bytes = dml_get_det_stored_buffer_size_c_bytes(mode_lib, pipe_idx); 12428 is_phantom_pipe = dml_get_is_phantom_pipe(display_cfg, mode_lib, pipe_idx); 12429 12430 // Note: detile_buf_plane1_addr is in unit of 1KB 12431 if (dual_plane) { 12432 if (is_phantom_pipe) { 12433 detile_buf_plane1_addr = (unsigned int)((1024.0 * 1024.0) / 2.0 / 1024.0); // half to chroma 12434 } else { 12435 if (stored_swath_l_bytes / stored_swath_c_bytes <= 1.5) { 12436 detile_buf_plane1_addr = (unsigned int)(detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma 12437 #ifdef __DML_VBA_DEBUG__ 12438 DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr); 
12439 #endif 12440 } else { 12441 detile_buf_plane1_addr = (unsigned int)(dml_round_to_multiple((unsigned int)((2.0 * detile_buf_size_in_bytes) / 3.0), 1024, 0) / 1024.0); // 2/3 to luma 12442 #ifdef __DML_VBA_DEBUG__ 12443 DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d (1/3 chroma)\n", __func__, detile_buf_plane1_addr); 12444 #endif 12445 } 12446 } 12447 } 12448 rq_regs->plane1_base_address = detile_buf_plane1_addr; 12449 12450 #ifdef __DML_VBA_DEBUG__ 12451 DML_LOG_VERBOSE("DML_DLG: %s: is_phantom_pipe = %d\n", __func__, is_phantom_pipe); 12452 DML_LOG_VERBOSE("DML_DLG: %s: stored_swath_l_bytes = %f\n", __func__, stored_swath_l_bytes); 12453 DML_LOG_VERBOSE("DML_DLG: %s: stored_swath_c_bytes = %f\n", __func__, stored_swath_c_bytes); 12454 DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_size_in_bytes = %d\n", __func__, detile_buf_size_in_bytes); 12455 DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d\n", __func__, detile_buf_plane1_addr); 12456 DML_LOG_VERBOSE("DML_DLG: %s: plane1_base_address = %d\n", __func__, rq_regs->plane1_base_address); 12457 #endif 12458 //DML_LOG_VERBOSE_rq_regs_st(rq_regs); 12459 DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx); 12460 } 12461 12462 static void rq_dlg_get_dlg_reg( 12463 struct dml2_core_internal_scratch *s, 12464 struct dml2_display_dlg_regs *disp_dlg_regs, 12465 struct dml2_display_ttu_regs *disp_ttu_regs, 12466 const struct dml2_display_cfg *display_cfg, 12467 const struct dml2_core_internal_display_mode_lib *mode_lib, 12468 const unsigned int pipe_idx) 12469 { 12470 struct dml2_core_shared_rq_dlg_get_dlg_reg_locals *l = &s->rq_dlg_get_dlg_reg_locals; 12471 12472 memset(l, 0, sizeof(struct dml2_core_shared_rq_dlg_get_dlg_reg_locals)); 12473 12474 DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe_idx=%d\n", __func__, pipe_idx); 12475 12476 l->plane_idx = dml_get_plane_idx(mode_lib, pipe_idx); 12477 DML_ASSERT(l->plane_idx < DML2_MAX_PLANES); 12478 12479 l->source_format = 
dml2_444_8; 12480 l->odm_mode = dml2_odm_mode_bypass; 12481 l->dual_plane = false; 12482 l->htotal = 0; 12483 l->hactive = 0; 12484 l->hblank_end = 0; 12485 l->vblank_end = 0; 12486 l->interlaced = false; 12487 l->pclk_freq_in_mhz = 0.0; 12488 l->refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz; 12489 l->ref_freq_to_pix_freq = 0.0; 12490 12491 if (l->plane_idx < DML2_MAX_PLANES) { 12492 12493 l->timing = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[l->plane_idx].stream_index].timing; 12494 l->source_format = display_cfg->plane_descriptors[l->plane_idx].pixel_format; 12495 l->odm_mode = mode_lib->mp.ODMMode[l->plane_idx]; 12496 12497 l->dual_plane = is_dual_plane(l->source_format); 12498 12499 l->htotal = l->timing->h_total; 12500 l->hactive = l->timing->h_active; 12501 l->hblank_end = l->timing->h_blank_end; 12502 l->vblank_end = l->timing->v_blank_end; 12503 l->interlaced = l->timing->interlaced; 12504 l->pclk_freq_in_mhz = (double)l->timing->pixel_clock_khz / 1000; 12505 l->ref_freq_to_pix_freq = l->refclk_freq_in_mhz / l->pclk_freq_in_mhz; 12506 12507 DML_LOG_VERBOSE("DML_DLG::%s: plane_idx = %d\n", __func__, l->plane_idx); 12508 DML_LOG_VERBOSE("DML_DLG: %s: htotal = %d\n", __func__, l->htotal); 12509 DML_LOG_VERBOSE("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, l->refclk_freq_in_mhz); 12510 DML_LOG_VERBOSE("DML_DLG: %s: dlg_ref_clk_mhz = %3.2f\n", __func__, display_cfg->overrides.hw.dlg_ref_clk_mhz); 12511 DML_LOG_VERBOSE("DML_DLG: %s: soc.refclk_mhz = %d\n", __func__, mode_lib->soc.dchub_refclk_mhz); 12512 DML_LOG_VERBOSE("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, l->pclk_freq_in_mhz); 12513 DML_LOG_VERBOSE("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq); 12514 DML_LOG_VERBOSE("DML_DLG: %s: interlaced = %d\n", __func__, l->interlaced); 12515 12516 DML_ASSERT(l->refclk_freq_in_mhz != 
0); 12517 DML_ASSERT(l->pclk_freq_in_mhz != 0); 12518 DML_ASSERT(l->ref_freq_to_pix_freq < 4.0); 12519 12520 // Need to figure out which side of odm combine we're in 12521 // Assume the pipe instance under the same plane is in order 12522 12523 if (l->odm_mode == dml2_odm_mode_bypass) { 12524 disp_dlg_regs->refcyc_h_blank_end = (unsigned int)((double)l->hblank_end * l->ref_freq_to_pix_freq); 12525 } else if (l->odm_mode == dml2_odm_mode_combine_2to1 || l->odm_mode == dml2_odm_mode_combine_3to1 || l->odm_mode == dml2_odm_mode_combine_4to1) { 12526 // find out how many pipe are in this plane 12527 l->num_active_pipes = mode_lib->mp.num_active_pipes; 12528 l->first_pipe_idx_in_plane = DML2_MAX_PLANES; 12529 l->pipe_idx_in_combine = 0; // pipe index within the plane 12530 l->odm_combine_factor = 2; 12531 12532 if (l->odm_mode == dml2_odm_mode_combine_3to1) 12533 l->odm_combine_factor = 3; 12534 else if (l->odm_mode == dml2_odm_mode_combine_4to1) 12535 l->odm_combine_factor = 4; 12536 12537 for (unsigned int i = 0; i < l->num_active_pipes; i++) { 12538 if (dml_get_plane_idx(mode_lib, i) == l->plane_idx) { 12539 if (i < l->first_pipe_idx_in_plane) { 12540 l->first_pipe_idx_in_plane = i; 12541 } 12542 } 12543 } 12544 l->pipe_idx_in_combine = pipe_idx - l->first_pipe_idx_in_plane; // DML assumes the pipes in the same plane will have continuous indexing (i.e. plane 0 use pipe 0, 1, and plane 1 uses pipe 2, 3, etc.) 
12545 12546 disp_dlg_regs->refcyc_h_blank_end = (unsigned int)(((double)l->hblank_end + (double)l->pipe_idx_in_combine * (double)l->hactive / (double)l->odm_combine_factor) * l->ref_freq_to_pix_freq); 12547 DML_LOG_VERBOSE("DML_DLG: %s: pipe_idx = %d\n", __func__, pipe_idx); 12548 DML_LOG_VERBOSE("DML_DLG: %s: first_pipe_idx_in_plane = %d\n", __func__, l->first_pipe_idx_in_plane); 12549 DML_LOG_VERBOSE("DML_DLG: %s: pipe_idx_in_combine = %d\n", __func__, l->pipe_idx_in_combine); 12550 DML_LOG_VERBOSE("DML_DLG: %s: odm_combine_factor = %d\n", __func__, l->odm_combine_factor); 12551 } 12552 DML_LOG_VERBOSE("DML_DLG: %s: refcyc_h_blank_end = %d\n", __func__, disp_dlg_regs->refcyc_h_blank_end); 12553 12554 DML_ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)math_pow(2, 13)); 12555 12556 disp_dlg_regs->ref_freq_to_pix_freq = (unsigned int)(l->ref_freq_to_pix_freq * math_pow(2, 19)); 12557 disp_dlg_regs->refcyc_per_htotal = (unsigned int)(l->ref_freq_to_pix_freq * (double)l->htotal * math_pow(2, 8)); 12558 disp_dlg_regs->dlg_vblank_end = l->interlaced ? 
(l->vblank_end / 2) : l->vblank_end; // 15 bits 12559 12560 l->min_ttu_vblank = mode_lib->mp.MinTTUVBlank[mode_lib->mp.pipe_plane[pipe_idx]]; 12561 l->min_dst_y_next_start = (unsigned int)(mode_lib->mp.MIN_DST_Y_NEXT_START[mode_lib->mp.pipe_plane[pipe_idx]]); 12562 12563 DML_LOG_VERBOSE("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, l->min_ttu_vblank); 12564 DML_LOG_VERBOSE("DML_DLG: %s: min_dst_y_next_start = %d\n", __func__, l->min_dst_y_next_start); 12565 DML_LOG_VERBOSE("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq); 12566 12567 l->vready_after_vcount0 = (unsigned int)(mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[mode_lib->mp.pipe_plane[pipe_idx]]); 12568 disp_dlg_regs->vready_after_vcount0 = l->vready_after_vcount0; 12569 12570 DML_LOG_VERBOSE("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0); 12571 12572 l->dst_x_after_scaler = (unsigned int)(mode_lib->mp.DSTXAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]); 12573 l->dst_y_after_scaler = (unsigned int)(mode_lib->mp.DSTYAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]); 12574 12575 DML_LOG_VERBOSE("DML_DLG: %s: dst_x_after_scaler = %d\n", __func__, l->dst_x_after_scaler); 12576 DML_LOG_VERBOSE("DML_DLG: %s: dst_y_after_scaler = %d\n", __func__, l->dst_y_after_scaler); 12577 12578 l->dst_y_prefetch = mode_lib->mp.dst_y_prefetch[mode_lib->mp.pipe_plane[pipe_idx]]; 12579 l->dst_y_per_vm_vblank = mode_lib->mp.dst_y_per_vm_vblank[mode_lib->mp.pipe_plane[pipe_idx]]; 12580 l->dst_y_per_row_vblank = mode_lib->mp.dst_y_per_row_vblank[mode_lib->mp.pipe_plane[pipe_idx]]; 12581 l->dst_y_per_vm_flip = mode_lib->mp.dst_y_per_vm_flip[mode_lib->mp.pipe_plane[pipe_idx]]; 12582 l->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip[mode_lib->mp.pipe_plane[pipe_idx]]; 12583 12584 DML_LOG_VERBOSE("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, l->dst_y_prefetch); 12585 DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, 
l->dst_y_per_vm_flip); 12586 DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, l->dst_y_per_row_flip); 12587 DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, l->dst_y_per_vm_vblank); 12588 DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, l->dst_y_per_row_vblank); 12589 12590 if (l->dst_y_prefetch > 0 && l->dst_y_per_vm_vblank > 0 && l->dst_y_per_row_vblank > 0) { 12591 DML_ASSERT(l->dst_y_prefetch > (l->dst_y_per_vm_vblank + l->dst_y_per_row_vblank)); 12592 } 12593 12594 l->vratio_pre_l = mode_lib->mp.VRatioPrefetchY[mode_lib->mp.pipe_plane[pipe_idx]]; 12595 l->vratio_pre_c = mode_lib->mp.VRatioPrefetchC[mode_lib->mp.pipe_plane[pipe_idx]]; 12596 12597 DML_LOG_VERBOSE("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, l->vratio_pre_l); 12598 DML_LOG_VERBOSE("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, l->vratio_pre_c); 12599 12600 // Active 12601 l->refcyc_per_line_delivery_pre_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; 12602 l->refcyc_per_line_delivery_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; 12603 12604 DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_l); 12605 DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_l); 12606 12607 l->refcyc_per_line_delivery_pre_c = 0.0; 12608 l->refcyc_per_line_delivery_c = 0.0; 12609 12610 if (l->dual_plane) { 12611 l->refcyc_per_line_delivery_pre_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; 12612 l->refcyc_per_line_delivery_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; 12613 12614 DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, 
l->refcyc_per_line_delivery_pre_c); 12615 DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_c); 12616 } 12617 12618 disp_dlg_regs->refcyc_per_vm_dmdata = (unsigned int)(mode_lib->mp.Tdmdl_vm[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); 12619 disp_dlg_regs->dmdata_dl_delta = (unsigned int)(mode_lib->mp.Tdmdl[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); 12620 12621 l->refcyc_per_req_delivery_pre_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; 12622 l->refcyc_per_req_delivery_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; 12623 12624 DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_l); 12625 DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_l); 12626 12627 l->refcyc_per_req_delivery_pre_c = 0.0; 12628 l->refcyc_per_req_delivery_c = 0.0; 12629 if (l->dual_plane) { 12630 l->refcyc_per_req_delivery_pre_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; 12631 l->refcyc_per_req_delivery_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; 12632 12633 DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_c); 12634 DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_c); 12635 } 12636 12637 // TTU - Cursor 12638 DML_ASSERT(display_cfg->plane_descriptors[l->plane_idx].cursor.num_cursors <= 1); 12639 12640 // Assign to register structures 12641 disp_dlg_regs->min_dst_y_next_start = (unsigned int)((double)l->min_dst_y_next_start * math_pow(2, 2)); 12642 DML_ASSERT(disp_dlg_regs->min_dst_y_next_start < 
(unsigned int)math_pow(2, 18)); 12643 12644 disp_dlg_regs->dst_y_after_scaler = l->dst_y_after_scaler; // in terms of line 12645 disp_dlg_regs->refcyc_x_after_scaler = (unsigned int)((double)l->dst_x_after_scaler * l->ref_freq_to_pix_freq); // in terms of refclk 12646 disp_dlg_regs->dst_y_prefetch = (unsigned int)(l->dst_y_prefetch * math_pow(2, 2)); 12647 disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int)(l->dst_y_per_vm_vblank * math_pow(2, 2)); 12648 disp_dlg_regs->dst_y_per_row_vblank = (unsigned int)(l->dst_y_per_row_vblank * math_pow(2, 2)); 12649 disp_dlg_regs->dst_y_per_vm_flip = (unsigned int)(l->dst_y_per_vm_flip * math_pow(2, 2)); 12650 disp_dlg_regs->dst_y_per_row_flip = (unsigned int)(l->dst_y_per_row_flip * math_pow(2, 2)); 12651 12652 disp_dlg_regs->vratio_prefetch = (unsigned int)(l->vratio_pre_l * math_pow(2, 19)); 12653 disp_dlg_regs->vratio_prefetch_c = (unsigned int)(l->vratio_pre_c * math_pow(2, 19)); 12654 12655 DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank); 12656 DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank); 12657 DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip); 12658 DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip); 12659 12660 disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(mode_lib->mp.TimePerVMGroupVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); 12661 disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(mode_lib->mp.TimePerVMGroupFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); 12662 disp_dlg_regs->refcyc_per_vm_req_vblank = (unsigned int)(mode_lib->mp.TimePerVMRequestVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz * math_pow(2, 10)); 12663 disp_dlg_regs->refcyc_per_vm_req_flip 
= (unsigned int)(mode_lib->mp.TimePerVMRequestFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz * math_pow(2, 10)); 12664 12665 l->dst_y_per_pte_row_nom_l = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L[mode_lib->mp.pipe_plane[pipe_idx]]; 12666 l->dst_y_per_pte_row_nom_c = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C[mode_lib->mp.pipe_plane[pipe_idx]]; 12667 l->refcyc_per_pte_group_nom_l = mode_lib->mp.time_per_pte_group_nom_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; 12668 l->refcyc_per_pte_group_nom_c = mode_lib->mp.time_per_pte_group_nom_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; 12669 l->refcyc_per_pte_group_vblank_l = mode_lib->mp.time_per_pte_group_vblank_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; 12670 l->refcyc_per_pte_group_vblank_c = mode_lib->mp.time_per_pte_group_vblank_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; 12671 l->refcyc_per_pte_group_flip_l = mode_lib->mp.time_per_pte_group_flip_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; 12672 l->refcyc_per_pte_group_flip_c = mode_lib->mp.time_per_pte_group_flip_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; 12673 l->refcyc_per_tdlut_group = mode_lib->mp.time_per_tdlut_group[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; 12674 12675 disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int)(l->dst_y_per_pte_row_nom_l * math_pow(2, 2)); 12676 disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int)(l->dst_y_per_pte_row_nom_c * math_pow(2, 2)); 12677 12678 disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int)(l->refcyc_per_pte_group_nom_l); 12679 disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(l->refcyc_per_pte_group_nom_c); 12680 disp_dlg_regs->refcyc_per_pte_group_vblank_l = (unsigned int)(l->refcyc_per_pte_group_vblank_l); 12681 disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int)(l->refcyc_per_pte_group_vblank_c); 12682 
disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int)(l->refcyc_per_pte_group_flip_l); 12683 disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int)(l->refcyc_per_pte_group_flip_c); 12684 disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int)math_floor2(l->refcyc_per_line_delivery_pre_l, 1); 12685 disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int)math_floor2(l->refcyc_per_line_delivery_l, 1); 12686 disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int)math_floor2(l->refcyc_per_line_delivery_pre_c, 1); 12687 disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int)math_floor2(l->refcyc_per_line_delivery_c, 1); 12688 12689 l->dst_y_per_meta_row_nom_l = mode_lib->mp.DST_Y_PER_META_ROW_NOM_L[mode_lib->mp.pipe_plane[pipe_idx]]; 12690 l->dst_y_per_meta_row_nom_c = mode_lib->mp.DST_Y_PER_META_ROW_NOM_C[mode_lib->mp.pipe_plane[pipe_idx]]; 12691 l->refcyc_per_meta_chunk_nom_l = mode_lib->mp.TimePerMetaChunkNominal[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; 12692 l->refcyc_per_meta_chunk_nom_c = mode_lib->mp.TimePerChromaMetaChunkNominal[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; 12693 l->refcyc_per_meta_chunk_vblank_l = mode_lib->mp.TimePerMetaChunkVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; 12694 l->refcyc_per_meta_chunk_vblank_c = mode_lib->mp.TimePerChromaMetaChunkVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; 12695 l->refcyc_per_meta_chunk_flip_l = mode_lib->mp.TimePerMetaChunkFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; 12696 l->refcyc_per_meta_chunk_flip_c = mode_lib->mp.TimePerChromaMetaChunkFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; 12697 12698 disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int)(l->dst_y_per_meta_row_nom_l * math_pow(2, 2)); 12699 disp_dlg_regs->dst_y_per_meta_row_nom_c = (unsigned int)(l->dst_y_per_meta_row_nom_c * math_pow(2, 2)); 12700 disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned 
int)(l->refcyc_per_meta_chunk_nom_l); 12701 disp_dlg_regs->refcyc_per_meta_chunk_nom_c = (unsigned int)(l->refcyc_per_meta_chunk_nom_c); 12702 disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = (unsigned int)(l->refcyc_per_meta_chunk_vblank_l); 12703 disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = (unsigned int)(l->refcyc_per_meta_chunk_vblank_c); 12704 disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int)(l->refcyc_per_meta_chunk_flip_l); 12705 disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int)(l->refcyc_per_meta_chunk_flip_c); 12706 12707 disp_dlg_regs->refcyc_per_tdlut_group = (unsigned int)(l->refcyc_per_tdlut_group); 12708 disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off 12709 12710 disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int)(l->refcyc_per_req_delivery_pre_l * math_pow(2, 10)); 12711 disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int)(l->refcyc_per_req_delivery_l * math_pow(2, 10)); 12712 disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int)(l->refcyc_per_req_delivery_pre_c * math_pow(2, 10)); 12713 disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int)(l->refcyc_per_req_delivery_c * math_pow(2, 10)); 12714 disp_ttu_regs->qos_level_low_wm = 0; 12715 12716 disp_ttu_regs->qos_level_high_wm = (unsigned int)(4.0 * (double)l->htotal * l->ref_freq_to_pix_freq); 12717 12718 disp_ttu_regs->qos_level_flip = 14; 12719 disp_ttu_regs->qos_level_fixed_l = 8; 12720 disp_ttu_regs->qos_level_fixed_c = 8; 12721 disp_ttu_regs->qos_ramp_disable_l = 0; 12722 disp_ttu_regs->qos_ramp_disable_c = 0; 12723 disp_ttu_regs->min_ttu_vblank = (unsigned int)(l->min_ttu_vblank * l->refclk_freq_in_mhz); 12724 12725 // CHECK for HW registers' range, DML_ASSERT or clamp 12726 DML_ASSERT(l->refcyc_per_req_delivery_pre_l < math_pow(2, 13)); 12727 DML_ASSERT(l->refcyc_per_req_delivery_l < math_pow(2, 13)); 12728 DML_ASSERT(l->refcyc_per_req_delivery_pre_c < math_pow(2, 13)); 12729 DML_ASSERT(l->refcyc_per_req_delivery_c < math_pow(2, 13)); 
12730 if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)math_pow(2, 23)) 12731 disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(math_pow(2, 23) - 1); 12732 12733 if (disp_dlg_regs->refcyc_per_vm_group_flip >= (unsigned int)math_pow(2, 23)) 12734 disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(math_pow(2, 23) - 1); 12735 12736 if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int)math_pow(2, 23)) 12737 disp_dlg_regs->refcyc_per_vm_req_vblank = (unsigned int)(math_pow(2, 23) - 1); 12738 12739 if (disp_dlg_regs->refcyc_per_vm_req_flip >= (unsigned int)math_pow(2, 23)) 12740 disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(math_pow(2, 23) - 1); 12741 12742 12743 DML_ASSERT(disp_dlg_regs->dst_y_after_scaler < 8U); 12744 DML_ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)math_pow(2, 13)); 12745 12746 if (disp_dlg_regs->dst_y_per_pte_row_nom_l >= (unsigned int)math_pow(2, 17)) { 12747 DML_LOG_VERBOSE("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_L %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_l, (unsigned int)math_pow(2, 17) - 1); 12748 l->dst_y_per_pte_row_nom_l = (unsigned int)math_pow(2, 17) - 1; 12749 } 12750 if (l->dual_plane) { 12751 if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int)math_pow(2, 17)) { 12752 DML_LOG_VERBOSE("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_C %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_c, (unsigned int)math_pow(2, 17) - 1); 12753 l->dst_y_per_pte_row_nom_c = (unsigned int)math_pow(2, 17) - 1; 12754 } 12755 } 12756 12757 if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int)math_pow(2, 23)) 12758 disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int)(math_pow(2, 23) - 1); 12759 if (l->dual_plane) { 12760 if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int)math_pow(2, 23)) 12761 disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(math_pow(2, 23) - 1); 
12762 } 12763 DML_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)math_pow(2, 13)); 12764 if (l->dual_plane) { 12765 DML_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)math_pow(2, 13)); 12766 } 12767 12768 DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)math_pow(2, 13)); 12769 DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)math_pow(2, 13)); 12770 DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)math_pow(2, 13)); 12771 DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)math_pow(2, 13)); 12772 DML_ASSERT(disp_ttu_regs->qos_level_low_wm < (unsigned int)math_pow(2, 14)); 12773 DML_ASSERT(disp_ttu_regs->qos_level_high_wm < (unsigned int)math_pow(2, 14)); 12774 DML_ASSERT(disp_ttu_regs->min_ttu_vblank < (unsigned int)math_pow(2, 24)); 12775 12776 DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx); 12777 12778 } 12779 } 12780 12781 static void rq_dlg_get_arb_params(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *arb_param) 12782 { 12783 double refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz; 12784 12785 arb_param->max_req_outstanding = mode_lib->soc.max_outstanding_reqs; 12786 arb_param->min_req_outstanding = mode_lib->soc.max_outstanding_reqs; // turn off the sat level feature if this set to max 12787 arb_param->sdpif_request_rate_limit = (3 * mode_lib->ip.words_per_channel * mode_lib->soc.clk_table.dram_config.channel_count) / 4; 12788 arb_param->sdpif_request_rate_limit = arb_param->sdpif_request_rate_limit < 96 ? 
96 : arb_param->sdpif_request_rate_limit; 12789 arb_param->sat_level_us = 60; 12790 arb_param->hvm_max_qos_commit_threshold = 0xf; 12791 arb_param->hvm_min_req_outstand_commit_threshold = 0xa; 12792 arb_param->compbuf_reserved_space_kbytes = dml_get_compbuf_reserved_space_64b(mode_lib) * 64 / 1024; 12793 arb_param->compbuf_size = mode_lib->mp.CompressedBufferSizeInkByte / mode_lib->ip.compressed_buffer_segment_size_in_kbytes; 12794 arb_param->allow_sdpif_rate_limit_when_cstate_req = dml_get_hw_debug5(mode_lib); 12795 arb_param->dcfclk_deep_sleep_hysteresis = dml_get_dcfclk_deep_sleep_hysteresis(mode_lib); 12796 arb_param->pstate_stall_threshold = (unsigned int)(mode_lib->ip_caps.fams2.max_allow_delay_us * refclk_freq_in_mhz); 12797 12798 #ifdef __DML_VBA_DEBUG__ 12799 DML_LOG_VERBOSE("DML::%s: max_req_outstanding = %d\n", __func__, arb_param->max_req_outstanding); 12800 DML_LOG_VERBOSE("DML::%s: sdpif_request_rate_limit = %d\n", __func__, arb_param->sdpif_request_rate_limit); 12801 DML_LOG_VERBOSE("DML::%s: compbuf_reserved_space_kbytes = %d\n", __func__, arb_param->compbuf_reserved_space_kbytes); 12802 DML_LOG_VERBOSE("DML::%s: allow_sdpif_rate_limit_when_cstate_req = %d\n", __func__, arb_param->allow_sdpif_rate_limit_when_cstate_req); 12803 DML_LOG_VERBOSE("DML::%s: dcfclk_deep_sleep_hysteresis = %d\n", __func__, arb_param->dcfclk_deep_sleep_hysteresis); 12804 #endif 12805 12806 } 12807 12808 void dml2_core_calcs_get_watermarks(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *out) 12809 { 12810 rq_dlg_get_wm_regs(display_cfg, mode_lib, out); 12811 } 12812 12813 void dml2_core_calcs_get_arb_params(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *out) 12814 { 12815 rq_dlg_get_arb_params(display_cfg, mode_lib, out); 12816 } 12817 12818 void dml2_core_calcs_get_pipe_regs(const struct 
dml2_display_cfg *display_cfg, 12819 struct dml2_core_internal_display_mode_lib *mode_lib, 12820 struct dml2_dchub_per_pipe_register_set *out, int pipe_index) 12821 { 12822 rq_dlg_get_rq_reg(&out->rq_regs, display_cfg, mode_lib, pipe_index); 12823 rq_dlg_get_dlg_reg(&mode_lib->scratch, &out->dlg_regs, &out->ttu_regs, display_cfg, mode_lib, pipe_index); 12824 out->det_size = dml_get_det_buffer_size_kbytes(mode_lib, pipe_index) / mode_lib->ip.config_return_buffer_segment_size_in_kbytes; 12825 } 12826 12827 void dml2_core_calcs_get_global_sync_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, union dml2_global_sync_programming *out, int pipe_index) 12828 { 12829 out->dcn4x.vready_offset_pixels = dml_get_vready_offset(mode_lib, pipe_index); 12830 out->dcn4x.vstartup_lines = dml_get_vstartup_calculated(mode_lib, pipe_index); 12831 out->dcn4x.vupdate_offset_pixels = dml_get_vupdate_offset(mode_lib, pipe_index); 12832 out->dcn4x.vupdate_vupdate_width_pixels = dml_get_vupdate_width(mode_lib, pipe_index); 12833 out->dcn4x.pstate_keepout_start_lines = dml_get_pstate_keepout_dst_lines(mode_lib, pipe_index); 12834 } 12835 12836 void dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_per_stream_programming *out, int pipe_index) 12837 { 12838 dml2_core_calcs_get_global_sync_programming(mode_lib, &out->global_sync, pipe_index); 12839 } 12840 12841 void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, 12842 const struct display_configuation_with_meta *display_cfg, 12843 struct dmub_cmd_fams2_global_config *fams2_global_config) 12844 { 12845 fams2_global_config->features.bits.enable = display_cfg->stage3.fams2_required; 12846 12847 if (fams2_global_config->features.bits.enable) { 12848 fams2_global_config->features.bits.enable_stall_recovery = true; 12849 fams2_global_config->features.bits.allow_delay_check_mode = 
FAMS2_ALLOW_DELAY_CHECK_FROM_START; 12850 12851 fams2_global_config->max_allow_delay_us = mode_lib->ip_caps.fams2.max_allow_delay_us; 12852 fams2_global_config->lock_wait_time_us = mode_lib->ip_caps.fams2.lock_timeout_us; 12853 fams2_global_config->recovery_timeout_us = mode_lib->ip_caps.fams2.recovery_timeout_us; 12854 fams2_global_config->hwfq_flip_programming_delay_us = mode_lib->ip_caps.fams2.flip_programming_delay_us; 12855 12856 fams2_global_config->num_streams = display_cfg->display_config.num_streams; 12857 } 12858 } 12859 12860 void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, 12861 const struct display_configuation_with_meta *display_cfg, 12862 union dmub_cmd_fams2_config *fams2_base_programming, 12863 union dmub_cmd_fams2_config *fams2_sub_programming, 12864 enum dml2_pstate_method pstate_method, 12865 int plane_index) 12866 { 12867 const struct dml2_plane_parameters *plane_descriptor = &display_cfg->display_config.plane_descriptors[plane_index]; 12868 const struct dml2_stream_parameters *stream_descriptor = &display_cfg->display_config.stream_descriptors[plane_descriptor->stream_index]; 12869 const struct dml2_pstate_meta *stream_pstate_meta = &display_cfg->stage3.stream_pstate_meta[plane_descriptor->stream_index]; 12870 12871 struct dmub_fams2_cmd_stream_static_base_state *base_programming = &fams2_base_programming->stream_v1.base; 12872 union dmub_fams2_cmd_stream_static_sub_state *sub_programming = &fams2_sub_programming->stream_v1.sub_state; 12873 12874 unsigned int i; 12875 12876 if (display_cfg->display_config.overrides.all_streams_blanked) { 12877 /* stream is blanked, so do nothing */ 12878 return; 12879 } 12880 12881 /* from display configuration */ 12882 base_programming->htotal = (uint16_t)stream_descriptor->timing.h_total; 12883 base_programming->vtotal = (uint16_t)stream_descriptor->timing.v_total; 12884 base_programming->vblank_start = (uint16_t)(stream_pstate_meta->nom_vtotal - 
12885 stream_descriptor->timing.v_front_porch); 12886 base_programming->vblank_end = (uint16_t)(stream_pstate_meta->nom_vtotal - 12887 stream_descriptor->timing.v_front_porch - 12888 stream_descriptor->timing.v_active); 12889 base_programming->config.bits.is_drr = stream_descriptor->timing.drr_config.enabled; 12890 12891 /* from meta */ 12892 base_programming->otg_vline_time_ns = 12893 (unsigned int)(stream_pstate_meta->otg_vline_time_us * 1000.0); 12894 base_programming->scheduling_delay_otg_vlines = (uint8_t)stream_pstate_meta->scheduling_delay_otg_vlines; 12895 base_programming->contention_delay_otg_vlines = (uint8_t)stream_pstate_meta->contention_delay_otg_vlines; 12896 base_programming->vline_int_ack_delay_otg_vlines = (uint8_t)stream_pstate_meta->vertical_interrupt_ack_delay_otg_vlines; 12897 base_programming->drr_keepout_otg_vline = (uint16_t)(stream_pstate_meta->nom_vtotal - 12898 stream_descriptor->timing.v_front_porch - 12899 stream_pstate_meta->method_drr.programming_delay_otg_vlines); 12900 base_programming->allow_to_target_delay_otg_vlines = (uint8_t)stream_pstate_meta->allow_to_target_delay_otg_vlines; 12901 base_programming->max_vtotal = (uint16_t)stream_pstate_meta->max_vtotal; 12902 12903 /* from core */ 12904 base_programming->config.bits.min_ttu_vblank_usable = true; 12905 for (i = 0; i < display_cfg->display_config.num_planes; i++) { 12906 /* check if all planes support p-state in blank */ 12907 if (display_cfg->display_config.plane_descriptors[i].stream_index == plane_descriptor->stream_index && 12908 mode_lib->mp.MinTTUVBlank[i] <= mode_lib->mp.Watermark.DRAMClockChangeWatermark) { 12909 base_programming->config.bits.min_ttu_vblank_usable = false; 12910 break; 12911 } 12912 } 12913 12914 switch (pstate_method) { 12915 case dml2_pstate_method_vactive: 12916 case dml2_pstate_method_fw_vactive_drr: 12917 /* legacy vactive */ 12918 base_programming->type = FAMS2_STREAM_TYPE_VACTIVE; 12919 sub_programming->legacy.vactive_det_fill_delay_otg_vlines = 
12920 (uint8_t)stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines; 12921 base_programming->allow_start_otg_vline = 12922 (uint16_t)stream_pstate_meta->method_vactive.common.allow_start_otg_vline; 12923 base_programming->allow_end_otg_vline = 12924 (uint16_t)stream_pstate_meta->method_vactive.common.allow_end_otg_vline; 12925 base_programming->config.bits.clamp_vtotal_min = true; 12926 break; 12927 case dml2_pstate_method_vblank: 12928 case dml2_pstate_method_fw_vblank_drr: 12929 /* legacy vblank */ 12930 base_programming->type = FAMS2_STREAM_TYPE_VBLANK; 12931 base_programming->allow_start_otg_vline = 12932 (uint16_t)stream_pstate_meta->method_vblank.common.allow_start_otg_vline; 12933 base_programming->allow_end_otg_vline = 12934 (uint16_t)stream_pstate_meta->method_vblank.common.allow_end_otg_vline; 12935 base_programming->config.bits.clamp_vtotal_min = true; 12936 break; 12937 case dml2_pstate_method_fw_drr: 12938 /* drr */ 12939 base_programming->type = FAMS2_STREAM_TYPE_DRR; 12940 sub_programming->drr.programming_delay_otg_vlines = 12941 (uint8_t)stream_pstate_meta->method_drr.programming_delay_otg_vlines; 12942 sub_programming->drr.nom_stretched_vtotal = 12943 (uint16_t)stream_pstate_meta->method_drr.stretched_vtotal; 12944 base_programming->allow_start_otg_vline = 12945 (uint16_t)stream_pstate_meta->method_drr.common.allow_start_otg_vline; 12946 base_programming->allow_end_otg_vline = 12947 (uint16_t)stream_pstate_meta->method_drr.common.allow_end_otg_vline; 12948 /* drr only clamps to vtotal min for single display */ 12949 base_programming->config.bits.clamp_vtotal_min = display_cfg->display_config.num_streams == 1; 12950 sub_programming->drr.only_stretch_if_required = true; 12951 break; 12952 case dml2_pstate_method_fw_svp: 12953 case dml2_pstate_method_fw_svp_drr: 12954 /* subvp */ 12955 base_programming->type = FAMS2_STREAM_TYPE_SUBVP; 12956 sub_programming->subvp.vratio_numerator = 12957 
(uint16_t)(plane_descriptor->composition.scaler_info.plane0.v_ratio * 1000.0); 12958 sub_programming->subvp.vratio_denominator = 1000; 12959 sub_programming->subvp.programming_delay_otg_vlines = 12960 (uint8_t)stream_pstate_meta->method_subvp.programming_delay_otg_vlines; 12961 sub_programming->subvp.prefetch_to_mall_otg_vlines = 12962 (uint8_t)stream_pstate_meta->method_subvp.prefetch_to_mall_delay_otg_vlines; 12963 sub_programming->subvp.phantom_vtotal = 12964 (uint16_t)stream_pstate_meta->method_subvp.phantom_vtotal; 12965 sub_programming->subvp.phantom_vactive = 12966 (uint16_t)stream_pstate_meta->method_subvp.phantom_vactive; 12967 sub_programming->subvp.config.bits.is_multi_planar = 12968 plane_descriptor->surface.plane1.height > 0; 12969 sub_programming->subvp.config.bits.is_yuv420 = 12970 plane_descriptor->pixel_format == dml2_420_8 || 12971 plane_descriptor->pixel_format == dml2_420_10 || 12972 plane_descriptor->pixel_format == dml2_420_12; 12973 12974 base_programming->allow_start_otg_vline = 12975 (uint16_t)stream_pstate_meta->method_subvp.common.allow_start_otg_vline; 12976 base_programming->allow_end_otg_vline = 12977 (uint16_t)stream_pstate_meta->method_subvp.common.allow_end_otg_vline; 12978 base_programming->config.bits.clamp_vtotal_min = true; 12979 break; 12980 case dml2_pstate_method_reserved_hw: 12981 case dml2_pstate_method_reserved_fw: 12982 case dml2_pstate_method_reserved_fw_drr_clamped: 12983 case dml2_pstate_method_reserved_fw_drr_var: 12984 case dml2_pstate_method_na: 12985 case dml2_pstate_method_count: 12986 default: 12987 /* this should never happen */ 12988 break; 12989 } 12990 } 12991 12992 void dml2_core_calcs_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_mcache_surface_allocation *out, int plane_idx) 12993 { 12994 unsigned int n; 12995 12996 out->num_mcaches_plane0 = dml_get_plane_num_mcaches_plane0(mode_lib, plane_idx); 12997 out->num_mcaches_plane1 = 
dml_get_plane_num_mcaches_plane1(mode_lib, plane_idx); 12998 out->shift_granularity.p0 = dml_get_plane_mcache_shift_granularity_plane0(mode_lib, plane_idx); 12999 out->shift_granularity.p1 = dml_get_plane_mcache_shift_granularity_plane1(mode_lib, plane_idx); 13000 13001 for (n = 0; n < out->num_mcaches_plane0; n++) 13002 out->mcache_x_offsets_plane0[n] = dml_get_plane_array_mcache_offsets_plane0(mode_lib, plane_idx, n); 13003 13004 for (n = 0; n < out->num_mcaches_plane1; n++) 13005 out->mcache_x_offsets_plane1[n] = dml_get_plane_array_mcache_offsets_plane1(mode_lib, plane_idx, n); 13006 13007 out->last_slice_sharing.mall_comb_mcache_p0 = dml_get_plane_mall_comb_mcache_l(mode_lib, plane_idx); 13008 out->last_slice_sharing.mall_comb_mcache_p1 = dml_get_plane_mall_comb_mcache_c(mode_lib, plane_idx); 13009 out->last_slice_sharing.plane0_plane1 = dml_get_plane_lc_comb_mcache(mode_lib, plane_idx); 13010 out->informative.meta_row_bytes_plane0 = dml_get_plane_mcache_row_bytes_plane0(mode_lib, plane_idx); 13011 out->informative.meta_row_bytes_plane1 = dml_get_plane_mcache_row_bytes_plane1(mode_lib, plane_idx); 13012 13013 out->valid = true; 13014 } 13015 13016 void dml2_core_calcs_get_mall_allocation(struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int *out, int pipe_index) 13017 { 13018 *out = dml_get_surface_size_in_mall_bytes(mode_lib, pipe_index); 13019 } 13020 13021 void dml2_core_calcs_get_plane_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_plane_support_info *out, int plane_idx) 13022 { 13023 out->mall_svp_size_requirement_ways = 0; 13024 13025 out->nominal_vblank_pstate_latency_hiding_us = 13026 (int)(display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_idx].stream_index].timing.h_total / 13027 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_idx].stream_index].timing.pixel_clock_khz / 1000) * mode_lib->ms.TWait[plane_idx]); 
13028 13029 out->dram_change_latency_hiding_margin_in_active = (int)mode_lib->ms.VActiveLatencyHidingMargin[plane_idx]; 13030 13031 out->active_latency_hiding_us = (int)mode_lib->ms.VActiveLatencyHidingUs[plane_idx]; 13032 13033 out->vactive_det_fill_delay_us[dml2_pstate_type_uclk] = 13034 (unsigned int)math_ceil(mode_lib->ms.pstate_vactive_det_fill_delay_us[dml2_pstate_type_uclk][plane_idx]); 13035 } 13036 13037 void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index) 13038 { 13039 double phantom_processing_delay_pix; 13040 unsigned int phantom_processing_delay_lines; 13041 unsigned int phantom_min_v_active_lines; 13042 unsigned int phantom_v_active_lines; 13043 unsigned int phantom_v_startup_lines; 13044 unsigned int phantom_v_blank_lines; 13045 unsigned int main_v_blank_lines; 13046 unsigned int rem; 13047 13048 phantom_processing_delay_pix = (double)((mode_lib->ip.subvp_fw_processing_delay_us + mode_lib->ip.subvp_pstate_allow_width_us) * 13049 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.pixel_clock_khz / 1000)); 13050 phantom_processing_delay_lines = (unsigned int)(phantom_processing_delay_pix / (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total); 13051 dml2_core_div_rem(phantom_processing_delay_pix, 13052 display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total, 13053 &rem); 13054 if (rem) 13055 phantom_processing_delay_lines++; 13056 13057 phantom_v_startup_lines = dml_get_plane_max_vstartup_lines(mode_lib, plane_index); 13058 phantom_min_v_active_lines = (unsigned int)math_ceil((double)dml_get_plane_subviewport_lines_needed_in_mall(mode_lib, plane_index) / 13059 display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane0.v_ratio); 
13060 phantom_v_active_lines = phantom_processing_delay_lines + phantom_min_v_active_lines + mode_lib->ip.subvp_swath_height_margin_lines; 13061 13062 // phantom_vblank = max(vbp(vstartup) + vactive + vfp(always 1) + vsync(can be 1), main_vblank) 13063 phantom_v_blank_lines = phantom_v_startup_lines + 1 + 1; 13064 main_v_blank_lines = display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.v_active; 13065 if (phantom_v_blank_lines > main_v_blank_lines) 13066 phantom_v_blank_lines = main_v_blank_lines; 13067 13068 out->phantom_v_active = phantom_v_active_lines; 13069 // phantom_vtotal = vactive + vblank 13070 out->phantom_v_total = phantom_v_active_lines + phantom_v_blank_lines; 13071 13072 out->phantom_min_v_active = phantom_min_v_active_lines; 13073 out->phantom_v_startup = phantom_v_startup_lines; 13074 13075 out->vblank_reserved_time_us = display_cfg->plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000; 13076 #if defined(__DML_VBA_DEBUG__) 13077 DML_LOG_VERBOSE("DML::%s: subvp_fw_processing_delay_us = %d\n", __func__, mode_lib->ip.subvp_fw_processing_delay_us); 13078 DML_LOG_VERBOSE("DML::%s: subvp_pstate_allow_width_us = %d\n", __func__, mode_lib->ip.subvp_pstate_allow_width_us); 13079 DML_LOG_VERBOSE("DML::%s: subvp_swath_height_margin_lines = %d\n", __func__, mode_lib->ip.subvp_swath_height_margin_lines); 13080 DML_LOG_VERBOSE("DML::%s: vblank_reserved_time_us = %u\n", __func__, out->vblank_reserved_time_us); 13081 #endif 13082 } 13083 13084 void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_cfg_programming *out) 13085 { 13086 unsigned int k, n; 13087 13088 out->informative.mode_support_info.ModeIsSupported = mode_lib->ms.support.ModeSupport; 13089 out->informative.mode_support_info.ImmediateFlipSupport = 
mode_lib->ms.support.ImmediateFlipSupport; 13090 out->informative.mode_support_info.WritebackLatencySupport = mode_lib->ms.support.WritebackLatencySupport; 13091 out->informative.mode_support_info.ScaleRatioAndTapsSupport = mode_lib->ms.support.ScaleRatioAndTapsSupport; 13092 out->informative.mode_support_info.SourceFormatPixelAndScanSupport = mode_lib->ms.support.SourceFormatPixelAndScanSupport; 13093 out->informative.mode_support_info.P2IWith420 = mode_lib->ms.support.P2IWith420; 13094 out->informative.mode_support_info.DSCOnlyIfNecessaryWithBPP = false; 13095 out->informative.mode_support_info.DSC422NativeNotSupported = mode_lib->ms.support.DSC422NativeNotSupported; 13096 out->informative.mode_support_info.LinkRateDoesNotMatchDPVersion = mode_lib->ms.support.LinkRateDoesNotMatchDPVersion; 13097 out->informative.mode_support_info.LinkRateForMultistreamNotIndicated = mode_lib->ms.support.LinkRateForMultistreamNotIndicated; 13098 out->informative.mode_support_info.BPPForMultistreamNotIndicated = mode_lib->ms.support.BPPForMultistreamNotIndicated; 13099 out->informative.mode_support_info.MultistreamWithHDMIOreDP = mode_lib->ms.support.MultistreamWithHDMIOreDP; 13100 out->informative.mode_support_info.MSOOrODMSplitWithNonDPLink = mode_lib->ms.support.MSOOrODMSplitWithNonDPLink; 13101 out->informative.mode_support_info.NotEnoughLanesForMSO = mode_lib->ms.support.NotEnoughLanesForMSO; 13102 out->informative.mode_support_info.NumberOfOTGSupport = mode_lib->ms.support.NumberOfOTGSupport; 13103 out->informative.mode_support_info.NumberOfHDMIFRLSupport = mode_lib->ms.support.NumberOfHDMIFRLSupport; 13104 out->informative.mode_support_info.NumberOfDP2p0Support = mode_lib->ms.support.NumberOfDP2p0Support; 13105 out->informative.mode_support_info.WritebackScaleRatioAndTapsSupport = mode_lib->ms.support.WritebackScaleRatioAndTapsSupport; 13106 out->informative.mode_support_info.CursorSupport = mode_lib->ms.support.CursorSupport; 13107 
out->informative.mode_support_info.PitchSupport = mode_lib->ms.support.PitchSupport; 13108 out->informative.mode_support_info.ViewportExceedsSurface = mode_lib->ms.support.ViewportExceedsSurface; 13109 out->informative.mode_support_info.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = false; 13110 out->informative.mode_support_info.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe; 13111 out->informative.mode_support_info.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen; 13112 out->informative.mode_support_info.InvalidCombinationOfMALLUseForPState = mode_lib->ms.support.InvalidCombinationOfMALLUseForPState; 13113 out->informative.mode_support_info.ExceededMALLSize = mode_lib->ms.support.ExceededMALLSize; 13114 out->informative.mode_support_info.EnoughWritebackUnits = mode_lib->ms.support.EnoughWritebackUnits; 13115 out->informative.mode_support_info.temp_read_or_ppt_support = mode_lib->ms.support.global_temp_read_or_ppt_supported; 13116 out->informative.mode_support_info.g6_temp_read_support = mode_lib->ms.support.g6_temp_read_support; 13117 13118 out->informative.mode_support_info.ExceededMultistreamSlots = mode_lib->ms.support.ExceededMultistreamSlots; 13119 out->informative.mode_support_info.NotEnoughDSCUnits = mode_lib->ms.support.NotEnoughDSCUnits; 13120 out->informative.mode_support_info.NotEnoughDSCSlices = mode_lib->ms.support.NotEnoughDSCSlices; 13121 out->informative.mode_support_info.PixelsPerLinePerDSCUnitSupport = mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport; 13122 out->informative.mode_support_info.DSCCLKRequiredMoreThanSupported = mode_lib->ms.support.DSCCLKRequiredMoreThanSupported; 13123 out->informative.mode_support_info.DTBCLKRequiredMoreThanSupported = mode_lib->ms.support.DTBCLKRequiredMoreThanSupported; 13124 
out->informative.mode_support_info.LinkCapacitySupport = mode_lib->ms.support.LinkCapacitySupport; 13125 13126 out->informative.mode_support_info.ROBSupport = mode_lib->ms.support.ROBSupport; 13127 out->informative.mode_support_info.OutstandingRequestsSupport = mode_lib->ms.support.OutstandingRequestsSupport; 13128 out->informative.mode_support_info.OutstandingRequestsUrgencyAvoidance = mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance; 13129 out->informative.mode_support_info.PTEBufferSizeNotExceeded = mode_lib->ms.support.PTEBufferSizeNotExceeded; 13130 out->informative.mode_support_info.DCCMetaBufferSizeNotExceeded = mode_lib->ms.support.DCCMetaBufferSizeNotExceeded; 13131 13132 out->informative.mode_support_info.TotalVerticalActiveBandwidthSupport = mode_lib->ms.support.AvgBandwidthSupport; 13133 out->informative.mode_support_info.VActiveBandwidthSupport = mode_lib->ms.support.UrgVactiveBandwidthSupport; 13134 out->informative.mode_support_info.USRRetrainingSupport = mode_lib->ms.support.USRRetrainingSupport; 13135 13136 out->informative.mode_support_info.PrefetchSupported = mode_lib->ms.support.PrefetchSupported; 13137 out->informative.mode_support_info.DynamicMetadataSupported = mode_lib->ms.support.DynamicMetadataSupported; 13138 out->informative.mode_support_info.VRatioInPrefetchSupported = mode_lib->ms.support.VRatioInPrefetchSupported; 13139 out->informative.mode_support_info.DISPCLK_DPPCLK_Support = mode_lib->ms.support.DISPCLK_DPPCLK_Support; 13140 out->informative.mode_support_info.TotalAvailablePipesSupport = mode_lib->ms.support.TotalAvailablePipesSupport; 13141 out->informative.mode_support_info.NumberOfTDLUT33cubeSupport = mode_lib->ms.support.NumberOfTDLUT33cubeSupport; 13142 out->informative.mode_support_info.ViewportSizeSupport = mode_lib->ms.support.ViewportSizeSupport; 13143 out->informative.mode_support_info.qos_bandwidth_support = mode_lib->ms.support.qos_bandwidth_support; 13144 out->informative.mode_support_info.dcfclk_support = 
mode_lib->ms.support.dcfclk_support; 13145 13146 for (k = 0; k < out->display_config.num_planes; k++) { 13147 13148 out->informative.mode_support_info.FCLKChangeSupport[k] = mode_lib->ms.support.FCLKChangeSupport[k]; 13149 out->informative.mode_support_info.MPCCombineEnable[k] = mode_lib->ms.support.MPCCombineEnable[k]; 13150 out->informative.mode_support_info.ODMMode[k] = mode_lib->ms.support.ODMMode[k]; 13151 out->informative.mode_support_info.DPPPerSurface[k] = mode_lib->ms.support.DPPPerSurface[k]; 13152 out->informative.mode_support_info.DSCEnabled[k] = mode_lib->ms.support.DSCEnabled[k]; 13153 out->informative.mode_support_info.FECEnabled[k] = mode_lib->ms.support.FECEnabled[k]; 13154 out->informative.mode_support_info.NumberOfDSCSlices[k] = mode_lib->ms.support.NumberOfDSCSlices[k]; 13155 out->informative.mode_support_info.OutputBpp[k] = mode_lib->ms.support.OutputBpp[k]; 13156 13157 if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_unknown) 13158 out->informative.mode_support_info.OutputType[k] = dml2_output_type_unknown; 13159 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_dp) 13160 out->informative.mode_support_info.OutputType[k] = dml2_output_type_dp; 13161 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_edp) 13162 out->informative.mode_support_info.OutputType[k] = dml2_output_type_edp; 13163 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_dp2p0) 13164 out->informative.mode_support_info.OutputType[k] = dml2_output_type_dp2p0; 13165 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_hdmi) 13166 out->informative.mode_support_info.OutputType[k] = dml2_output_type_hdmi; 13167 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_hdmifrl) 13168 out->informative.mode_support_info.OutputType[k] = dml2_output_type_hdmifrl; 13169 13170 if (mode_lib->ms.support.OutputRate[k] == 
dml2_core_internal_output_rate_unknown) 13171 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_unknown; 13172 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr) 13173 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr; 13174 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr2) 13175 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr2; 13176 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr3) 13177 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr3; 13178 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr10) 13179 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr10; 13180 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr13p5) 13181 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr13p5; 13182 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr20) 13183 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr20; 13184 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_3x3) 13185 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_3x3; 13186 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_6x3) 13187 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_6x3; 13188 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_6x4) 13189 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_6x4; 13190 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_8x4) 13191 out->informative.mode_support_info.OutputRate[k] = 
dml2_output_rate_hdmi_rate_8x4; 13192 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_10x4) 13193 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_10x4; 13194 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_12x4) 13195 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_12x4; 13196 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_16x4) 13197 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_16x4; 13198 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_20x4) 13199 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_20x4; 13200 13201 out->informative.mode_support_info.AlignedYPitch[k] = mode_lib->ms.support.AlignedYPitch[k]; 13202 out->informative.mode_support_info.AlignedCPitch[k] = mode_lib->ms.support.AlignedCPitch[k]; 13203 } 13204 13205 out->informative.watermarks.urgent_us = dml_get_wm_urgent(mode_lib); 13206 out->informative.watermarks.writeback_urgent_us = dml_get_wm_writeback_urgent(mode_lib); 13207 out->informative.watermarks.writeback_pstate_us = dml_get_wm_writeback_dram_clock_change(mode_lib); 13208 out->informative.watermarks.writeback_fclk_pstate_us = dml_get_wm_writeback_fclk_change(mode_lib); 13209 13210 out->informative.watermarks.cstate_exit_us = dml_get_wm_stutter_exit(mode_lib); 13211 out->informative.watermarks.cstate_enter_plus_exit_us = dml_get_wm_stutter_enter_exit(mode_lib); 13212 out->informative.watermarks.z8_cstate_exit_us = dml_get_wm_z8_stutter_exit(mode_lib); 13213 out->informative.watermarks.z8_cstate_enter_plus_exit_us = dml_get_wm_z8_stutter_enter_exit(mode_lib); 13214 out->informative.watermarks.pstate_change_us = dml_get_wm_dram_clock_change(mode_lib); 13215 out->informative.watermarks.fclk_pstate_change_us = dml_get_wm_fclk_change(mode_lib); 13216 
out->informative.watermarks.usr_retraining_us = dml_get_wm_usr_retraining(mode_lib); 13217 out->informative.watermarks.temp_read_or_ppt_watermark_us = dml_get_wm_temp_read_or_ppt(mode_lib); 13218 13219 out->informative.mall.total_surface_size_in_mall_bytes = 0; 13220 out->informative.dpp.total_num_dpps_required = 0; 13221 for (k = 0; k < out->display_config.num_planes; ++k) { 13222 out->informative.mall.total_surface_size_in_mall_bytes += mode_lib->mp.SurfaceSizeInTheMALL[k]; 13223 out->informative.dpp.total_num_dpps_required += mode_lib->mp.NoOfDPP[k]; 13224 } 13225 13226 out->informative.qos.min_return_latency_in_dcfclk = mode_lib->mp.min_return_latency_in_dcfclk; 13227 out->informative.qos.urgent_latency_us = dml_get_urgent_latency(mode_lib); 13228 13229 out->informative.qos.max_urgent_latency_us = dml_get_max_urgent_latency_us(mode_lib); 13230 out->informative.qos.avg_non_urgent_latency_us = dml_get_avg_non_urgent_latency_us(mode_lib); 13231 out->informative.qos.avg_urgent_latency_us = dml_get_avg_urgent_latency_us(mode_lib); 13232 13233 out->informative.qos.wm_memory_trip_us = dml_get_wm_memory_trip(mode_lib); 13234 out->informative.qos.meta_trip_memory_us = dml_get_meta_trip_memory_us(mode_lib); 13235 out->informative.qos.fraction_of_urgent_bandwidth = dml_get_fraction_of_urgent_bandwidth(mode_lib); 13236 out->informative.qos.fraction_of_urgent_bandwidth_immediate_flip = dml_get_fraction_of_urgent_bandwidth_imm_flip(mode_lib); 13237 out->informative.qos.fraction_of_urgent_bandwidth_mall = dml_get_fraction_of_urgent_bandwidth_mall(mode_lib); 13238 13239 out->informative.qos.avg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_avg_bw_required_sdp(mode_lib); 13240 out->informative.qos.avg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_avg_bw_required_dram(mode_lib); 13241 out->informative.qos.avg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_avg_bw_required_sdp(mode_lib); 13242 
out->informative.qos.avg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_avg_bw_required_dram(mode_lib); 13243 13244 out->informative.qos.avg_bw_available.sys_active.sdp_bw_mbps = dml_get_sys_active_avg_bw_available_sdp(mode_lib); 13245 out->informative.qos.avg_bw_available.sys_active.dram_bw_mbps = dml_get_sys_active_avg_bw_available_dram(mode_lib); 13246 out->informative.qos.avg_bw_available.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_avg_bw_available_sdp(mode_lib); 13247 out->informative.qos.avg_bw_available.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_avg_bw_available_dram(mode_lib); 13248 13249 out->informative.qos.urg_bw_available.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_available_sdp(mode_lib); 13250 out->informative.qos.urg_bw_available.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_available_dram(mode_lib); 13251 out->informative.qos.urg_bw_available.sys_active.dram_vm_only_bw_mbps = dml_get_sys_active_urg_bw_available_dram_vm_only(mode_lib); 13252 13253 out->informative.qos.urg_bw_available.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_available_sdp(mode_lib); 13254 out->informative.qos.urg_bw_available.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_available_dram(mode_lib); 13255 out->informative.qos.urg_bw_available.svp_prefetch.dram_vm_only_bw_mbps = dml_get_svp_prefetch_urg_bw_available_dram_vm_only(mode_lib); 13256 13257 out->informative.qos.urg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_required_sdp(mode_lib); 13258 out->informative.qos.urg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_required_dram(mode_lib); 13259 out->informative.qos.urg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_required_sdp(mode_lib); 13260 out->informative.qos.urg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_required_dram(mode_lib); 13261 13262 out->informative.qos.non_urg_bw_required.sys_active.sdp_bw_mbps = 
dml_get_sys_active_non_urg_required_sdp(mode_lib); 13263 out->informative.qos.non_urg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_non_urg_required_dram(mode_lib); 13264 out->informative.qos.non_urg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_sdp(mode_lib); 13265 out->informative.qos.non_urg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_dram(mode_lib); 13266 13267 out->informative.qos.urg_bw_required_with_flip.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_required_sdp_flip(mode_lib); 13268 out->informative.qos.urg_bw_required_with_flip.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_required_dram_flip(mode_lib); 13269 out->informative.qos.urg_bw_required_with_flip.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_required_sdp_flip(mode_lib); 13270 out->informative.qos.urg_bw_required_with_flip.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_required_dram_flip(mode_lib); 13271 13272 out->informative.qos.non_urg_bw_required_with_flip.sys_active.sdp_bw_mbps = dml_get_sys_active_non_urg_required_sdp_flip(mode_lib); 13273 out->informative.qos.non_urg_bw_required_with_flip.sys_active.dram_bw_mbps = dml_get_sys_active_non_urg_required_dram_flip(mode_lib); 13274 out->informative.qos.non_urg_bw_required_with_flip.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_sdp_flip(mode_lib); 13275 out->informative.qos.non_urg_bw_required_with_flip.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_dram_flip(mode_lib); 13276 13277 out->informative.crb.comp_buffer_size_kbytes = dml_get_comp_buffer_size_kbytes(mode_lib); 13278 out->informative.crb.UnboundedRequestEnabled = dml_get_unbounded_request_enabled(mode_lib); 13279 13280 out->informative.crb.compbuf_reserved_space_64b = dml_get_compbuf_reserved_space_64b(mode_lib); 13281 out->informative.misc.hw_debug5 = dml_get_hw_debug5(mode_lib); 13282 out->informative.misc.dcfclk_deep_sleep_hysteresis = 
dml_get_dcfclk_deep_sleep_hysteresis(mode_lib); 13283 13284 out->informative.power_management.stutter_efficiency = dml_get_stutter_efficiency_no_vblank(mode_lib); 13285 out->informative.power_management.stutter_efficiency_with_vblank = dml_get_stutter_efficiency(mode_lib); 13286 out->informative.power_management.stutter_num_bursts = dml_get_stutter_num_bursts(mode_lib); 13287 13288 out->informative.power_management.z8.stutter_efficiency = dml_get_stutter_efficiency_no_vblank_z8(mode_lib); 13289 out->informative.power_management.z8.stutter_efficiency_with_vblank = dml_get_stutter_efficiency_z8(mode_lib); 13290 out->informative.power_management.z8.stutter_num_bursts = dml_get_stutter_num_bursts_z8(mode_lib); 13291 out->informative.power_management.z8.stutter_period = dml_get_stutter_period(mode_lib); 13292 13293 out->informative.power_management.z8.bestcase.stutter_efficiency = dml_get_stutter_efficiency_z8_bestcase(mode_lib); 13294 out->informative.power_management.z8.bestcase.stutter_num_bursts = dml_get_stutter_num_bursts_z8_bestcase(mode_lib); 13295 out->informative.power_management.z8.bestcase.stutter_period = dml_get_stutter_period_bestcase(mode_lib); 13296 13297 out->informative.misc.cstate_max_cap_mode = dml_get_cstate_max_cap_mode(mode_lib); 13298 13299 out->min_clocks.dcn4x.dpprefclk_khz = (unsigned int)dml_get_global_dppclk_khz(mode_lib); 13300 13301 out->informative.qos.max_active_fclk_change_latency_supported = dml_get_fclk_change_latency(mode_lib); 13302 13303 out->informative.misc.LowestPrefetchMargin = 10 * 1000 * 1000; 13304 13305 for (k = 0; k < out->display_config.num_planes; k++) { 13306 13307 if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us) 13308 && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.fclk_change_blackout_us) 13309 && 
(out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us)) 13310 out->informative.misc.PrefetchMode[k] = 0; 13311 else if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.fclk_change_blackout_us) 13312 && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us)) 13313 out->informative.misc.PrefetchMode[k] = 1; 13314 else if (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us) 13315 out->informative.misc.PrefetchMode[k] = 2; 13316 else 13317 out->informative.misc.PrefetchMode[k] = 3; 13318 13319 out->informative.misc.min_ttu_vblank_us[k] = mode_lib->mp.MinTTUVBlank[k]; 13320 out->informative.mall.subviewport_lines_needed_in_mall[k] = mode_lib->mp.SubViewportLinesNeededInMALL[k]; 13321 out->informative.crb.det_size_in_kbytes[k] = mode_lib->mp.DETBufferSizeInKByte[k]; 13322 out->informative.crb.DETBufferSizeY[k] = mode_lib->mp.DETBufferSizeY[k]; 13323 out->informative.misc.ImmediateFlipSupportedForPipe[k] = mode_lib->mp.ImmediateFlipSupportedForPipe[k]; 13324 out->informative.misc.UsesMALLForStaticScreen[k] = mode_lib->mp.is_using_mall_for_ss[k]; 13325 out->informative.plane_info[k].dpte_row_height_plane0 = mode_lib->mp.dpte_row_height[k]; 13326 out->informative.plane_info[k].dpte_row_height_plane1 = mode_lib->mp.dpte_row_height_chroma[k]; 13327 out->informative.plane_info[k].meta_row_height_plane0 = mode_lib->mp.meta_row_height[k]; 13328 out->informative.plane_info[k].meta_row_height_plane1 = mode_lib->mp.meta_row_height_chroma[k]; 13329 out->informative.dcc_control[k].max_uncompressed_block_plane0 = mode_lib->mp.DCCYMaxUncompressedBlock[k]; 13330 
out->informative.dcc_control[k].max_compressed_block_plane0 = mode_lib->mp.DCCYMaxCompressedBlock[k]; 13331 out->informative.dcc_control[k].independent_block_plane0 = mode_lib->mp.DCCYIndependentBlock[k]; 13332 out->informative.dcc_control[k].max_uncompressed_block_plane1 = mode_lib->mp.DCCCMaxUncompressedBlock[k]; 13333 out->informative.dcc_control[k].max_compressed_block_plane1 = mode_lib->mp.DCCCMaxCompressedBlock[k]; 13334 out->informative.dcc_control[k].independent_block_plane1 = mode_lib->mp.DCCCIndependentBlock[k]; 13335 out->informative.misc.dst_x_after_scaler[k] = mode_lib->mp.DSTXAfterScaler[k]; 13336 out->informative.misc.dst_y_after_scaler[k] = mode_lib->mp.DSTYAfterScaler[k]; 13337 out->informative.misc.prefetch_source_lines_plane0[k] = mode_lib->mp.PrefetchSourceLinesY[k]; 13338 out->informative.misc.prefetch_source_lines_plane1[k] = mode_lib->mp.PrefetchSourceLinesC[k]; 13339 out->informative.misc.vready_at_or_after_vsync[k] = mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k]; 13340 out->informative.misc.min_dst_y_next_start[k] = mode_lib->mp.MIN_DST_Y_NEXT_START[k]; 13341 out->informative.plane_info[k].swath_width_plane0 = mode_lib->mp.SwathWidthY[k]; 13342 out->informative.plane_info[k].swath_height_plane0 = mode_lib->mp.SwathHeightY[k]; 13343 out->informative.plane_info[k].swath_height_plane1 = mode_lib->mp.SwathHeightC[k]; 13344 out->informative.misc.CursorDstXOffset[k] = mode_lib->mp.CursorDstXOffset[k]; 13345 out->informative.misc.CursorDstYOffset[k] = mode_lib->mp.CursorDstYOffset[k]; 13346 out->informative.misc.CursorChunkHDLAdjust[k] = mode_lib->mp.CursorChunkHDLAdjust[k]; 13347 out->informative.misc.dpte_group_bytes[k] = mode_lib->mp.dpte_group_bytes[k]; 13348 out->informative.misc.vm_group_bytes[k] = mode_lib->mp.vm_group_bytes[k]; 13349 out->informative.misc.DisplayPipeRequestDeliveryTimeLuma[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[k]; 13350 out->informative.misc.DisplayPipeRequestDeliveryTimeChroma[k] = 
mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[k]; 13351 out->informative.misc.DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[k]; 13352 out->informative.misc.DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[k]; 13353 out->informative.misc.TimePerVMGroupVBlank[k] = mode_lib->mp.TimePerVMGroupVBlank[k]; 13354 out->informative.misc.TimePerVMGroupFlip[k] = mode_lib->mp.TimePerVMGroupFlip[k]; 13355 out->informative.misc.TimePerVMRequestVBlank[k] = mode_lib->mp.TimePerVMRequestVBlank[k]; 13356 out->informative.misc.TimePerVMRequestFlip[k] = mode_lib->mp.TimePerVMRequestFlip[k]; 13357 out->informative.misc.Tdmdl_vm[k] = mode_lib->mp.Tdmdl_vm[k]; 13358 out->informative.misc.Tdmdl[k] = mode_lib->mp.Tdmdl[k]; 13359 out->informative.misc.VStartup[k] = mode_lib->mp.VStartup[k]; 13360 out->informative.misc.VUpdateOffsetPix[k] = mode_lib->mp.VUpdateOffsetPix[k]; 13361 out->informative.misc.VUpdateWidthPix[k] = mode_lib->mp.VUpdateWidthPix[k]; 13362 out->informative.misc.VReadyOffsetPix[k] = mode_lib->mp.VReadyOffsetPix[k]; 13363 13364 out->informative.misc.DST_Y_PER_PTE_ROW_NOM_L[k] = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L[k]; 13365 out->informative.misc.DST_Y_PER_PTE_ROW_NOM_C[k] = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C[k]; 13366 out->informative.misc.time_per_pte_group_nom_luma[k] = mode_lib->mp.time_per_pte_group_nom_luma[k]; 13367 out->informative.misc.time_per_pte_group_nom_chroma[k] = mode_lib->mp.time_per_pte_group_nom_chroma[k]; 13368 out->informative.misc.time_per_pte_group_vblank_luma[k] = mode_lib->mp.time_per_pte_group_vblank_luma[k]; 13369 out->informative.misc.time_per_pte_group_vblank_chroma[k] = mode_lib->mp.time_per_pte_group_vblank_chroma[k]; 13370 out->informative.misc.time_per_pte_group_flip_luma[k] = mode_lib->mp.time_per_pte_group_flip_luma[k]; 13371 out->informative.misc.time_per_pte_group_flip_chroma[k] = 
mode_lib->mp.time_per_pte_group_flip_chroma[k]; 13372 out->informative.misc.VRatioPrefetchY[k] = mode_lib->mp.VRatioPrefetchY[k]; 13373 out->informative.misc.VRatioPrefetchC[k] = mode_lib->mp.VRatioPrefetchC[k]; 13374 out->informative.misc.DestinationLinesForPrefetch[k] = mode_lib->mp.dst_y_prefetch[k]; 13375 out->informative.misc.DestinationLinesToRequestVMInVBlank[k] = mode_lib->mp.dst_y_per_vm_vblank[k]; 13376 out->informative.misc.DestinationLinesToRequestRowInVBlank[k] = mode_lib->mp.dst_y_per_row_vblank[k]; 13377 out->informative.misc.DestinationLinesToRequestVMInImmediateFlip[k] = mode_lib->mp.dst_y_per_vm_flip[k]; 13378 out->informative.misc.DestinationLinesToRequestRowInImmediateFlip[k] = mode_lib->mp.dst_y_per_row_flip[k]; 13379 out->informative.misc.DisplayPipeLineDeliveryTimeLuma[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[k]; 13380 out->informative.misc.DisplayPipeLineDeliveryTimeChroma[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[k]; 13381 out->informative.misc.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[k]; 13382 out->informative.misc.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[k]; 13383 13384 out->informative.misc.WritebackRequiredBandwidth = mode_lib->mp.TotalWRBandwidth / 1000.0; 13385 out->informative.misc.WritebackAllowDRAMClockChangeEndPosition[k] = mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k]; 13386 out->informative.misc.WritebackAllowFCLKChangeEndPosition[k] = mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k]; 13387 out->informative.misc.DSCCLK_calculated[k] = mode_lib->mp.DSCCLK[k]; 13388 out->informative.misc.BIGK_FRAGMENT_SIZE[k] = mode_lib->mp.BIGK_FRAGMENT_SIZE[k]; 13389 out->informative.misc.PTE_BUFFER_MODE[k] = mode_lib->mp.PTE_BUFFER_MODE[k]; 13390 out->informative.misc.DSCDelay[k] = mode_lib->mp.DSCDelay[k]; 13391 out->informative.misc.MaxActiveDRAMClockChangeLatencySupported[k] = 
mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported[k]; 13392 13393 if (mode_lib->mp.impacted_prefetch_margin_us[k] < out->informative.misc.LowestPrefetchMargin) 13394 out->informative.misc.LowestPrefetchMargin = mode_lib->mp.impacted_prefetch_margin_us[k]; 13395 } 13396 13397 // For this DV informative layer, all pipes in the same planes will just use the same id 13398 // will have the optimization and helper layer later on 13399 // only work when we can have high "mcache" that fit everything without thrashing the cache 13400 for (k = 0; k < out->display_config.num_planes; k++) { 13401 out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane0 = dml_get_plane_num_mcaches_plane0(mode_lib, k); 13402 out->informative.non_optimized_mcache_allocation[k].informative.meta_row_bytes_plane0 = dml_get_plane_mcache_row_bytes_plane0(mode_lib, k); 13403 13404 for (n = 0; n < out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane0; n++) { 13405 out->informative.non_optimized_mcache_allocation[k].mcache_x_offsets_plane0[n] = dml_get_plane_array_mcache_offsets_plane0(mode_lib, k, n); 13406 out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane0[n] = k; 13407 } 13408 13409 out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane1 = dml_get_plane_num_mcaches_plane1(mode_lib, k); 13410 out->informative.non_optimized_mcache_allocation[k].informative.meta_row_bytes_plane1 = dml_get_plane_mcache_row_bytes_plane1(mode_lib, k); 13411 13412 for (n = 0; n < out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane1; n++) { 13413 out->informative.non_optimized_mcache_allocation[k].mcache_x_offsets_plane1[n] = dml_get_plane_array_mcache_offsets_plane1(mode_lib, k, n); 13414 out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane1[n] = k; 13415 } 13416 } 13417 out->informative.qos.max_non_urgent_latency_us = dml_get_max_non_urgent_latency_us(mode_lib); 13418 13419 if 
(mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { 13420 if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 13421 / mode_lib->ms.support.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= out->informative.qos.max_non_urgent_latency_us) { 13422 out->informative.misc.ROBUrgencyAvoidance = true; 13423 } else { 13424 out->informative.misc.ROBUrgencyAvoidance = false; 13425 } 13426 } else { 13427 out->informative.misc.ROBUrgencyAvoidance = true; 13428 } 13429 } 13430