1 // SPDX-License-Identifier: MIT
2 //
3 // Copyright 2024 Advanced Micro Devices, Inc.
4
5
6 #include "dml2_internal_shared_types.h"
7 #include "dml2_core_shared.h"
8 #include "dml2_debug.h"
9 #include "lib_float_math.h"
10
/*
 * Divide @dividend by @divisor in floating point and return the quotient.
 *
 * Despite its name, @remainder does not receive an arithmetic remainder: it is
 * written with 1 when the quotient has a positive fractional part (after
 * truncating the quotient toward zero) and with 0 otherwise. A negative
 * quotient therefore always yields 0.
 *
 * No check is made for a zero @divisor or a NULL @remainder.
 */
double dml2_core_shared_div_rem(double dividend, unsigned int divisor, unsigned int *remainder)
{
	const double quotient = dividend / divisor;
	/* The (int) cast truncates toward zero, leaving only the fractional part. */
	const double fraction = quotient - (int)quotient;

	if (fraction > 0)
		*remainder = 1;
	else
		*remainder = 0;

	return quotient;
}
17
18 /*
19 * START OF STATIC HELPERS
 * These static methods are baseline implementations from DCN4. These should NEVER
21 * be modified when developing new DCNs. New DCN code should replace the static helpers
22 * using the function pointer pattern.
23 */
24
25 static void dml2_print_dml_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only);
26 static void get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg);
27 static unsigned int dml_round_to_multiple(unsigned int num, unsigned int multiple, bool up);
/* NOTE(review): presumably counts the active pipes across num_planes planes using cfg_support_info - confirm against the definition. */
static unsigned int dml_get_num_active_pipes(unsigned int num_planes, const struct core_display_cfg_support_info *cfg_support_info);
29 static void dml_calc_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane);
30 static bool dml_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg);
31 static bool dml_get_is_phantom_pipe(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx);
32 static void CalculateMaxDETAndMinCompressedBufferSize(unsigned int ConfigReturnBufferSizeInKByte,
33 unsigned int ConfigReturnBufferSegmentSizeInKByte,
34 unsigned int ROBBufferSizeInKByte,
35 unsigned int MaxNumDPP,
36 unsigned int nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
37 unsigned int nomDETInKByteOverrideValue, // VBA_DELTA
38 bool is_mrq_present,
39
40 // Output
41 unsigned int *MaxTotalDETInKByte,
42 unsigned int *nomDETInKByte,
43 unsigned int *MinCompressedBufferSizeInKByte);
44 static void PixelClockAdjustmentForProgressiveToInterlaceUnit(const struct dml2_display_cfg *display_cfg, bool ptoi_supported, double *PixelClockBackEnd);
45 static unsigned int dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode);
46 static bool dml_is_vertical_rotation(enum dml2_rotation_angle Scan);
/* NOTE(review): presumably maps a swizzle mode to a GFX version number - confirm against the definition. */
static unsigned int dml_get_gfx_version(enum dml2_swizzle_mode sw_mode);
48 static void CalculateBytePerPixelAndBlockSizes(enum dml2_source_format_class SourcePixelFormat,
49 enum dml2_swizzle_mode SurfaceTiling,
50 unsigned int pitch_y,
51 unsigned int pitch_c,
52
53 // Output
54 unsigned int *BytePerPixelY,
55 unsigned int *BytePerPixelC,
56 double *BytePerPixelDETY,
57 double *BytePerPixelDETC,
58 unsigned int *BlockHeight256BytesY,
59 unsigned int *BlockHeight256BytesC,
60 unsigned int *BlockWidth256BytesY,
61 unsigned int *BlockWidth256BytesC,
62 unsigned int *MacroTileHeightY,
63 unsigned int *MacroTileHeightC,
64 unsigned int *MacroTileWidthY,
65 unsigned int *MacroTileWidthC,
66 bool *surf_linear128_l,
67 bool *surf_linear128_c);
68 static void CalculateSinglePipeDPPCLKAndSCLThroughput(
69 double HRatio,
70 double HRatioChroma,
71 double VRatio,
72 double VRatioChroma,
73 double MaxDCHUBToPSCLThroughput,
74 double MaxPSCLToLBThroughput,
75 double PixelClock,
76 enum dml2_source_format_class SourcePixelFormat,
77 unsigned int HTaps,
78 unsigned int HTapsChroma,
79 unsigned int VTaps,
80 unsigned int VTapsChroma,
81
82 // Output
83 double *PSCL_THROUGHPUT,
84 double *PSCL_THROUGHPUT_CHROMA,
85 double *DPPCLKUsingSingleDPP);
86 static void CalculateSwathWidth(
87 const struct dml2_display_cfg *display_cfg,
88 bool ForceSingleDPP,
89 unsigned int NumberOfActiveSurfaces,
90 enum dml2_odm_mode ODMMode[],
91 unsigned int BytePerPixY[],
92 unsigned int BytePerPixC[],
93 unsigned int Read256BytesBlockHeightY[],
94 unsigned int Read256BytesBlockHeightC[],
95 unsigned int Read256BytesBlockWidthY[],
96 unsigned int Read256BytesBlockWidthC[],
97 bool surf_linear128_l[],
98 bool surf_linear128_c[],
99 unsigned int DPPPerSurface[],
100
101 // Output
102 unsigned int req_per_swath_ub_l[],
103 unsigned int req_per_swath_ub_c[],
104 unsigned int SwathWidthSingleDPPY[],
105 unsigned int SwathWidthSingleDPPC[],
106 unsigned int SwathWidthY[], // per-pipe
107 unsigned int SwathWidthC[], // per-pipe
108 unsigned int MaximumSwathHeightY[],
109 unsigned int MaximumSwathHeightC[],
110 unsigned int swath_width_luma_ub[], // per-pipe
111 unsigned int swath_width_chroma_ub[]); // per-pipe
112 static bool UnboundedRequest(bool unb_req_force_en, bool unb_req_force_val, unsigned int TotalNumberOfActiveDPP, bool NoChromaOrLinear);
113 static void CalculateDETBufferSize(struct dml2_core_shared_calculate_det_buffer_size_params *p);
114 static double CalculateRequiredDispclk(enum dml2_odm_mode ODMMode, double PixelClock);
115 static double TruncToValidBPP(
116 struct dml2_core_shared_TruncToValidBPP_locals *l,
117 double LinkBitRate,
118 unsigned int Lanes,
119 unsigned int HTotal,
120 unsigned int HActive,
121 double PixelClock,
122 double DesiredBPP,
123 bool DSCEnable,
124 enum dml2_output_encoder_class Output,
125 enum dml2_output_format_class Format,
126 unsigned int DSCInputBitPerComponent,
127 unsigned int DSCSlices,
128 unsigned int AudioRate,
129 unsigned int AudioLayout,
130 enum dml2_odm_mode ODMModeNoDSC,
131 enum dml2_odm_mode ODMModeDSC,
132
133 // Output
134 unsigned int *RequiredSlots);
135 static unsigned int dscceComputeDelay(
136 unsigned int bpc,
137 double BPP,
138 unsigned int sliceWidth,
139 unsigned int numSlices,
140 enum dml2_output_format_class pixelFormat,
141 enum dml2_output_encoder_class Output);
142 static unsigned int dscComputeDelay(enum dml2_output_format_class pixelFormat, enum dml2_output_encoder_class Output);
143 static unsigned int CalculateHostVMDynamicLevels(bool GPUVMEnable, bool HostVMEnable, unsigned int HostVMMinPageSize, unsigned int HostVMMaxNonCachedPageTableLevels);
144 static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_and_row_bytes_params *p);
145 static unsigned int CalculatePrefetchSourceLines(
146 double VRatio,
147 unsigned int VTaps,
148 bool Interlace,
149 bool ProgressiveToInterlaceUnitInOPP,
150 unsigned int SwathHeight,
151 enum dml2_rotation_angle RotationAngle,
152 bool mirrored,
153 bool ViewportStationary,
154 unsigned int SwathWidth,
155 unsigned int ViewportHeight,
156 unsigned int ViewportXStart,
157 unsigned int ViewportYStart,
158
159 // Output
160 unsigned int *VInitPreFill,
161 unsigned int *MaxNumSwath);
162 static void CalculateRowBandwidth(
163 bool GPUVMEnable,
164 bool use_one_row_for_frame,
165 enum dml2_source_format_class SourcePixelFormat,
166 double VRatio,
167 double VRatioChroma,
168 bool DCCEnable,
169 double LineTime,
170 unsigned int PixelPTEBytesPerRowLuma,
171 unsigned int PixelPTEBytesPerRowChroma,
172 unsigned int dpte_row_height_luma,
173 unsigned int dpte_row_height_chroma,
174
175 bool mrq_present,
176 unsigned int meta_row_bytes_per_row_ub_l,
177 unsigned int meta_row_bytes_per_row_ub_c,
178 unsigned int meta_row_height_luma,
179 unsigned int meta_row_height_chroma,
180
181 // Output
182 double *dpte_row_bw,
183 double *meta_row_bw);
184 static void CalculateMALLUseForStaticScreen(
185 const struct dml2_display_cfg *display_cfg,
186 unsigned int NumberOfActiveSurfaces,
187 unsigned int MALLAllocatedForDCN,
188 unsigned int SurfaceSizeInMALL[],
189 bool one_row_per_frame_fits_in_buffer[],
190
191 // Output
192 bool is_using_mall_for_ss[]);
193 static void CalculateDCCConfiguration(
194 bool DCCEnabled,
195 bool DCCProgrammingAssumesScanDirectionUnknown,
196 enum dml2_source_format_class SourcePixelFormat,
197 unsigned int SurfaceWidthLuma,
198 unsigned int SurfaceWidthChroma,
199 unsigned int SurfaceHeightLuma,
200 unsigned int SurfaceHeightChroma,
201 unsigned int nomDETInKByte,
202 unsigned int RequestHeight256ByteLuma,
203 unsigned int RequestHeight256ByteChroma,
204 enum dml2_swizzle_mode TilingFormat,
205 unsigned int BytePerPixelY,
206 unsigned int BytePerPixelC,
207 double BytePerPixelDETY,
208 double BytePerPixelDETC,
209 enum dml2_rotation_angle RotationAngle,
210
211 // Output
212 enum dml2_core_internal_request_type *RequestLuma,
213 enum dml2_core_internal_request_type *RequestChroma,
214 unsigned int *MaxUncompressedBlockLuma,
215 unsigned int *MaxUncompressedBlockChroma,
216 unsigned int *MaxCompressedBlockLuma,
217 unsigned int *MaxCompressedBlockChroma,
218 unsigned int *IndependentBlockLuma,
219 unsigned int *IndependentBlockChroma);
220 static void calculate_mcache_row_bytes(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_calculate_mcache_row_bytes_params *p);
221 static void calculate_mcache_setting(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_calculate_mcache_setting_params *p);
222 static void calculate_mall_bw_overhead_factor(
223 double mall_prefetch_sdp_overhead_factor[],
224 double mall_prefetch_dram_overhead_factor[],
225
226 // input
227 const struct dml2_display_cfg *display_cfg,
228 unsigned int num_active_planes);
/*
 * Forward declaration. Takes the SoC bounding box, the SoC state and bandwidth
 * type being evaluated, average/HostVM selectors, and the DCFCLK, FCLK and DRAM
 * bandwidth operating point.
 * NOTE(review): clock units are presumably MHz and bandwidth Mbps, going by the
 * parameter names - confirm against the definition.
 */
static double dml_get_return_bandwidth_available(
	const struct dml2_soc_bb *soc,
	enum dml2_core_internal_soc_state_type state_type,
	enum dml2_core_internal_bw_type bw_type,
	bool is_avg_bw,
	bool is_hvm_en,
	bool is_hvm_only,
	double dcfclk_mhz,
	double fclk_mhz,
	double dram_bw_mbps);
239 static void calculate_bandwidth_available(
240 double avg_bandwidth_available_min[dml2_core_internal_soc_state_max],
241 double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
242 double urg_bandwidth_available_min[dml2_core_internal_soc_state_max], // min between SDP and DRAM
243 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
244 double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max],
245 double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max],
246
247 const struct dml2_soc_bb *soc,
248 bool HostVMEnable,
249 double dcfclk_mhz,
250 double fclk_mhz,
251 double dram_bw_mbps);
252 static void calculate_avg_bandwidth_required(
253 double avg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
254
255 // input
256 const struct dml2_display_cfg *display_cfg,
257 unsigned int num_active_planes,
258 double ReadBandwidthLuma[],
259 double ReadBandwidthChroma[],
260 double cursor_bw[],
261 double dcc_dram_bw_nom_overhead_factor_p0[],
262 double dcc_dram_bw_nom_overhead_factor_p1[],
263 double mall_prefetch_dram_overhead_factor[],
264 double mall_prefetch_sdp_overhead_factor[]);
265 static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_CalculateVMRowAndSwath_params *p);
266 static double CalculateUrgentLatency(
267 double UrgentLatencyPixelDataOnly,
268 double UrgentLatencyPixelMixedWithVMData,
269 double UrgentLatencyVMDataOnly,
270 bool DoUrgentLatencyAdjustment,
271 double UrgentLatencyAdjustmentFabricClockComponent,
272 double UrgentLatencyAdjustmentFabricClockReference,
273 double FabricClock,
274 double uclk_freq_mhz,
275 enum dml2_qos_param_type qos_type,
276 unsigned int urgent_ramp_uclk_cycles,
277 unsigned int df_qos_response_time_fclk_cycles,
278 unsigned int max_round_trip_to_furthest_cs_fclk_cycles,
279 unsigned int mall_overhead_fclk_cycles,
280 double umc_urgent_ramp_latency_margin,
281 double fabric_max_transport_latency_margin);
282 static double CalculateTripToMemory(
283 double UrgLatency,
284 double FabricClock,
285 double uclk_freq_mhz,
286 enum dml2_qos_param_type qos_type,
287 unsigned int trip_to_memory_uclk_cycles,
288 unsigned int max_round_trip_to_furthest_cs_fclk_cycles,
289 unsigned int mall_overhead_fclk_cycles,
290 double umc_max_latency_margin,
291 double fabric_max_transport_latency_margin);
292 static double CalculateMetaTripToMemory(
293 double UrgLatency,
294 double FabricClock,
295 double uclk_freq_mhz,
296 enum dml2_qos_param_type qos_type,
297 unsigned int meta_trip_to_memory_uclk_cycles,
298 unsigned int meta_trip_to_memory_fclk_cycles,
299 double umc_max_latency_margin,
300 double fabric_max_transport_latency_margin);
301 static void calculate_cursor_req_attributes(
302 unsigned int cursor_width,
303 unsigned int cursor_bpp,
304
305 // output
306 unsigned int *cursor_lines_per_chunk,
307 unsigned int *cursor_bytes_per_line,
308 unsigned int *cursor_bytes_per_chunk,
309 unsigned int *cursor_bytes);
310 static void calculate_cursor_urgent_burst_factor(
311 unsigned int CursorBufferSize,
312 unsigned int CursorWidth,
313 unsigned int cursor_bytes_per_chunk,
314 unsigned int cursor_lines_per_chunk,
315 double LineTime,
316 double UrgentLatency,
317
318 double *UrgentBurstFactorCursor,
319 bool *NotEnoughUrgentLatencyHiding);
320 static void CalculateUrgentBurstFactor(
321 const struct dml2_plane_parameters *plane_cfg,
322 unsigned int swath_width_luma_ub,
323 unsigned int swath_width_chroma_ub,
324 unsigned int SwathHeightY,
325 unsigned int SwathHeightC,
326 double LineTime,
327 double UrgentLatency,
328 double VRatio,
329 double VRatioC,
330 double BytePerPixelInDETY,
331 double BytePerPixelInDETC,
332 unsigned int DETBufferSizeY,
333 unsigned int DETBufferSizeC,
334 // Output
335 double *UrgentBurstFactorLuma,
336 double *UrgentBurstFactorChroma,
337 bool *NotEnoughUrgentLatencyHiding);
338 static void CalculateDCFCLKDeepSleep(
339 const struct dml2_display_cfg *display_cfg,
340 unsigned int NumberOfActiveSurfaces,
341 unsigned int BytePerPixelY[],
342 unsigned int BytePerPixelC[],
343 unsigned int SwathWidthY[],
344 unsigned int SwathWidthC[],
345 unsigned int DPPPerSurface[],
346 double PSCL_THROUGHPUT[],
347 double PSCL_THROUGHPUT_CHROMA[],
348 double Dppclk[],
349 double ReadBandwidthLuma[],
350 double ReadBandwidthChroma[],
351 unsigned int ReturnBusWidth,
352
353 // Output
354 double *DCFClkDeepSleep);
355 static double CalculateWriteBackDelay(
356 enum dml2_source_format_class WritebackPixelFormat,
357 double WritebackHRatio,
358 double WritebackVRatio,
359 unsigned int WritebackVTaps,
360 unsigned int WritebackDestinationWidth,
361 unsigned int WritebackDestinationHeight,
362 unsigned int WritebackSourceHeight,
363 unsigned int HTotal);
364 static unsigned int CalculateMaxVStartup(
365 bool ptoi_supported,
366 unsigned int vblank_nom_default_us,
367 const struct dml2_timing_cfg *timing,
368 double write_back_delay_us);
369 static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *p);
370 static void CalculateODMMode(
371 unsigned int MaximumPixelsPerLinePerDSCUnit,
372 unsigned int HActive,
373 enum dml2_output_encoder_class Output,
374 enum dml2_odm_mode ODMUse,
375 double MaxDispclk,
376 bool DSCEnable,
377 unsigned int TotalNumberOfActiveDPP,
378 unsigned int MaxNumDPP,
379 double PixelClock,
380
381 // Output
382 bool *TotalAvailablePipesSupport,
383 unsigned int *NumberOfDPP,
384 enum dml2_odm_mode *ODMMode,
385 double *RequiredDISPCLKPerSurface);
386 static void CalculateOutputLink(
387 struct dml2_core_internal_scratch *s,
388 double PHYCLK,
389 double PHYCLKD18,
390 double PHYCLKD32,
391 double Downspreading,
392 bool IsMainSurfaceUsingTheIndicatedTiming,
393 enum dml2_output_encoder_class Output,
394 enum dml2_output_format_class OutputFormat,
395 unsigned int HTotal,
396 unsigned int HActive,
397 double PixelClockBackEnd,
398 double ForcedOutputLinkBPP,
399 unsigned int DSCInputBitPerComponent,
400 unsigned int NumberOfDSCSlices,
401 double AudioSampleRate,
402 unsigned int AudioSampleLayout,
403 enum dml2_odm_mode ODMModeNoDSC,
404 enum dml2_odm_mode ODMModeDSC,
405 enum dml2_dsc_enable_option DSCEnable,
406 unsigned int OutputLinkDPLanes,
407 enum dml2_output_link_dp_rate OutputLinkDPRate,
408
409 // Output
410 bool *RequiresDSC,
411 bool *RequiresFEC,
412 double *OutBpp,
413 enum dml2_core_internal_output_type *OutputType,
414 enum dml2_core_internal_output_type_rate *OutputRate,
415 unsigned int *RequiredSlots);
416 static double CalculateWriteBackDISPCLK(
417 enum dml2_source_format_class WritebackPixelFormat,
418 double PixelClock,
419 double WritebackHRatio,
420 double WritebackVRatio,
421 unsigned int WritebackHTaps,
422 unsigned int WritebackVTaps,
423 unsigned int WritebackSourceWidth,
424 unsigned int WritebackDestinationWidth,
425 unsigned int HTotal,
426 unsigned int WritebackLineBufferSize);
427 static double RequiredDTBCLK(
428 bool DSCEnable,
429 double PixelClock,
430 enum dml2_output_format_class OutputFormat,
431 double OutputBpp,
432 unsigned int DSCSlices,
433 unsigned int HTotal,
434 unsigned int HActive,
435 unsigned int AudioRate,
436 unsigned int AudioLayout);
437 static unsigned int DSCDelayRequirement(
438 bool DSCEnabled,
439 enum dml2_odm_mode ODMMode,
440 unsigned int DSCInputBitPerComponent,
441 double OutputBpp,
442 unsigned int HActive,
443 unsigned int HTotal,
444 unsigned int NumberOfDSCSlices,
445 enum dml2_output_format_class OutputFormat,
446 enum dml2_output_encoder_class Output,
447 double PixelClock,
448 double PixelClockBackEnd);
449 static void CalculateSurfaceSizeInMall(
450 const struct dml2_display_cfg *display_cfg,
451 unsigned int NumberOfActiveSurfaces,
452 unsigned int MALLAllocatedForDCN,
453 unsigned int BytesPerPixelY[],
454 unsigned int BytesPerPixelC[],
455 unsigned int Read256BytesBlockWidthY[],
456 unsigned int Read256BytesBlockWidthC[],
457 unsigned int Read256BytesBlockHeightY[],
458 unsigned int Read256BytesBlockHeightC[],
459 unsigned int ReadBlockWidthY[],
460 unsigned int ReadBlockWidthC[],
461 unsigned int ReadBlockHeightY[],
462 unsigned int ReadBlockHeightC[],
463
464 // Output
465 unsigned int SurfaceSizeInMALL[],
466 bool *ExceededMALLSize);
467 static void calculate_tdlut_setting(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_calculate_tdlut_setting_params *p);
468 static void CalculateTarb(
469 const struct dml2_display_cfg *display_cfg,
470 unsigned int PixelChunkSizeInKByte,
471 unsigned int NumberOfActiveSurfaces,
472 unsigned int NumberOfDPP[],
473 unsigned int dpte_group_bytes[],
474 unsigned int tdlut_bytes_per_group[],
475 double HostVMInefficiencyFactor,
476 double HostVMInefficiencyFactorPrefetch,
477 unsigned int HostVMMinPageSize,
478 double ReturnBW,
479
480 unsigned int MetaChunkSize,
481
482 // output
483 double *Tarb,
484 double *Tarb_prefetch);
485 static double CalculateTWait(long reserved_vblank_time_ns, double UrgentLatency, double Ttrip);
486 static void CalculateVUpdateAndDynamicMetadataParameters(
487 unsigned int MaxInterDCNTileRepeaters,
488 double Dppclk,
489 double Dispclk,
490 double DCFClkDeepSleep,
491 double PixelClock,
492 unsigned int HTotal,
493 unsigned int VBlank,
494 unsigned int DynamicMetadataTransmittedBytes,
495 unsigned int DynamicMetadataLinesBeforeActiveRequired,
496 unsigned int InterlaceEnable,
497 bool ProgressiveToInterlaceUnitInOPP,
498
499 // Output
500 double *TSetup,
501 double *Tdmbf,
502 double *Tdmec,
503 double *Tdmsks,
504 unsigned int *VUpdateOffsetPix,
505 unsigned int *VUpdateWidthPix,
506 unsigned int *VReadyOffsetPix);
507 static double get_urgent_bandwidth_required(
508 struct dml2_core_shared_get_urgent_bandwidth_required_locals *l,
509 const struct dml2_display_cfg *display_cfg,
510 enum dml2_core_internal_soc_state_type state_type,
511 enum dml2_core_internal_bw_type bw_type,
512 bool inc_flip_bw, // including flip bw
513 unsigned int NumberOfActiveSurfaces,
514 unsigned int NumberOfDPP[],
515 double dcc_dram_bw_nom_overhead_factor_p0[],
516 double dcc_dram_bw_nom_overhead_factor_p1[],
517 double dcc_dram_bw_pref_overhead_factor_p0[],
518 double dcc_dram_bw_pref_overhead_factor_p1[],
519 double mall_prefetch_sdp_overhead_factor[],
520 double mall_prefetch_dram_overhead_factor[],
521 double ReadBandwidthLuma[],
522 double ReadBandwidthChroma[],
523 double PrefetchBandwidthLuma[],
524 double PrefetchBandwidthChroma[],
525 double cursor_bw[],
526 double dpte_row_bw[],
527 double meta_row_bw[],
528 double prefetch_cursor_bw[],
529 double prefetch_vmrow_bw[],
530 double flip_bw[],
531 double UrgentBurstFactorLuma[],
532 double UrgentBurstFactorChroma[],
533 double UrgentBurstFactorCursor[],
534 double UrgentBurstFactorLumaPre[],
535 double UrgentBurstFactorChromaPre[],
536 double UrgentBurstFactorCursorPre[]);
537 static void CalculateExtraLatency(
538 const struct dml2_display_cfg *display_cfg,
539 unsigned int ROBBufferSizeInKByte,
540 unsigned int RoundTripPingLatencyCycles,
541 unsigned int ReorderingBytes,
542 double DCFCLK,
543 double FabricClock,
544 unsigned int PixelChunkSizeInKByte,
545 double ReturnBW,
546 unsigned int NumberOfActiveSurfaces,
547 unsigned int NumberOfDPP[],
548 unsigned int dpte_group_bytes[],
549 unsigned int tdlut_bytes_per_group[],
550 double HostVMInefficiencyFactor,
551 double HostVMInefficiencyFactorPrefetch,
552 unsigned int HostVMMinPageSize,
553 enum dml2_qos_param_type qos_type,
554 bool max_oustanding_when_urgent_expected,
555 unsigned int max_outstanding_requests,
556 unsigned int request_size_bytes_luma[],
557 unsigned int request_size_bytes_chroma[],
558 unsigned int MetaChunkSize,
559 unsigned int dchub_arb_to_ret_delay,
560 double Ttrip,
561 unsigned int hostvm_mode,
562
563 // output
564 double *ExtraLatency, // Tex
565 double *ExtraLatency_sr, // Tex_sr
566 double *ExtraLatencyPrefetch);
567 static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_CalculatePrefetchSchedule_params *p);
568 static void calculate_peak_bandwidth_required(
569 struct dml2_core_internal_scratch *s,
570
571 // output
572 double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
573 double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
574 double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
575
576 // input
577 const struct dml2_display_cfg *display_cfg,
578 unsigned int inc_flip_bw,
579 unsigned int NumberOfActiveSurfaces,
580 unsigned int NumberOfDPP[],
581 double dcc_dram_bw_nom_overhead_factor_p0[],
582 double dcc_dram_bw_nom_overhead_factor_p1[],
583 double dcc_dram_bw_pref_overhead_factor_p0[],
584 double dcc_dram_bw_pref_overhead_factor_p1[],
585 double mall_prefetch_sdp_overhead_factor[],
586 double mall_prefetch_dram_overhead_factor[],
587 double ReadBandwidthLuma[],
588 double ReadBandwidthChroma[],
589 double PrefetchBandwidthLuma[],
590 double PrefetchBandwidthChroma[],
591 double cursor_bw[],
592 double dpte_row_bw[],
593 double meta_row_bw[],
594 double prefetch_cursor_bw[],
595 double prefetch_vmrow_bw[],
596 double flip_bw[],
597 double UrgentBurstFactorLuma[],
598 double UrgentBurstFactorChroma[],
599 double UrgentBurstFactorCursor[],
600 double UrgentBurstFactorLumaPre[],
601 double UrgentBurstFactorChromaPre[],
602 double UrgentBurstFactorCursorPre[]);
603 static void check_urgent_bandwidth_support(
604 double *frac_urg_bandwidth_nom,
605 double *frac_urg_bandwidth_mall,
606 bool *vactive_bandwidth_support_ok, // vactive ok
607 bool *bandwidth_support_ok, // max of vm, prefetch, vactive all ok
608
609 unsigned int mall_allocated_for_dcn_mbytes,
610 double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
611 double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
612 double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
613 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]);
614 static double get_bandwidth_available_for_immediate_flip(
615 enum dml2_core_internal_soc_state_type eval_state,
616 double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], // no flip
617 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]);
618 static void calculate_immediate_flip_bandwidth_support(
619 // Output
620 double *frac_urg_bandwidth_flip,
621 bool *flip_bandwidth_support_ok,
622
623 // Input
624 enum dml2_core_internal_soc_state_type eval_state,
625 double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
626 double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
627 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]);
628 static void CalculateFlipSchedule(
629 struct dml2_core_internal_scratch *s,
630 bool iflip_enable,
631 bool use_lb_flip_bw,
632 double HostVMInefficiencyFactor,
633 double Tvm_trips_flip,
634 double Tr0_trips_flip,
635 double Tvm_trips_flip_rounded,
636 double Tr0_trips_flip_rounded,
637 bool GPUVMEnable,
638 double vm_bytes, // vm_bytes
639 double DPTEBytesPerRow, // dpte_row_bytes
640 double BandwidthAvailableForImmediateFlip,
641 unsigned int TotImmediateFlipBytes,
642 enum dml2_source_format_class SourcePixelFormat,
643 double LineTime,
644 double VRatio,
645 double VRatioChroma,
646 double Tno_bw_flip,
647 unsigned int dpte_row_height,
648 unsigned int dpte_row_height_chroma,
649 bool use_one_row_for_frame_flip,
650 unsigned int max_flip_time_us,
651 unsigned int per_pipe_flip_bytes,
652 unsigned int meta_row_bytes,
653 unsigned int meta_row_height,
654 unsigned int meta_row_height_chroma,
655 bool dcc_mrq_enable,
656
657 // Output
658 double *dst_y_per_vm_flip,
659 double *dst_y_per_row_flip,
660 double *final_flip_bw,
661 bool *ImmediateFlipSupportedForPipe);
662 static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
663 struct dml2_core_internal_scratch *scratch,
664 struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *p);
665 static double uclk_khz_to_dram_bw_mbps(unsigned long uclk_khz, const struct dml2_dram_params *dram_config);
666 static double dram_bw_kbps_to_uclk_mhz(unsigned long long bw_kbps, const struct dml2_dram_params *dram_config);
667 static unsigned int get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params);
668 static unsigned int get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table);
669 static unsigned int get_pipe_flip_bytes(
670 double hostvm_inefficiency_factor,
671 unsigned int vm_bytes,
672 unsigned int dpte_row_bytes,
673 unsigned int meta_row_bytes);
674 static void calculate_hostvm_inefficiency_factor(
675 double *HostVMInefficiencyFactor,
676 double *HostVMInefficiencyFactorPrefetch,
677
678 bool gpuvm_enable,
679 bool hostvm_enable,
680 unsigned int remote_iommu_outstanding_translations,
681 unsigned int max_outstanding_reqs,
682 double urg_bandwidth_avail_active_pixel_and_vm,
683 double urg_bandwidth_avail_active_vm_only);
684 static void CalculatePixelDeliveryTimes(
685 const struct dml2_display_cfg *display_cfg,
686 const struct core_display_cfg_support_info *cfg_support_info,
687 unsigned int NumberOfActiveSurfaces,
688 double VRatioPrefetchY[],
689 double VRatioPrefetchC[],
690 unsigned int swath_width_luma_ub[],
691 unsigned int swath_width_chroma_ub[],
692 double PSCL_THROUGHPUT[],
693 double PSCL_THROUGHPUT_CHROMA[],
694 double Dppclk[],
695 unsigned int BytePerPixelC[],
696 unsigned int req_per_swath_ub_l[],
697 unsigned int req_per_swath_ub_c[],
698
699 // Output
700 double DisplayPipeLineDeliveryTimeLuma[],
701 double DisplayPipeLineDeliveryTimeChroma[],
702 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
703 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
704 double DisplayPipeRequestDeliveryTimeLuma[],
705 double DisplayPipeRequestDeliveryTimeChroma[],
706 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
707 double DisplayPipeRequestDeliveryTimeChromaPrefetch[]);
708 static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTETimes_params *p);
709 static void CalculateVMGroupAndRequestTimes(
710 const struct dml2_display_cfg *display_cfg,
711 unsigned int NumberOfActiveSurfaces,
712 unsigned int BytePerPixelC[],
713 double dst_y_per_vm_vblank[],
714 double dst_y_per_vm_flip[],
715 unsigned int dpte_row_width_luma_ub[],
716 unsigned int dpte_row_width_chroma_ub[],
717 unsigned int vm_group_bytes[],
718 unsigned int dpde0_bytes_per_frame_ub_l[],
719 unsigned int dpde0_bytes_per_frame_ub_c[],
720 unsigned int tdlut_pte_bytes_per_frame[],
721 unsigned int meta_pte_bytes_per_frame_ub_l[],
722 unsigned int meta_pte_bytes_per_frame_ub_c[],
723 bool mrq_present,
724
725 // Output
726 double TimePerVMGroupVBlank[],
727 double TimePerVMGroupFlip[],
728 double TimePerVMRequestVBlank[],
729 double TimePerVMRequestFlip[]);
730 static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_CalculateStutterEfficiency_params *p);
731 static bool dml_is_dual_plane(enum dml2_source_format_class source_format);
732 static unsigned int dml_get_plane_idx(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx);
733 static void rq_dlg_get_wm_regs(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *wm_regs);
734 static unsigned int log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend);
735 static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs,
736 const struct dml2_display_cfg *display_cfg,
737 const struct dml2_core_internal_display_mode_lib *mode_lib,
738 unsigned int pipe_idx);
739 static void rq_dlg_get_dlg_reg(struct dml2_core_internal_scratch *s,
740 struct dml2_display_dlg_regs *disp_dlg_regs,
741 struct dml2_display_ttu_regs *disp_ttu_regs,
742 const struct dml2_display_cfg *display_cfg,
743 const struct dml2_core_internal_display_mode_lib *mode_lib,
744 const unsigned int pipe_idx);
745 static void rq_dlg_get_arb_params(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *arb_param);
746
747 /*
748 * END OF STATIC HELPERS
749 */
750
dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex * in_out_params)751 bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_out_params)
752 {
753 struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib;
754 const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg;
755 const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table;
756
757 struct dml2_core_calcs_mode_support_locals *s = &mode_lib->scratch.dml_core_mode_support_locals;
758 struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
759 struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
760 struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
761 struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
762 struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params;
763 struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params;
764 unsigned int k, m, n;
765
766 memset(&mode_lib->ms, 0, sizeof(struct dml2_core_internal_mode_support));
767
768 mode_lib->ms.num_active_planes = display_cfg->num_planes;
769 get_stream_output_bpp(s->OutputBpp, display_cfg);
770
771 mode_lib->ms.state_idx = in_out_params->min_clk_index;
772 mode_lib->ms.SOCCLK = ((double)mode_lib->soc.clk_table.socclk.clk_values_khz[0] / 1000);
773 mode_lib->ms.DCFCLK = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_dcfclk_khz / 1000);
774 mode_lib->ms.FabricClock = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz / 1000);
775 mode_lib->ms.MaxDCFCLK = (double)min_clk_table->max_clocks_khz.dcfclk / 1000;
776 mode_lib->ms.MaxFabricClock = (double)min_clk_table->max_clocks_khz.fclk / 1000;
777 mode_lib->ms.max_dispclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dispclk / 1000;
778 mode_lib->ms.max_dscclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dscclk / 1000;
779 mode_lib->ms.max_dppclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dppclk / 1000;
780 mode_lib->ms.uclk_freq_mhz = dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config);
781 mode_lib->ms.dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps / 1000);
782 mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params);
783 mode_lib->ms.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index((unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000.0), &mode_lib->soc.clk_table);
784
785 #if defined(__DML_VBA_DEBUG__)
786 dml2_printf("DML::%s: --- START --- \n", __func__);
787 dml2_printf("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes);
788 dml2_printf("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index);
789 dml2_printf("DML::%s: qos_param_index = %0d\n", __func__, mode_lib->ms.qos_param_index);
790 dml2_printf("DML::%s: SOCCLK = %f\n", __func__, mode_lib->ms.SOCCLK);
791 dml2_printf("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->ms.dram_bw_mbps);
792 dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz);
793 dml2_printf("DML::%s: DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK);
794 dml2_printf("DML::%s: FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
795 dml2_printf("DML::%s: MaxDCFCLK = %f\n", __func__, mode_lib->ms.MaxDCFCLK);
796 dml2_printf("DML::%s: max_dispclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dispclk_freq_mhz);
797 dml2_printf("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz);
798 dml2_printf("DML::%s: max_dppclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dppclk_freq_mhz);
799 dml2_printf("DML::%s: MaxFabricClock = %f\n", __func__, mode_lib->ms.MaxFabricClock);
800 dml2_printf("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz);
801 dml2_printf("DML::%s: ip.compressed_buffer_segment_size_in_kbytes = %u\n", __func__, mode_lib->ip.compressed_buffer_segment_size_in_kbytes);
802 dml2_printf("DML::%s: ip.dcn_mrq_present = %u\n", __func__, mode_lib->ip.dcn_mrq_present);
803
804 for (k = 0; k < mode_lib->ms.num_active_planes; k++)
805 dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns);
806
807 // dml2_printf_dml_policy(&mode_lib->ms.policy);
808 // dml2_printf_dml_display_cfg_timing(&display_cfg->timing, mode_lib->ms.num_active_planes);
809 // dml2_printf_dml_display_cfg_plane(&display_cfg->plane, mode_lib->ms.num_active_planes);
810 // dml2_printf_dml_display_cfg_surface(&display_cfg->surface, mode_lib->ms.num_active_planes);
811 // dml2_printf_dml_display_cfg_output(&display_cfg->output, mode_lib->ms.num_active_planes);
812 #endif
813
814 CalculateMaxDETAndMinCompressedBufferSize(
815 mode_lib->ip.config_return_buffer_size_in_kbytes,
816 mode_lib->ip.config_return_buffer_segment_size_in_kbytes,
817 mode_lib->ip.rob_buffer_size_kbytes,
818 mode_lib->ip.max_num_dpp,
819 display_cfg->overrides.hw.force_nom_det_size_kbytes.enable,
820 display_cfg->overrides.hw.force_nom_det_size_kbytes.value,
821 mode_lib->ip.dcn_mrq_present,
822
823 /* Output */
824 &mode_lib->ms.MaxTotalDETInKByte,
825 &mode_lib->ms.NomDETInKByte,
826 &mode_lib->ms.MinCompressedBufferSizeInKByte);
827
828 PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd);
829
830 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
831
832 /*Scale Ratio, taps Support Check*/
833 mode_lib->ms.support.ScaleRatioAndTapsSupport = true;
834 // Many core tests are still setting scaling parameters "incorrectly"
835 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
836 if (display_cfg->plane_descriptors[k].composition.scaler_info.enabled == false
837 && (dml2_core_shared_is_420(display_cfg->plane_descriptors[k].pixel_format)
838 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio != 1.0
839 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps != 1.0
840 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio != 1.0
841 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps != 1.0)) {
842 mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
843 } else if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps > 8.0
844 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 8.0
845 || (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 1.0 && (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps % 2) == 1)
846 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > mode_lib->ip.max_hscl_ratio
847 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > mode_lib->ip.max_vscl_ratio
848 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps
849 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps
850 || (dml2_core_shared_is_420(display_cfg->plane_descriptors[k].pixel_format)
851 && (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps < 1 || display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps > 8 ||
852 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps < 1 || display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps > 8 ||
853 (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps > 1 && display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps % 2 == 1) ||
854 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio > mode_lib->ip.max_hscl_ratio ||
855 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio > mode_lib->ip.max_vscl_ratio ||
856 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps ||
857 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps))) {
858 mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
859 }
860 }
861
862 /*Source Format, Pixel Format and Scan Support Check*/
863 mode_lib->ms.support.SourceFormatPixelAndScanSupport = true;
864 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
865 if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear && dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
866 mode_lib->ms.support.SourceFormatPixelAndScanSupport = false;
867 }
868 }
869
870 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
871 CalculateBytePerPixelAndBlockSizes(
872 display_cfg->plane_descriptors[k].pixel_format,
873 display_cfg->plane_descriptors[k].surface.tiling,
874 display_cfg->plane_descriptors[k].surface.plane0.pitch,
875 display_cfg->plane_descriptors[k].surface.plane1.pitch,
876
877 /* Output */
878 &mode_lib->ms.BytePerPixelY[k],
879 &mode_lib->ms.BytePerPixelC[k],
880 &mode_lib->ms.BytePerPixelInDETY[k],
881 &mode_lib->ms.BytePerPixelInDETC[k],
882 &mode_lib->ms.Read256BlockHeightY[k],
883 &mode_lib->ms.Read256BlockHeightC[k],
884 &mode_lib->ms.Read256BlockWidthY[k],
885 &mode_lib->ms.Read256BlockWidthC[k],
886 &mode_lib->ms.MacroTileHeightY[k],
887 &mode_lib->ms.MacroTileHeightC[k],
888 &mode_lib->ms.MacroTileWidthY[k],
889 &mode_lib->ms.MacroTileWidthC[k],
890 &mode_lib->ms.surf_linear128_l[k],
891 &mode_lib->ms.surf_linear128_c[k]);
892 }
893
894 /*Bandwidth Support Check*/
895 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
896 if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
897 mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
898 mode_lib->ms.SwathWidthCSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
899 } else {
900 mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
901 mode_lib->ms.SwathWidthCSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
902 }
903 }
904
905 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
906 mode_lib->ms.SurfaceReadBandwidthLuma[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
907 mode_lib->ms.SurfaceReadBandwidthChroma[k] = mode_lib->ms.SwathWidthCSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
908
909 mode_lib->ms.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width *
910 display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000));
911
912 #ifdef __DML_VBA_DEBUG__
913 double old_ReadBandwidthLuma = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelInDETY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
914 double old_ReadBandwidthChroma = mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * math_ceil2(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio / 2.0;
915 dml2_printf("DML::%s: k=%u, old_ReadBandwidthLuma = %f\n", __func__, k, old_ReadBandwidthLuma);
916 dml2_printf("DML::%s: k=%u, old_ReadBandwidthChroma = %f\n", __func__, k, old_ReadBandwidthChroma);
917 dml2_printf("DML::%s: k=%u, ReadBandwidthLuma = %f\n", __func__, k, mode_lib->ms.SurfaceReadBandwidthLuma[k]);
918 dml2_printf("DML::%s: k=%u, ReadBandwidthChroma = %f\n", __func__, k, mode_lib->ms.SurfaceReadBandwidthChroma[k]);
919 #endif
920 }
921
922 // Writeback bandwidth
923 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
924 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format == dml2_444_64) {
925 mode_lib->ms.WriteBandwidth[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height
926 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width
927 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height
928 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total
929 / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 8.0;
930 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) {
931 mode_lib->ms.WriteBandwidth[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height
932 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width
933 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height
934 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total
935 / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 4.0;
936 } else {
937 mode_lib->ms.WriteBandwidth[k] = 0.0;
938 }
939 }
940
941 /*Writeback Latency support check*/
942 mode_lib->ms.support.WritebackLatencySupport = true;
943 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
944 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true &&
945 (mode_lib->ms.WriteBandwidth[k] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024.0 / mode_lib->soc.qos_parameters.writeback.base_latency_us)) {
946 mode_lib->ms.support.WritebackLatencySupport = false;
947 }
948 }
949
950 /* Writeback Mode Support Check */
951 s->TotalNumberOfActiveWriteback = 0;
952 for (k = 0; k <= (unsigned int)mode_lib->ms.num_active_planes - 1; k++) {
953 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true
954 && (display_cfg->plane_descriptors[k].stream_index == k)) {
955 s->TotalNumberOfActiveWriteback = s->TotalNumberOfActiveWriteback + 1;
956 }
957 }
958
959 mode_lib->ms.support.EnoughWritebackUnits = 1;
960 if (s->TotalNumberOfActiveWriteback > (unsigned int)mode_lib->ip.max_num_wb) {
961 mode_lib->ms.support.EnoughWritebackUnits = false;
962 }
963
964 /* Writeback Scale Ratio and Taps Support Check */
965 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = true;
966 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
967 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) {
968 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio > mode_lib->ip.writeback_max_hscl_ratio
969 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio > mode_lib->ip.writeback_max_vscl_ratio
970 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio < mode_lib->ip.writeback_min_hscl_ratio
971 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio < mode_lib->ip.writeback_min_vscl_ratio
972 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps > (unsigned int) mode_lib->ip.writeback_max_hscl_taps
973 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps > (unsigned int) mode_lib->ip.writeback_max_vscl_taps
974 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps
975 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps
976 || (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps > 2.0 && ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps % 2) == 1))) {
977 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
978 }
979 if (2.0 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps - 1) * 57 > mode_lib->ip.writeback_line_buffer_buffer_size) {
980 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
981 }
982 }
983 }
984
985 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
986 CalculateSinglePipeDPPCLKAndSCLThroughput(
987 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
988 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
989 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
990 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
991 mode_lib->ip.max_dchub_pscl_bw_pix_per_clk,
992 mode_lib->ip.max_pscl_lb_bw_pix_per_clk,
993 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
994 display_cfg->plane_descriptors[k].pixel_format,
995 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps,
996 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps,
997 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps,
998 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps,
999 /* Output */
1000 &mode_lib->ms.PSCL_FACTOR[k],
1001 &mode_lib->ms.PSCL_FACTOR_CHROMA[k],
1002 &mode_lib->ms.MinDPPCLKUsingSingleDPP[k]);
1003 }
1004
1005 // Max Viewport Size support
1006 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
1007 if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) {
1008 s->MaximumSwathWidthSupportLuma = 15360;
1009 } else if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // horz video
1010 s->MaximumSwathWidthSupportLuma = 7680 + 16;
1011 } else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // vert video
1012 s->MaximumSwathWidthSupportLuma = 4320 + 16;
1013 } else if (display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { // rgbe + alpha
1014 s->MaximumSwathWidthSupportLuma = 5120 + 16;
1015 } else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelY[k] == 8 && display_cfg->plane_descriptors[k].surface.dcc.enable == true) { // vert 64bpp
1016 s->MaximumSwathWidthSupportLuma = 3072 + 16;
1017 } else {
1018 s->MaximumSwathWidthSupportLuma = 6144 + 16;
1019 }
1020
1021 if (dml2_core_shared_is_420(display_cfg->plane_descriptors[k].pixel_format)) {
1022 s->MaximumSwathWidthSupportChroma = (unsigned int)(s->MaximumSwathWidthSupportLuma / 2.0);
1023 } else {
1024 s->MaximumSwathWidthSupportChroma = s->MaximumSwathWidthSupportLuma;
1025 }
1026 mode_lib->ms.MaximumSwathWidthInLineBufferLuma = mode_lib->ip.line_buffer_size_bits * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 1.0) / 57 /*FIXME_STAGE2 was: LBBitPerPixel*/ /
1027 (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, 1.0) - 2, 0.0));
1028 if (mode_lib->ms.BytePerPixelC[k] == 0.0) {
1029 mode_lib->ms.MaximumSwathWidthInLineBufferChroma = 0;
1030 } else {
1031 mode_lib->ms.MaximumSwathWidthInLineBufferChroma = mode_lib->ip.line_buffer_size_bits * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 1.0) / 57 /*FIXME_STAGE2 was: LBBitPerPixel*/ /
1032 (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, 1.0) - 2, 0.0));
1033 }
1034 mode_lib->ms.MaximumSwathWidthLuma[k] = math_min2(s->MaximumSwathWidthSupportLuma, mode_lib->ms.MaximumSwathWidthInLineBufferLuma);
1035 mode_lib->ms.MaximumSwathWidthChroma[k] = math_min2(s->MaximumSwathWidthSupportChroma, mode_lib->ms.MaximumSwathWidthInLineBufferChroma);
1036 }
1037
1038 /* Cursor Support Check */
1039 mode_lib->ms.support.CursorSupport = true;
1040 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
1041 if (display_cfg->plane_descriptors[k].cursor.cursor_width > 0.0) {
1042 if (display_cfg->plane_descriptors[k].cursor.cursor_bpp == 64 && mode_lib->ip.cursor_64bpp_support == false) {
1043 mode_lib->ms.support.CursorSupport = false;
1044 }
1045 }
1046 }
1047
1048 /* Valid Pitch Check */
1049 mode_lib->ms.support.PitchSupport = true;
1050 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
1051
1052 // data pitch
1053 unsigned int alignment_l = mode_lib->ms.MacroTileWidthY[k];
1054
1055 if (mode_lib->ms.surf_linear128_l[k])
1056 alignment_l = alignment_l / 2;
1057
1058 mode_lib->ms.support.AlignedYPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane0.pitch, display_cfg->plane_descriptors[k].surface.plane0.width), alignment_l);
1059 if (dml2_core_shared_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) {
1060 unsigned int alignment_c = mode_lib->ms.MacroTileWidthC[k];
1061
1062 if (mode_lib->ms.surf_linear128_c[k])
1063 alignment_c = alignment_c / 2;
1064 mode_lib->ms.support.AlignedCPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane1.pitch, display_cfg->plane_descriptors[k].surface.plane1.width), alignment_c);
1065 } else {
1066 mode_lib->ms.support.AlignedCPitch[k] = display_cfg->plane_descriptors[k].surface.plane1.pitch;
1067 }
1068
1069 if (mode_lib->ms.support.AlignedYPitch[k] > display_cfg->plane_descriptors[k].surface.plane0.pitch ||
1070 mode_lib->ms.support.AlignedCPitch[k] > display_cfg->plane_descriptors[k].surface.plane1.pitch) {
1071 mode_lib->ms.support.PitchSupport = false;
1072 #if defined(__DML_VBA_DEBUG__)
1073 dml2_printf("DML::%s: k=%u AlignedYPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedYPitch[k]);
1074 dml2_printf("DML::%s: k=%u PitchY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.pitch);
1075 dml2_printf("DML::%s: k=%u AlignedCPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedCPitch[k]);
1076 dml2_printf("DML::%s: k=%u PitchC = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane1.pitch);
1077 dml2_printf("DML::%s: k=%u PitchSupport = %d\n", __func__, k, mode_lib->ms.support.PitchSupport);
1078 #endif
1079 }
1080
1081 // meta pitch
1082 if (mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable) {
1083 mode_lib->ms.support.AlignedDCCMetaPitchY[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch,
1084 display_cfg->plane_descriptors[k].surface.plane0.width), 64.0 * mode_lib->ms.Read256BlockWidthY[k]);
1085
1086 if (mode_lib->ms.support.AlignedDCCMetaPitchY[k] > display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch)
1087 mode_lib->ms.support.PitchSupport = false;
1088
1089 if (dml2_core_shared_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) {
1090 mode_lib->ms.support.AlignedDCCMetaPitchC[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch,
1091 display_cfg->plane_descriptors[k].surface.plane1.width), 64.0 * mode_lib->ms.Read256BlockWidthC[k]);
1092
1093 if (mode_lib->ms.support.AlignedDCCMetaPitchC[k] > display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch)
1094 mode_lib->ms.support.PitchSupport = false;
1095 }
1096 } else {
1097 mode_lib->ms.support.AlignedDCCMetaPitchY[k] = 0;
1098 mode_lib->ms.support.AlignedDCCMetaPitchC[k] = 0;
1099 }
1100 }
1101
1102 mode_lib->ms.support.ViewportExceedsSurface = false;
1103 if (!display_cfg->overrides.hw.surface_viewport_size_check_disable) {
1104 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
1105 if (display_cfg->plane_descriptors[k].composition.viewport.plane0.width > display_cfg->plane_descriptors[k].surface.plane0.width || display_cfg->plane_descriptors[k].composition.viewport.plane0.height > display_cfg->plane_descriptors[k].surface.plane0.height) {
1106 mode_lib->ms.support.ViewportExceedsSurface = true;
1107 #if defined(__DML_VBA_DEBUG__)
1108 dml2_printf("DML::%s: k=%u ViewportWidth = %d\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width);
1109 dml2_printf("DML::%s: k=%u SurfaceWidthY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.width);
1110 dml2_printf("DML::%s: k=%u ViewportHeight = %d\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height);
1111 dml2_printf("DML::%s: k=%u SurfaceHeightY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.height);
1112 dml2_printf("DML::%s: k=%u ViewportExceedsSurface = %d\n", __func__, k, mode_lib->ms.support.ViewportExceedsSurface);
1113 #endif
1114 if (dml2_core_shared_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) {
1115 if (display_cfg->plane_descriptors[k].composition.viewport.plane1.width > display_cfg->plane_descriptors[k].surface.plane1.width ||
1116 display_cfg->plane_descriptors[k].composition.viewport.plane1.height > display_cfg->plane_descriptors[k].surface.plane1.height) {
1117 mode_lib->ms.support.ViewportExceedsSurface = true;
1118 }
1119 }
1120 }
1121 }
1122 }
1123
1124 CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg;
1125 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes;
1126 CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte;
1127 CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte;
1128 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
1129 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
1130 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
1131 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
1132 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = 1;
1133 CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
1134 CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
1135 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
1136 CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes;
1137 CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.SurfaceReadBandwidthLuma;
1138 CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.SurfaceReadBandwidthChroma;
1139 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma;
1140 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma;
1141 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY;
1142 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC;
1143 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY;
1144 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC;
1145 CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->ms.surf_linear128_l;
1146 CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->ms.surf_linear128_c;
1147 CalculateSwathAndDETConfiguration_params->ODMMode = s->dummy_odm_mode;
1148 CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY;
1149 CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC;
1150 CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY;
1151 CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC;
1152 CalculateSwathAndDETConfiguration_params->DPPPerSurface = s->dummy_integer_array[2];
1153 CalculateSwathAndDETConfiguration_params->mrq_present = mode_lib->ip.dcn_mrq_present;
1154
1155 // output
1156 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0];
1157 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1];
1158 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_integer_array[3];
1159 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_integer_array[4];
1160 CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_integer_array[5];
1161 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_integer_array[6];
1162 CalculateSwathAndDETConfiguration_params->SwathHeightY = s->dummy_integer_array[7];
1163 CalculateSwathAndDETConfiguration_params->SwathHeightC = s->dummy_integer_array[8];
1164 CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = s->dummy_integer_array[26];
1165 CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = s->dummy_integer_array[27];
1166 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = s->dummy_integer_array[9];
1167 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = s->dummy_integer_array[10];
1168 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = s->dummy_integer_array[11];
1169 CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l;
1170 CalculateSwathAndDETConfiguration_params->full_swath_bytes_c = s->full_swath_bytes_c;
1171 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &s->dummy_boolean[0];
1172 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[1];
1173 CalculateSwathAndDETConfiguration_params->hw_debug5 = &s->dummy_boolean[2];
1174 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &s->dummy_integer[0];
1175 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = mode_lib->ms.SingleDPPViewportSizeSupportPerSurface;
1176 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[1];
1177 CalculateSwathAndDETConfiguration_params->funcs = &mode_lib->funcs;
1178
1179 // This calls is just to find out if there is enough DET space to support full vp in 1 pipe.
1180 CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
1181
1182 {
1183 mode_lib->ms.TotalNumberOfActiveDPP = 0;
1184 mode_lib->ms.support.TotalAvailablePipesSupport = true;
1185
1186 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
1187 CalculateODMMode(
1188 mode_lib->ip.maximum_pixels_per_line_per_dsc_unit,
1189 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
1190 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
1191 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode,
1192 mode_lib->ms.max_dispclk_freq_mhz,
1193 false, // DSCEnable
1194 mode_lib->ms.TotalNumberOfActiveDPP,
1195 mode_lib->ip.max_num_dpp,
1196 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
1197
1198 /* Output */
1199 &s->TotalAvailablePipesSupportNoDSC,
1200 &s->NumberOfDPPNoDSC,
1201 &s->ODMModeNoDSC,
1202 &s->RequiredDISPCLKPerSurfaceNoDSC);
1203
1204 CalculateODMMode(
1205 mode_lib->ip.maximum_pixels_per_line_per_dsc_unit,
1206 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
1207 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
1208 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode,
1209 mode_lib->ms.max_dispclk_freq_mhz,
1210 true, // DSCEnable
1211 mode_lib->ms.TotalNumberOfActiveDPP,
1212 mode_lib->ip.max_num_dpp,
1213 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
1214
1215 /* Output */
1216 &s->TotalAvailablePipesSupportDSC,
1217 &s->NumberOfDPPDSC,
1218 &s->ODMModeDSC,
1219 &s->RequiredDISPCLKPerSurfaceDSC);
1220
1221 /*Number Of DSC Slices*/
1222 if (display_cfg->plane_descriptors[k].stream_index == k) {
1223 if (s->PixelClockBackEnd[k] > 4800) {
1224 mode_lib->ms.support.NumberOfDSCSlices[k] = (unsigned int)(math_ceil2(s->PixelClockBackEnd[k] / 600, 4));
1225 } else if (s->PixelClockBackEnd[k] > 2400) {
1226 mode_lib->ms.support.NumberOfDSCSlices[k] = 8;
1227 } else if (s->PixelClockBackEnd[k] > 1200) {
1228 mode_lib->ms.support.NumberOfDSCSlices[k] = 4;
1229 } else if (s->PixelClockBackEnd[k] > 340) {
1230 mode_lib->ms.support.NumberOfDSCSlices[k] = 2;
1231 } else {
1232 mode_lib->ms.support.NumberOfDSCSlices[k] = 1;
1233 }
1234 } else {
1235 mode_lib->ms.support.NumberOfDSCSlices[k] = 0;
1236 }
1237
1238 if (s->ODMModeDSC == dml2_odm_mode_combine_2to1)
1239 mode_lib->ms.support.NumberOfDSCSlices[k] = 2 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 2.0, 1.0);
1240 else if (s->ODMModeDSC == dml2_odm_mode_combine_3to1)
1241 mode_lib->ms.support.NumberOfDSCSlices[k] = 12;
1242 else if (s->ODMModeDSC == dml2_odm_mode_combine_4to1)
1243 mode_lib->ms.support.NumberOfDSCSlices[k] = 4 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 4.0, 1.0);
1244
1245 CalculateOutputLink(
1246 &mode_lib->scratch,
1247 ((double)mode_lib->soc.clk_table.phyclk.clk_values_khz[0] / 1000),
1248 ((double)mode_lib->soc.clk_table.phyclk_d18.clk_values_khz[0] / 1000),
1249 ((double)mode_lib->soc.clk_table.phyclk_d32.clk_values_khz[0] / 1000),
1250 mode_lib->soc.phy_downspread_percent,
1251 (display_cfg->plane_descriptors[k].stream_index == k),
1252 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
1253 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
1254 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
1255 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
1256 s->PixelClockBackEnd[k],
1257 s->OutputBpp[k],
1258 mode_lib->ip.maximum_dsc_bits_per_component,
1259 mode_lib->ms.support.NumberOfDSCSlices[k],
1260 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate,
1261 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout,
1262 s->ODMModeNoDSC,
1263 s->ODMModeDSC,
1264 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable,
1265 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count,
1266 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate,
1267
1268 /* Output */
1269 &mode_lib->ms.RequiresDSC[k],
1270 &mode_lib->ms.RequiresFEC[k],
1271 &mode_lib->ms.OutputBpp[k],
1272 &mode_lib->ms.OutputType[k], // VBA_DELTA, VBA uses a string to represent type and rate, but DML uses enum, don't want to rely on strng
1273 &mode_lib->ms.OutputRate[k],
1274 &mode_lib->ms.RequiredSlots[k]);
1275
1276 if (mode_lib->ms.RequiresDSC[k] == false) {
1277 mode_lib->ms.ODMMode[k] = s->ODMModeNoDSC;
1278 mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceNoDSC;
1279 if (!s->TotalAvailablePipesSupportNoDSC)
1280 mode_lib->ms.support.TotalAvailablePipesSupport = false;
1281 mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPNoDSC;
1282 } else {
1283 mode_lib->ms.ODMMode[k] = s->ODMModeDSC;
1284 mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceDSC;
1285 if (!s->TotalAvailablePipesSupportDSC)
1286 mode_lib->ms.support.TotalAvailablePipesSupport = false;
1287 mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPDSC;
1288 }
1289 dml2_printf("DML::%s: k=%d RequiresDSC = %d\n", __func__, k, mode_lib->ms.RequiresDSC[k]);
1290 dml2_printf("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]);
1291 }
1292
1293 // FIXME_DCN4 - add odm vs mpc use check
1294
1295 // FIXME_DCN4 - add imall cap check
1296 mode_lib->ms.support.incorrect_imall_usage = 0;
1297 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
1298 if (mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall)
1299 mode_lib->ms.support.incorrect_imall_usage = 1;
1300 }
1301
1302 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
1303 mode_lib->ms.MPCCombine[k] = false;
1304 mode_lib->ms.NoOfDPP[k] = 1;
1305
1306 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) {
1307 mode_lib->ms.MPCCombine[k] = false;
1308 mode_lib->ms.NoOfDPP[k] = 4;
1309 } else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) {
1310 mode_lib->ms.MPCCombine[k] = false;
1311 mode_lib->ms.NoOfDPP[k] = 3;
1312 } else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) {
1313 mode_lib->ms.MPCCombine[k] = false;
1314 mode_lib->ms.NoOfDPP[k] = 2;
1315 } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 2) {
1316 mode_lib->ms.MPCCombine[k] = true;
1317 mode_lib->ms.NoOfDPP[k] = 2;
1318 mode_lib->ms.TotalNumberOfActiveDPP++;
1319 } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 1) {
1320 mode_lib->ms.MPCCombine[k] = false;
1321 mode_lib->ms.NoOfDPP[k] = 1;
1322 if (!mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) {
1323 dml2_printf("ERROR: DML::%s: MPCC is override to disable but viewport is too large to be supported with single pipe!\n", __func__);
1324 }
1325 } else {
1326 if ((mode_lib->ms.MinDPPCLKUsingSingleDPP[k] > mode_lib->ms.max_dppclk_freq_mhz) || !mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) {
1327 mode_lib->ms.MPCCombine[k] = true;
1328 mode_lib->ms.NoOfDPP[k] = 2;
1329 mode_lib->ms.TotalNumberOfActiveDPP++;
1330 }
1331 }
1332 #if defined(__DML_VBA_DEBUG__)
1333 dml2_printf("DML::%s: k=%d, NoOfDPP = %d\n", __func__, k, mode_lib->ms.NoOfDPP[k]);
1334 #endif
1335 }
1336
1337 if (mode_lib->ms.TotalNumberOfActiveDPP > (unsigned int)mode_lib->ip.max_num_dpp)
1338 mode_lib->ms.support.TotalAvailablePipesSupport = false;
1339
1340
1341 mode_lib->ms.TotalNumberOfSingleDPPSurfaces = 0;
1342 for (k = 0; k < (unsigned int)mode_lib->ms.num_active_planes; ++k) {
1343 if (mode_lib->ms.NoOfDPP[k] == 1)
1344 mode_lib->ms.TotalNumberOfSingleDPPSurfaces = mode_lib->ms.TotalNumberOfSingleDPPSurfaces + 1;
1345 }
1346
1347 //DISPCLK/DPPCLK
1348 mode_lib->ms.WritebackRequiredDISPCLK = 0;
1349 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
1350 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable) {
1351 mode_lib->ms.WritebackRequiredDISPCLK = math_max2(mode_lib->ms.WritebackRequiredDISPCLK,
1352 CalculateWriteBackDISPCLK(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format,
1353 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
1354 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio,
1355 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio,
1356 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps,
1357 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps,
1358 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_width,
1359 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height,
1360 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
1361 mode_lib->ip.writeback_line_buffer_buffer_size));
1362 }
1363 }
1364
1365 mode_lib->ms.RequiredDISPCLK = mode_lib->ms.WritebackRequiredDISPCLK;
1366 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
1367 mode_lib->ms.RequiredDISPCLK = math_max2(mode_lib->ms.RequiredDISPCLK, mode_lib->ms.RequiredDISPCLKPerSurface[k]);
1368 }
1369
1370 mode_lib->ms.GlobalDPPCLK = 0;
1371 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
1372 mode_lib->ms.RequiredDPPCLK[k] = mode_lib->ms.MinDPPCLKUsingSingleDPP[k] / mode_lib->ms.NoOfDPP[k];
1373 mode_lib->ms.GlobalDPPCLK = math_max2(mode_lib->ms.GlobalDPPCLK, mode_lib->ms.RequiredDPPCLK[k]);
1374 }
1375
1376 mode_lib->ms.support.DISPCLK_DPPCLK_Support = !((mode_lib->ms.RequiredDISPCLK > mode_lib->ms.max_dispclk_freq_mhz) || (mode_lib->ms.GlobalDPPCLK > mode_lib->ms.max_dppclk_freq_mhz));
1377 }
1378
1379 /* Total Available OTG, HDMIFRL, DP Support Check */
1380 s->TotalNumberOfActiveOTG = 0;
1381 s->TotalNumberOfActiveHDMIFRL = 0;
1382 s->TotalNumberOfActiveDP2p0 = 0;
1383 s->TotalNumberOfActiveDP2p0Outputs = 0;
1384
1385 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
1386 if (display_cfg->plane_descriptors[k].stream_index == k) {
1387 s->TotalNumberOfActiveOTG = s->TotalNumberOfActiveOTG + 1;
1388 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl)
1389 s->TotalNumberOfActiveHDMIFRL = s->TotalNumberOfActiveHDMIFRL + 1;
1390 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0) {
1391 s->TotalNumberOfActiveDP2p0 = s->TotalNumberOfActiveDP2p0 + 1;
1392 // FIXME_STAGE2: SW not using backend related stuff, need mapping for mst setup
1393 //if (display_cfg->output.OutputMultistreamId[k] == k || display_cfg->output.OutputMultistreamEn[k] == false) {
1394 s->TotalNumberOfActiveDP2p0Outputs = s->TotalNumberOfActiveDP2p0Outputs + 1;
1395 //}
1396 }
1397 }
1398 }
1399
1400 mode_lib->ms.support.NumberOfOTGSupport = (s->TotalNumberOfActiveOTG <= (unsigned int)mode_lib->ip.max_num_otg);
1401 mode_lib->ms.support.NumberOfHDMIFRLSupport = (s->TotalNumberOfActiveHDMIFRL <= (unsigned int)mode_lib->ip.max_num_hdmi_frl_outputs);
1402 mode_lib->ms.support.NumberOfDP2p0Support = (s->TotalNumberOfActiveDP2p0 <= (unsigned int)mode_lib->ip.max_num_dp2p0_streams && s->TotalNumberOfActiveDP2p0Outputs <= (unsigned int)mode_lib->ip.max_num_dp2p0_outputs);
1403
1404 mode_lib->ms.support.ExceededMultistreamSlots = false;
1405 mode_lib->ms.support.LinkCapacitySupport = true;
1406 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
1407 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_disabled == false &&
1408 display_cfg->plane_descriptors[k].stream_index == k && (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
1409 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) && mode_lib->ms.OutputBpp[k] == 0) {
1410 mode_lib->ms.support.LinkCapacitySupport = false;
1411 }
1412 }
1413
1414 mode_lib->ms.support.P2IWith420 = false;
1415 mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = false;
1416 mode_lib->ms.support.DSC422NativeNotSupported = false;
1417 mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = false;
1418 mode_lib->ms.support.LinkRateForMultistreamNotIndicated = false;
1419 mode_lib->ms.support.BPPForMultistreamNotIndicated = false;
1420 mode_lib->ms.support.MultistreamWithHDMIOreDP = false;
1421 mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = false;
1422 mode_lib->ms.support.NotEnoughLanesForMSO = false;
1423
1424 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
1425 if (display_cfg->plane_descriptors[k].stream_index == k && (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
1426 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl)) {
1427 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && mode_lib->ip.ptoi_supported == true)
1428 mode_lib->ms.support.P2IWith420 = true;
1429
1430 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary && s->OutputBpp[k] != 0)
1431 mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = true;
1432 if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 && !mode_lib->ip.dsc422_native_support)
1433 mode_lib->ms.support.DSC422NativeNotSupported = true;
1434
1435 if (((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr2 ||
1436 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr3) &&
1437 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_edp) ||
1438 ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr10 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr13p5 ||
1439 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr20) &&
1440 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp2p0))
1441 mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = true;
1442
1443 // FIXME_STAGE2
1444 //if (display_cfg->output.OutputMultistreamEn[k] == 1) {
1445 // if (display_cfg->output.OutputMultistreamId[k] == k && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_na)
1446 // mode_lib->ms.support.LinkRateForMultistreamNotIndicated = true;
1447 // if (display_cfg->output.OutputMultistreamId[k] == k && s->OutputBpp[k] == 0)
1448 // mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
1449 // for (n = 0; n < mode_lib->ms.num_active_planes; ++n) {
1450 // if (display_cfg->output.OutputMultistreamId[k] == n && s->OutputBpp[k] == 0)
1451 // mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
1452 // }
1453 //}
1454
1455 if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
1456 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi ||
1457 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl)) {
1458 // FIXME_STAGE2
1459 //if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == k)
1460 // mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
1461 //for (n = 0; n < mode_lib->ms.num_active_planes; ++n) {
1462 // if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == n)
1463 // mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
1464 //}
1465 }
1466 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_split_1to2 ||
1467 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4))
1468 mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = true;
1469
1470 if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 2) ||
1471 (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 4))
1472 mode_lib->ms.support.NotEnoughLanesForMSO = true;
1473 }
1474 }
1475
1476 mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = false;
1477 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
1478 if (display_cfg->plane_descriptors[k].stream_index == k &&
1479 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl &&
1480 RequiredDTBCLK(
1481 mode_lib->ms.RequiresDSC[k],
1482 s->PixelClockBackEnd[k],
1483 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
1484 mode_lib->ms.OutputBpp[k],
1485 mode_lib->ms.support.NumberOfDSCSlices[k],
1486 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
1487 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
1488 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate,
1489 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout) > ((double)mode_lib->soc.clk_table.dtbclk.clk_values_khz[0] / 1000)) {
1490 mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = true;
1491 }
1492 }
1493
1494 mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = false;
1495 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
1496 if (display_cfg->plane_descriptors[k].stream_index == k) {
1497 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp ||
1498 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 ||
1499 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
1500 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) {
1501 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420) {
1502 s->DSCFormatFactor = 2;
1503 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_444) {
1504 s->DSCFormatFactor = 1;
1505 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) {
1506 s->DSCFormatFactor = 2;
1507 } else {
1508 s->DSCFormatFactor = 1;
1509 }
1510 #ifdef __DML_VBA_DEBUG__
1511 dml2_printf("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]);
1512 #endif
1513 if (mode_lib->ms.RequiresDSC[k] == true) {
1514 s->PixelClockBackEndFactor = 3.0;
1515
1516 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
1517 s->PixelClockBackEndFactor = 12.0;
1518 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
1519 s->PixelClockBackEndFactor = 9.0;
1520 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
1521 s->PixelClockBackEndFactor = 6.0;
1522
1523 mode_lib->ms.required_dscclk_freq_mhz[k] = s->PixelClockBackEnd[k] / s->PixelClockBackEndFactor / (double)s->DSCFormatFactor;
1524 if (mode_lib->ms.required_dscclk_freq_mhz[k] > mode_lib->ms.max_dscclk_freq_mhz) {
1525 mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true;
1526 }
1527
1528 #ifdef __DML_VBA_DEBUG__
1529 dml2_printf("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, s->PixelClockBackEnd[k]);
1530 dml2_printf("DML::%s: k=%u, required_dscclk_freq_mhz = %f\n", __func__, k, mode_lib->ms.required_dscclk_freq_mhz[k]);
1531 dml2_printf("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor);
1532 dml2_printf("DML::%s: k=%u, DSCCLKRequiredMoreThanSupported = %u\n", __func__, k, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported);
1533 #endif
1534 }
1535 }
1536 }
1537 }
1538
1539 /* Check DSC Unit and Slices Support */
1540 mode_lib->ms.support.NotEnoughDSCSlices = false;
1541 s->TotalDSCUnitsRequired = 0;
1542 mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = true;
1543 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
1544 if (mode_lib->ms.RequiresDSC[k] == true) {
1545 s->NumDSCUnitRequired = 1;
1546
1547 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
1548 s->NumDSCUnitRequired = 4;
1549 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
1550 s->NumDSCUnitRequired = 3;
1551 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
1552 s->NumDSCUnitRequired = 2;
1553
1554 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active > s->NumDSCUnitRequired * (unsigned int)mode_lib->ip.maximum_pixels_per_line_per_dsc_unit)
1555 mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false;
1556 s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + s->NumDSCUnitRequired;
1557 if (mode_lib->ms.support.NumberOfDSCSlices[k] > 4 * s->NumDSCUnitRequired)
1558 mode_lib->ms.support.NotEnoughDSCSlices = true;
1559 }
1560 }
1561
1562 mode_lib->ms.support.NotEnoughDSCUnits = false;
1563 if (s->TotalDSCUnitsRequired > (unsigned int)mode_lib->ip.num_dsc) {
1564 mode_lib->ms.support.NotEnoughDSCUnits = true;
1565 }
1566
1567 /*DSC Delay per state*/
1568 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
1569 mode_lib->ms.DSCDelay[k] = DSCDelayRequirement(mode_lib->ms.RequiresDSC[k],
1570 mode_lib->ms.ODMMode[k],
1571 mode_lib->ip.maximum_dsc_bits_per_component,
1572 mode_lib->ms.OutputBpp[k],
1573 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
1574 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
1575 mode_lib->ms.support.NumberOfDSCSlices[k],
1576 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
1577 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
1578 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
1579 s->PixelClockBackEnd[k]);
1580 }
1581
1582 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
1583 for (m = 0; m < mode_lib->ms.num_active_planes; m++) {
1584 if (display_cfg->plane_descriptors[k].stream_index == m && mode_lib->ms.RequiresDSC[m] == true) {
1585 mode_lib->ms.DSCDelay[k] = mode_lib->ms.DSCDelay[m];
1586 }
1587 }
1588 }
1589
1590 // Figure out the swath and DET configuration after the num dpp per plane is figured out
1591 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
1592 CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.ODMMode;
1593 CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.NoOfDPP;
1594
1595 // output
1596 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0];
1597 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1];
1598 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub;
1599 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub;
1600 CalculateSwathAndDETConfiguration_params->SwathWidth = mode_lib->ms.SwathWidthY;
1601 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = mode_lib->ms.SwathWidthC;
1602 CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->ms.SwathHeightY;
1603 CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->ms.SwathHeightC;
1604 CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->ms.support.request_size_bytes_luma;
1605 CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->ms.support.request_size_bytes_chroma;
1606 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->ms.DETBufferSizeInKByte; // FIXME: This is per pipe but the pipes in plane will use that
1607 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY;
1608 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC;
1609 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->ms.UnboundedRequestEnabled;
1610 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = s->dummy_integer_array[3];
1611 CalculateSwathAndDETConfiguration_params->hw_debug5 = s->dummy_boolean_array[1];
1612 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->ms.CompressedBufferSizeInkByte;
1613 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = s->dummy_boolean_array[0];
1614 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &mode_lib->ms.support.ViewportSizeSupport;
1615 CalculateSwathAndDETConfiguration_params->funcs = &mode_lib->funcs;
1616
1617 CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
1618
1619 if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) {
1620 for (k = 0; k < mode_lib->ms.num_active_planes; k++)
1621 mode_lib->ms.SurfaceSizeInMALL[k] = 0;
1622 mode_lib->ms.support.ExceededMALLSize = 0;
1623 } else {
1624 CalculateSurfaceSizeInMall(
1625 display_cfg,
1626 mode_lib->ms.num_active_planes,
1627 mode_lib->soc.mall_allocated_for_dcn_mbytes,
1628
1629 mode_lib->ms.BytePerPixelY,
1630 mode_lib->ms.BytePerPixelC,
1631 mode_lib->ms.Read256BlockWidthY,
1632 mode_lib->ms.Read256BlockWidthC,
1633 mode_lib->ms.Read256BlockHeightY,
1634 mode_lib->ms.Read256BlockHeightC,
1635 mode_lib->ms.MacroTileWidthY,
1636 mode_lib->ms.MacroTileWidthC,
1637 mode_lib->ms.MacroTileHeightY,
1638 mode_lib->ms.MacroTileHeightC,
1639
1640 /* Output */
1641 mode_lib->ms.SurfaceSizeInMALL,
1642 &mode_lib->ms.support.ExceededMALLSize);
1643 }
1644
1645 mode_lib->ms.TotalNumberOfDCCActiveDPP = 0;
1646 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
1647 if (display_cfg->plane_descriptors[k].surface.dcc.enable == true) {
1648 mode_lib->ms.TotalNumberOfDCCActiveDPP = mode_lib->ms.TotalNumberOfDCCActiveDPP + mode_lib->ms.NoOfDPP[k];
1649 }
1650 }
1651
1652 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
1653 s->SurfParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
1654 s->SurfParameters[k].DPPPerSurface = mode_lib->ms.NoOfDPP[k];
1655 s->SurfParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
1656 s->SurfParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
1657 s->SurfParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
1658 s->SurfParameters[k].BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
1659 s->SurfParameters[k].BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
1660 s->SurfParameters[k].BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
1661 s->SurfParameters[k].BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
1662 s->SurfParameters[k].BlockWidthY = mode_lib->ms.MacroTileWidthY[k];
1663 s->SurfParameters[k].BlockHeightY = mode_lib->ms.MacroTileHeightY[k];
1664 s->SurfParameters[k].BlockWidthC = mode_lib->ms.MacroTileWidthC[k];
1665 s->SurfParameters[k].BlockHeightC = mode_lib->ms.MacroTileHeightC[k];
1666 s->SurfParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
1667 s->SurfParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
1668 s->SurfParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
1669 s->SurfParameters[k].SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
1670 s->SurfParameters[k].SurfaceTiling = display_cfg->plane_descriptors[k].surface.tiling;
1671 s->SurfParameters[k].BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
1672 s->SurfParameters[k].BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
1673 s->SurfParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
1674 s->SurfParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
1675 s->SurfParameters[k].VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
1676 s->SurfParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
1677 s->SurfParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
1678 s->SurfParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch;
1679 s->SurfParameters[k].PitchC = display_cfg->plane_descriptors[k].surface.plane1.pitch;
1680 s->SurfParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
1681 s->SurfParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
1682 s->SurfParameters[k].ViewportYStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
1683 s->SurfParameters[k].ViewportXStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
1684 s->SurfParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
1685 s->SurfParameters[k].FORCE_ONE_ROW_FOR_FRAME = display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame;
1686 s->SurfParameters[k].SwathHeightY = mode_lib->ms.SwathHeightY[k];
1687 s->SurfParameters[k].SwathHeightC = mode_lib->ms.SwathHeightC[k];
1688
1689 s->SurfParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch;
1690 s->SurfParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch;
1691 }
1692
1693 CalculateVMRowAndSwath_params->display_cfg = display_cfg;
1694 CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
1695 CalculateVMRowAndSwath_params->myPipe = s->SurfParameters;
1696 CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->ms.SurfaceSizeInMALL;
1697 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
1698 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma;
1699 CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes;
1700 CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->ms.SwathWidthY;
1701 CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->ms.SwathWidthC;
1702 CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
1703 CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes;
1704 CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present;
1705
1706 // output
1707 CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceeded;
1708 CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = s->dummy_integer_array[12];
1709 CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = s->dummy_integer_array[13];
1710 CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->ms.dpte_row_height;
1711 CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->ms.dpte_row_height_chroma;
1712 CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = s->dummy_integer_array[14]; // VBA_DELTA
1713 CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = s->dummy_integer_array[15]; // VBA_DELTA
1714 CalculateVMRowAndSwath_params->vm_group_bytes = s->dummy_integer_array[16];
1715 CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
1716 CalculateVMRowAndSwath_params->PixelPTEReqWidthY = s->dummy_integer_array[17];
1717 CalculateVMRowAndSwath_params->PixelPTEReqHeightY = s->dummy_integer_array[18];
1718 CalculateVMRowAndSwath_params->PTERequestSizeY = s->dummy_integer_array[19];
1719 CalculateVMRowAndSwath_params->PixelPTEReqWidthC = s->dummy_integer_array[20];
1720 CalculateVMRowAndSwath_params->PixelPTEReqHeightC = s->dummy_integer_array[21];
1721 CalculateVMRowAndSwath_params->PTERequestSizeC = s->dummy_integer_array[22];
1722 CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y;
1723 CalculateVMRowAndSwath_params->vmpg_height_y = s->vmpg_height_y;
1724 CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c;
1725 CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c;
1726 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = s->dummy_integer_array[23];
1727 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = s->dummy_integer_array[24];
1728 CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY;
1729 CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC;
1730 CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->ms.PrefillY;
1731 CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->ms.PrefillC;
1732 CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY;
1733 CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC;
1734 CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
1735 CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow;
1736 CalculateVMRowAndSwath_params->vm_bytes = mode_lib->ms.vm_bytes;
1737 CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->ms.use_one_row_for_frame;
1738 CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->ms.use_one_row_for_frame_flip;
1739 CalculateVMRowAndSwath_params->is_using_mall_for_ss = s->dummy_boolean_array[0];
1740 CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = s->dummy_boolean_array[1];
1741 CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = s->dummy_integer_array[25];
1742 CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = mode_lib->ms.DCCMetaBufferSizeNotExceeded;
1743 CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->ms.meta_row_bw;
1744 CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->ms.meta_row_bytes;
1745 CalculateVMRowAndSwath_params->meta_req_width_luma = s->dummy_integer_array[26];
1746 CalculateVMRowAndSwath_params->meta_req_height_luma = s->dummy_integer_array[27];
1747 CalculateVMRowAndSwath_params->meta_row_width_luma = s->dummy_integer_array[28];
1748 CalculateVMRowAndSwath_params->meta_row_height_luma = s->meta_row_height_luma;
1749 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = s->dummy_integer_array[29];
1750 CalculateVMRowAndSwath_params->meta_req_width_chroma = s->dummy_integer_array[30];
1751 CalculateVMRowAndSwath_params->meta_req_height_chroma = s->dummy_integer_array[31];
1752 CalculateVMRowAndSwath_params->meta_row_width_chroma = s->dummy_integer_array[32];
1753 CalculateVMRowAndSwath_params->meta_row_height_chroma = s->meta_row_height_chroma;
1754 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = s->dummy_integer_array[33];
1755
1756 CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params);
1757
1758 mode_lib->ms.support.PTEBufferSizeNotExceeded = true;
1759 mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = true;
1760
1761 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
1762 if (mode_lib->ms.PTEBufferSizeNotExceeded[k] == false)
1763 mode_lib->ms.support.PTEBufferSizeNotExceeded = false;
1764
1765 if (mode_lib->ms.DCCMetaBufferSizeNotExceeded[k] == false)
1766 mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = false;
1767
1768 #ifdef __DML_VBA_DEBUG__
1769 dml2_printf("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.PTEBufferSizeNotExceeded[k]);
1770 dml2_printf("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.DCCMetaBufferSizeNotExceeded[k]);
1771 #endif
1772 }
1773 #ifdef __DML_VBA_DEBUG__
1774 dml2_printf("DML::%s: PTEBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.PTEBufferSizeNotExceeded);
1775 dml2_printf("DML::%s: DCCMetaBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.DCCMetaBufferSizeNotExceeded);
1776 #endif
1777
1778 mode_lib->ms.UrgLatency = CalculateUrgentLatency(
1779 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us,
1780 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us,
1781 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us,
1782 mode_lib->soc.do_urgent_latency_adjustment,
1783 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us,
1784 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz,
1785 mode_lib->ms.FabricClock,
1786 mode_lib->ms.uclk_freq_mhz,
1787 mode_lib->soc.qos_parameters.qos_type,
1788 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles,
1789 mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles,
1790 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
1791 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
1792 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin,
1793 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
1794
1795 mode_lib->ms.TripToMemory = CalculateTripToMemory(
1796 mode_lib->ms.UrgLatency,
1797 mode_lib->ms.FabricClock,
1798 mode_lib->ms.uclk_freq_mhz,
1799 mode_lib->soc.qos_parameters.qos_type,
1800 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles,
1801 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
1802 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
1803 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
1804 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
1805
1806 mode_lib->ms.TripToMemory = math_max2(mode_lib->ms.UrgLatency, mode_lib->ms.TripToMemory);
1807
1808 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
1809 double line_time_us = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
1810 calculate_cursor_req_attributes(
1811 display_cfg->plane_descriptors[k].cursor.cursor_width,
1812 display_cfg->plane_descriptors[k].cursor.cursor_bpp,
1813
1814 // output
1815 &s->cursor_lines_per_chunk[k],
1816 &s->cursor_bytes_per_line[k],
1817 &s->cursor_bytes_per_chunk[k],
1818 &s->cursor_bytes[k]);
1819
1820 bool cursor_not_enough_urgent_latency_hiding = 0;
1821 calculate_cursor_urgent_burst_factor(
1822 mode_lib->ip.cursor_buffer_size,
1823 display_cfg->plane_descriptors[k].cursor.cursor_width,
1824 s->cursor_bytes_per_chunk[k],
1825 s->cursor_lines_per_chunk[k],
1826 line_time_us,
1827 mode_lib->ms.UrgLatency,
1828
1829 // output
1830 &mode_lib->ms.UrgentBurstFactorCursor[k],
1831 &cursor_not_enough_urgent_latency_hiding);
1832 mode_lib->ms.UrgentBurstFactorCursorPre[k] = mode_lib->ms.UrgentBurstFactorCursor[k];
1833
1834 #ifdef __DML_VBA_DEBUG__
1835 dml2_printf("DML::%s: k=%d, Calling CalculateUrgentBurstFactor\n", __func__, k);
1836 dml2_printf("DML::%s: k=%d, VRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
1837 dml2_printf("DML::%s: k=%d, VRatioChroma=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio);
1838 #endif
1839
1840 CalculateUrgentBurstFactor(
1841 &display_cfg->plane_descriptors[k],
1842 mode_lib->ms.swath_width_luma_ub[k],
1843 mode_lib->ms.swath_width_chroma_ub[k],
1844 mode_lib->ms.SwathHeightY[k],
1845 mode_lib->ms.SwathHeightC[k],
1846 line_time_us,
1847 mode_lib->ms.UrgLatency,
1848 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
1849 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
1850 mode_lib->ms.BytePerPixelInDETY[k],
1851 mode_lib->ms.BytePerPixelInDETC[k],
1852 mode_lib->ms.DETBufferSizeY[k],
1853 mode_lib->ms.DETBufferSizeC[k],
1854
1855 // Output
1856 &mode_lib->ms.UrgentBurstFactorLuma[k],
1857 &mode_lib->ms.UrgentBurstFactorChroma[k],
1858 &mode_lib->ms.NotEnoughUrgentLatencyHiding[k]);
1859
1860 mode_lib->ms.NotEnoughUrgentLatencyHiding[k] = mode_lib->ms.NotEnoughUrgentLatencyHiding[k] || cursor_not_enough_urgent_latency_hiding;
1861 }
1862
1863 CalculateDCFCLKDeepSleep(
1864 display_cfg,
1865 mode_lib->ms.num_active_planes,
1866 mode_lib->ms.BytePerPixelY,
1867 mode_lib->ms.BytePerPixelC,
1868 mode_lib->ms.SwathWidthY,
1869 mode_lib->ms.SwathWidthC,
1870 mode_lib->ms.NoOfDPP,
1871 mode_lib->ms.PSCL_FACTOR,
1872 mode_lib->ms.PSCL_FACTOR_CHROMA,
1873 mode_lib->ms.RequiredDPPCLK,
1874 mode_lib->ms.SurfaceReadBandwidthLuma,
1875 mode_lib->ms.SurfaceReadBandwidthChroma,
1876 mode_lib->soc.return_bus_width_bytes,
1877
1878 /* Output */
1879 &mode_lib->ms.dcfclk_deepsleep);
1880
1881 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
1882 if (display_cfg->plane_descriptors[k].stream_index == k) {
1883 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) {
1884 mode_lib->ms.WritebackDelayTime[k] = mode_lib->soc.qos_parameters.writeback.base_latency_us + CalculateWriteBackDelay(
1885 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format,
1886 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio,
1887 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio,
1888 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps,
1889 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width,
1890 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height,
1891 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height,
1892 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->ms.RequiredDISPCLK;
1893 } else {
1894 mode_lib->ms.WritebackDelayTime[k] = 0.0;
1895 }
1896 for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) {
1897 if (display_cfg->plane_descriptors[m].stream_index == k && display_cfg->stream_descriptors[display_cfg->plane_descriptors[m].stream_index].writeback.enable == true) {
1898 mode_lib->ms.WritebackDelayTime[k] = math_max2(mode_lib->ms.WritebackDelayTime[k],
1899 mode_lib->soc.qos_parameters.writeback.base_latency_us + CalculateWriteBackDelay(
1900 display_cfg->stream_descriptors[display_cfg->plane_descriptors[m].stream_index].writeback.pixel_format,
1901 display_cfg->stream_descriptors[display_cfg->plane_descriptors[m].stream_index].writeback.scaling_info.h_ratio,
1902 display_cfg->stream_descriptors[display_cfg->plane_descriptors[m].stream_index].writeback.scaling_info.v_ratio,
1903 display_cfg->stream_descriptors[display_cfg->plane_descriptors[m].stream_index].writeback.scaling_info.v_taps,
1904 display_cfg->stream_descriptors[display_cfg->plane_descriptors[m].stream_index].writeback.scaling_info.output_width,
1905 display_cfg->stream_descriptors[display_cfg->plane_descriptors[m].stream_index].writeback.scaling_info.output_height,
1906 display_cfg->stream_descriptors[display_cfg->plane_descriptors[m].stream_index].writeback.scaling_info.input_height,
1907 display_cfg->stream_descriptors[display_cfg->plane_descriptors[m].stream_index].timing.h_total) / mode_lib->ms.RequiredDISPCLK);
1908 }
1909 }
1910 }
1911 }
1912 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
1913 for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) {
1914 if (display_cfg->plane_descriptors[k].stream_index == m) {
1915 mode_lib->ms.WritebackDelayTime[k] = mode_lib->ms.WritebackDelayTime[m];
1916 }
1917 }
1918 }
1919
1920 // MaximumVStartup is actually Tvstartup_min in DCN4 programming guide
1921 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
1922 bool isInterlaceTiming = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !mode_lib->ip.ptoi_supported);
1923 s->MaximumVStartup[k] = CalculateMaxVStartup(
1924 mode_lib->ip.ptoi_supported,
1925 mode_lib->ip.vblank_nom_default_us,
1926 &display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing,
1927 mode_lib->ms.WritebackDelayTime[k]);
1928 mode_lib->ms.MaxVStartupLines[k] = (isInterlaceTiming ? (2 * s->MaximumVStartup[k]) : s->MaximumVStartup[k]);
1929 }
1930
1931 #ifdef __DML_VBA_DEBUG__
1932 dml2_printf("DML::%s: k=%u, MaximumVStartup = %u\n", __func__, k, s->MaximumVStartup[k]);
1933 #endif
1934
1935 /* Immediate Flip and MALL parameters */
1936 s->ImmediateFlipRequired = false;
1937 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
1938 s->ImmediateFlipRequired = s->ImmediateFlipRequired || display_cfg->plane_descriptors[k].immediate_flip;
1939 }
1940
1941 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = false;
1942 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
1943 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe =
1944 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe ||
1945 ((display_cfg->hostvm_enable == true || display_cfg->plane_descriptors[k].immediate_flip == true) &&
1946 (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame || dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])));
1947 }
1948
1949 mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = false;
1950 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
1951 mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen ||
1952 ((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable || display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))) ||
1953 ((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_disable || display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame));
1954 }
1955
1956 s->FullFrameMALLPStateMethod = false;
1957 s->SubViewportMALLPStateMethod = false;
1958 s->PhantomPipeMALLPStateMethod = false;
1959 s->SubViewportMALLRefreshGreaterThan120Hz = false;
1960 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
1961 if (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame)
1962 s->FullFrameMALLPStateMethod = true;
1963 if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) {
1964 s->SubViewportMALLPStateMethod = true;
1965 if (!display_cfg->overrides.enable_subvp_implicit_pmo) {
1966 // For dv, small frame tests will have very high refresh rate
1967 unsigned long long refresh_rate = (unsigned long long) ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz * 1000 /
1968 (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
1969 (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total);
1970 if (refresh_rate > 120)
1971 s->SubViewportMALLRefreshGreaterThan120Hz = true;
1972 }
1973 }
1974 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))
1975 s->PhantomPipeMALLPStateMethod = true;
1976 }
1977 mode_lib->ms.support.InvalidCombinationOfMALLUseForPState = (s->SubViewportMALLPStateMethod != s->PhantomPipeMALLPStateMethod) ||
1978 (s->SubViewportMALLPStateMethod && s->FullFrameMALLPStateMethod) || s->SubViewportMALLRefreshGreaterThan120Hz;
1979
1980 #ifdef __DML_VBA_DEBUG__
1981 dml2_printf("DML::%s: SubViewportMALLPStateMethod = %u\n", __func__, s->SubViewportMALLPStateMethod);
1982 dml2_printf("DML::%s: PhantomPipeMALLPStateMethod = %u\n", __func__, s->PhantomPipeMALLPStateMethod);
1983 dml2_printf("DML::%s: FullFrameMALLPStateMethod = %u\n", __func__, s->FullFrameMALLPStateMethod);
1984 dml2_printf("DML::%s: SubViewportMALLRefreshGreaterThan120Hz = %u\n", __func__, s->SubViewportMALLRefreshGreaterThan120Hz);
1985 dml2_printf("DML::%s: InvalidCombinationOfMALLUseForPState = %u\n", __func__, mode_lib->ms.support.InvalidCombinationOfMALLUseForPState);
1986 dml2_printf("DML::%s: in_out_params->min_clk_index = %u\n", __func__, in_out_params->min_clk_index);
1987 dml2_printf("DML::%s: mode_lib->ms.DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK);
1988 dml2_printf("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
1989 dml2_printf("DML::%s: mode_lib->ms.uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz);
1990 dml2_printf("DML::%s: max_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.max_urgent_latency_us);
1991 dml2_printf("DML::%s: urgent latency tolerance = %f\n", __func__, ((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)));
1992 #endif
1993
1994 mode_lib->ms.support.OutstandingRequestsSupport = true;
1995 mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = true;
1996
1997 mode_lib->ms.support.avg_urgent_latency_us
1998 = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
1999 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0)
2000 + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock)
2001 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0);
2002
2003 mode_lib->ms.support.avg_non_urgent_latency_us
2004 = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
2005 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0)
2006 + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock)
2007 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0);
2008
2009 double outstanding_latency_us = 0;
2010 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
2011
2012 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
2013 outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_luma[k]
2014 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes));
2015
2016 if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) {
2017 mode_lib->ms.support.OutstandingRequestsSupport = false;
2018 }
2019
2020 if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) {
2021 mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false;
2022 }
2023
2024 #ifdef __DML_VBA_DEBUG__
2025 dml2_printf("DML::%s: avg_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_urgent_latency_us);
2026 dml2_printf("DML::%s: avg_non_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_non_urgent_latency_us);
2027 dml2_printf("DML::%s: k=%d, request_size_bytes_luma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_luma[k]);
2028 dml2_printf("DML::%s: k=%d, outstanding_latency_us = %f (luma)\n", __func__, k, outstanding_latency_us);
2029 #endif
2030 }
2031
2032 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x && mode_lib->ms.BytePerPixelC[k] > 0) {
2033 outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_chroma[k]
2034 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes));
2035
2036 if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) {
2037 mode_lib->ms.support.OutstandingRequestsSupport = false;
2038 }
2039
2040 if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) {
2041 mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false;
2042 }
2043 #ifdef __DML_VBA_DEBUG__
2044 dml2_printf("DML::%s: k=%d, request_size_bytes_chroma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_chroma[k]);
2045 dml2_printf("DML::%s: k=%d, outstanding_latency_us = %f (chroma)\n", __func__, k, outstanding_latency_us);
2046 #endif
2047 }
2048 }
2049
2050 memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params));
2051 if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0 || mode_lib->ip.dcn_mrq_present) {
2052 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
2053 mode_lib->ms.mall_prefetch_sdp_overhead_factor[k] = 1.0;
2054 mode_lib->ms.mall_prefetch_dram_overhead_factor[k] = 1.0;
2055 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0;
2056 mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0;
2057 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0;
2058 mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0;
2059 }
2060 } else {
2061 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
2062 calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
2063 calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count;
2064 calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes;
2065 calculate_mcache_setting_params->mcache_size_bytes = mode_lib->soc.mcache_size_bytes;
2066 calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes;
2067 calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
2068 calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
2069
2070 calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format;
2071 calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle);
2072 calculate_mcache_setting_params->vp_stationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
2073 calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling;
2074 calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall;
2075
2076 calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
2077 calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
2078 calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
2079 calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
2080 calculate_mcache_setting_params->blk_width_l = mode_lib->ms.MacroTileWidthY[k];
2081 calculate_mcache_setting_params->blk_height_l = mode_lib->ms.MacroTileHeightY[k];
2082 calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k];
2083 calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k];
2084 calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k];
2085 calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->ms.BytePerPixelY[k];
2086
2087 calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start;
2088 calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
2089 calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
2090 calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
2091 calculate_mcache_setting_params->blk_width_c = mode_lib->ms.MacroTileWidthC[k];
2092 calculate_mcache_setting_params->blk_height_c = mode_lib->ms.MacroTileHeightC[k];
2093 calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k];
2094 calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k];
2095 calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k];
2096 calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->ms.BytePerPixelC[k];
2097
2098 // output
2099 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k];
2100 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k];
2101 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k];
2102 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k];
2103
2104 calculate_mcache_setting_params->num_mcaches_l = &mode_lib->ms.num_mcaches_l[k];
2105 calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->ms.mcache_row_bytes_l[k];
2106 calculate_mcache_setting_params->mcache_offsets_l = mode_lib->ms.mcache_offsets_l[k];
2107 calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->ms.mcache_shift_granularity_l[k];
2108
2109 calculate_mcache_setting_params->num_mcaches_c = &mode_lib->ms.num_mcaches_c[k];
2110 calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->ms.mcache_row_bytes_c[k];
2111 calculate_mcache_setting_params->mcache_offsets_c = mode_lib->ms.mcache_offsets_c[k];
2112 calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->ms.mcache_shift_granularity_c[k];
2113
2114 calculate_mcache_setting_params->mall_comb_mcache_l = &mode_lib->ms.mall_comb_mcache_l[k];
2115 calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->ms.mall_comb_mcache_c[k];
2116 calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->ms.lc_comb_mcache[k];
2117
2118 calculate_mcache_setting(&mode_lib->scratch, calculate_mcache_setting_params);
2119 }
2120
2121 calculate_mall_bw_overhead_factor(
2122 mode_lib->ms.mall_prefetch_sdp_overhead_factor,
2123 mode_lib->ms.mall_prefetch_dram_overhead_factor,
2124
2125 // input
2126 display_cfg,
2127 mode_lib->ms.num_active_planes);
2128 }
2129
2130 // Calculate all the bandwidth available
2131 // Need another bw for latency evaluation
2132 calculate_bandwidth_available(
2133 mode_lib->ms.support.avg_bandwidth_available_min, // not used
2134 mode_lib->ms.support.avg_bandwidth_available, // not used
2135 mode_lib->ms.support.urg_bandwidth_available_min_latency,
2136 mode_lib->ms.support.urg_bandwidth_available, // not used
2137 mode_lib->ms.support.urg_bandwidth_available_vm_only, // not used
2138 mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm, // not used
2139
2140 &mode_lib->soc,
2141 display_cfg->hostvm_enable,
2142 mode_lib->ms.DCFCLK,
2143 mode_lib->ms.FabricClock,
2144 mode_lib->ms.dram_bw_mbps);
2145
2146 calculate_bandwidth_available(
2147 mode_lib->ms.support.avg_bandwidth_available_min,
2148 mode_lib->ms.support.avg_bandwidth_available,
2149 mode_lib->ms.support.urg_bandwidth_available_min,
2150 mode_lib->ms.support.urg_bandwidth_available,
2151 mode_lib->ms.support.urg_bandwidth_available_vm_only,
2152 mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm,
2153
2154 &mode_lib->soc,
2155 display_cfg->hostvm_enable,
2156 mode_lib->ms.MaxDCFCLK,
2157 mode_lib->ms.MaxFabricClock,
2158 mode_lib->ms.dram_bw_mbps);
2159
2160
2161 // Average BW support check
2162 calculate_avg_bandwidth_required(
2163 mode_lib->ms.support.avg_bandwidth_required,
2164 // input
2165 display_cfg,
2166 mode_lib->ms.num_active_planes,
2167 mode_lib->ms.SurfaceReadBandwidthLuma,
2168 mode_lib->ms.SurfaceReadBandwidthChroma,
2169 mode_lib->ms.cursor_bw,
2170 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0,
2171 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1,
2172 mode_lib->ms.mall_prefetch_dram_overhead_factor,
2173 mode_lib->ms.mall_prefetch_sdp_overhead_factor);
2174
2175 for (m = 0; m < dml2_core_internal_bw_max; m++) { // check sdp and dram
2176 mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_idle][m] = 1;
2177 mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_active][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][m]);
2178 mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_svp_prefetch][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][m]);
2179 }
2180
2181 mode_lib->ms.support.AvgBandwidthSupport = true;
2182 mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = true;
2183 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
2184 if (mode_lib->ms.NotEnoughUrgentLatencyHiding[k]) {
2185 mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = false;
2186 dml2_printf("DML::%s: k=%u NotEnoughUrgentLatencyHiding set\n", __func__, k);
2187
2188 }
2189 }
2190 for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
2191 for (n = 0; n < dml2_core_internal_bw_max; n++) { // check sdp and dram
2192 if (!mode_lib->ms.support.avg_bandwidth_support_ok[m][n] && (m == dml2_core_internal_soc_state_sys_active || mode_lib->soc.mall_allocated_for_dcn_mbytes > 0)) {
2193 mode_lib->ms.support.AvgBandwidthSupport = false;
2194 #ifdef __DML_VBA_DEBUG__
2195 dml2_printf("DML::%s: avg_bandwidth_support_ok[%s][%s] not ok\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n));
2196 #endif
2197 }
2198 }
2199 }
2200
2201 /* Prefetch Check */
2202 {
2203 mode_lib->ms.TimeCalc = 24 / mode_lib->ms.dcfclk_deepsleep;
2204
2205
2206 calculate_hostvm_inefficiency_factor(
2207 &s->HostVMInefficiencyFactor,
2208 &s->HostVMInefficiencyFactorPrefetch,
2209
2210 display_cfg->gpuvm_enable,
2211 display_cfg->hostvm_enable,
2212 mode_lib->ip.remote_iommu_outstanding_translations,
2213 mode_lib->soc.max_outstanding_reqs,
2214 mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active],
2215 mode_lib->ms.support.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);
2216
2217 mode_lib->ms.Total3dlutActive = 0;
2218 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
2219 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut)
2220 mode_lib->ms.Total3dlutActive = mode_lib->ms.Total3dlutActive + 1;
2221
2222 // Calculate tdlut schedule related terms
2223 calculate_tdlut_setting_params->dispclk_mhz = mode_lib->ms.RequiredDISPCLK;
2224 calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
2225 calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode;
2226 calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode;
2227 calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size;
2228 calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
2229 calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
2230 calculate_tdlut_setting_params->tdlut_mpc_width_flag = display_cfg->plane_descriptors[k].tdlut.tdlut_mpc_width_flag;
2231 calculate_tdlut_setting_params->is_gfx11 = dml_get_gfx_version(display_cfg->plane_descriptors[k].surface.tiling);
2232
2233 // output
2234 calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k];
2235 calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k];
2236 calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k];
2237 calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k];
2238 calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k];
2239 calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k];
2240
2241 calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params);
2242 }
2243
2244 double min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active];
2245 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3)
2246 s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes,
2247 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
2248 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes));
2249
2250 CalculateExtraLatency(
2251 display_cfg,
2252 mode_lib->ip.rob_buffer_size_kbytes,
2253 mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles,
2254 s->ReorderingBytes,
2255 mode_lib->ms.DCFCLK,
2256 mode_lib->ms.FabricClock,
2257 mode_lib->ip.pixel_chunk_size_kbytes,
2258 min_return_bw_for_latency,
2259 mode_lib->ms.num_active_planes,
2260 mode_lib->ms.NoOfDPP,
2261 mode_lib->ms.dpte_group_bytes,
2262 s->tdlut_bytes_per_group,
2263 s->HostVMInefficiencyFactor,
2264 s->HostVMInefficiencyFactorPrefetch,
2265 mode_lib->soc.hostvm_min_page_size_kbytes,
2266 mode_lib->soc.qos_parameters.qos_type,
2267 !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable),
2268 mode_lib->soc.max_outstanding_reqs,
2269 mode_lib->ms.support.request_size_bytes_luma,
2270 mode_lib->ms.support.request_size_bytes_chroma,
2271 mode_lib->ip.meta_chunk_size_kbytes,
2272 mode_lib->ip.dchub_arb_to_ret_delay,
2273 mode_lib->ms.TripToMemory,
2274 mode_lib->ip.hostvm_mode,
2275
2276 // output
2277 &mode_lib->ms.ExtraLatency,
2278 &mode_lib->ms.ExtraLatency_sr,
2279 &mode_lib->ms.ExtraLatencyPrefetch);
2280
2281 {
2282 mode_lib->ms.support.PrefetchSupported = true;
2283 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
2284
2285 mode_lib->ms.TWait[k] = CalculateTWait(
2286 display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns,
2287 mode_lib->ms.UrgLatency,
2288 mode_lib->ms.TripToMemory);
2289
2290 struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe;
2291 myPipe->Dppclk = mode_lib->ms.RequiredDPPCLK[k];
2292 myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK;
2293 myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
2294 myPipe->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep;
2295 myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[k];
2296 myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled;
2297 myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
2298 myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
2299 myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
2300 myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
2301 myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
2302 myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored;
2303 myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
2304 myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
2305 myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
2306 myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
2307 myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
2308 myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors;
2309 myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active;
2310 myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
2311 myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active;
2312 myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
2313 myPipe->ODMMode = mode_lib->ms.ODMMode[k];
2314 myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
2315 myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
2316 myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
2317 myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
2318
2319 #ifdef __DML_VBA_DEBUG__
2320 dml2_printf("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k);
2321 dml2_printf("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[k]);
2322 #endif
2323 CalculatePrefetchSchedule_params->display_cfg = display_cfg;
2324 CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch;
2325 CalculatePrefetchSchedule_params->myPipe = myPipe;
2326 CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelay[k];
2327 CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter;
2328 CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl;
2329 CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only;
2330 CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor;
2331 CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal;
2332 CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->ms.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
2333 CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format;
2334 CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters;
2335 CalculatePrefetchSchedule_params->VStartup = s->MaximumVStartup[k];
2336 CalculatePrefetchSchedule_params->MaxVStartup = s->MaximumVStartup[k];
2337 CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
2338 CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable;
2339 CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled;
2340 CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required;
2341 CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes;
2342 CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency;
2343 CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->ms.ExtraLatencyPrefetch;
2344 CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc;
2345 CalculatePrefetchSchedule_params->vm_bytes = mode_lib->ms.vm_bytes[k];
2346 CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[k];
2347 CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[k];
2348 CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k];
2349 CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY[k];
2350 CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[k];
2351 CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k];
2352 CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC[k];
2353 CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub[k];
2354 CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub[k];
2355 CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightY[k];
2356 CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightC[k];
2357 CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait[k];
2358 CalculatePrefetchSchedule_params->Ttrip = mode_lib->ms.TripToMemory;
2359 CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
2360 CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k];
2361 CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k];
2362 CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k];
2363 CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k];
2364 CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0);
2365 CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k];
2366 CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k];
2367 CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
2368 CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present;
2369 CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->ms.meta_row_bytes[k];
2370 CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor[k];
2371
2372 // output
2373 CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k];
2374 CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k];
2375 CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->ms.dst_y_prefetch[k];
2376 CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->ms.LinesForVM[k];
2377 CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->ms.LinesForDPTERow[k];
2378 CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[k];
2379 CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[k];
2380 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; // prefetch_sw_bw_l
2381 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; // prefetch_sw_bw_c
2382 CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.NoTimeForDynamicMetadata[k];
2383 CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k];
2384 CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->ms.Tno_bw_flip[k];
2385 CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k];
2386 CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0];
2387 CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1];
2388 CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2];
2389 CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k];
2390 CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k];
2391 CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k];
2392 CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k];
2393 CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k];
2394 CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k];
2395 CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0];
2396 CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1];
2397 CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2];
2398 CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->ms.prefetch_cursor_bw[k];
2399
2400 mode_lib->ms.NoTimeForPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params);
2401
2402 mode_lib->ms.support.PrefetchSupported &= !mode_lib->ms.NoTimeForPrefetch[k];
2403 dml2_printf("DML::%s: k=%d, dst_y_per_vm_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_vm_vblank);
2404 dml2_printf("DML::%s: k=%d, dst_y_per_row_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_row_vblank);
2405 } // for k num_planes
2406
2407 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
2408 if (mode_lib->ms.dst_y_prefetch[k] < 2.0
2409 || mode_lib->ms.LinesForVM[k] >= 32.0
2410 || mode_lib->ms.LinesForDPTERow[k] >= 16.0
2411 || mode_lib->ms.NoTimeForPrefetch[k] == true
2412 || s->DSTYAfterScaler[k] > 8) {
2413 mode_lib->ms.support.PrefetchSupported = false;
2414 dml2_printf("DML::%s: k=%d, dst_y_prefetch=%f (should not be < 2)\n", __func__, k, mode_lib->ms.dst_y_prefetch[k]);
2415 dml2_printf("DML::%s: k=%d, LinesForVM=%f (should not be >= 32)\n", __func__, k, mode_lib->ms.LinesForVM[k]);
2416 dml2_printf("DML::%s: k=%d, LinesForDPTERow=%f (should not be >= 16)\n", __func__, k, mode_lib->ms.LinesForDPTERow[k]);
2417 dml2_printf("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]);
2418 dml2_printf("DML::%s: k=%d, DSTYAfterScaler=%d (should be <= 8)\n", __func__, k, s->DSTYAfterScaler[k]);
2419 }
2420 }
2421
2422 mode_lib->ms.support.DynamicMetadataSupported = true;
2423 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
2424 if (mode_lib->ms.NoTimeForDynamicMetadata[k] == true) {
2425 mode_lib->ms.support.DynamicMetadataSupported = false;
2426 }
2427 }
2428
2429 mode_lib->ms.support.VRatioInPrefetchSupported = true;
2430 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
2431 if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ ||
2432 mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__) {
2433 mode_lib->ms.support.VRatioInPrefetchSupported = false;
2434 dml2_printf("DML::%s: VRatioInPrefetchSupported = %u\n", __func__, mode_lib->ms.support.VRatioInPrefetchSupported);
2435 }
2436 }
2437
2438 s->AnyLinesForVMOrRowTooLarge = false;
2439 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
2440 if (mode_lib->ms.LinesForDPTERow[k] >= 16 || mode_lib->ms.LinesForVM[k] >= 32) {
2441 s->AnyLinesForVMOrRowTooLarge = true;
2442 }
2443 }
2444
2445 // Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the Prefetch Schedule Check is ok
2446 if (mode_lib->ms.support.PrefetchSupported) {
2447 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
2448 // Calculate Urgent burst factor for prefetch
2449 #ifdef __DML_VBA_DEBUG__
2450 dml2_printf("DML::%s: k=%d, Calling CalculateUrgentBurstFactor (for prefetch)\n", __func__, k);
2451 dml2_printf("DML::%s: k=%d, VRatioPreY=%f\n", __func__, k, mode_lib->ms.VRatioPreY[k]);
2452 dml2_printf("DML::%s: k=%d, VRatioPreC=%f\n", __func__, k, mode_lib->ms.VRatioPreC[k]);
2453 #endif
2454 double line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
2455 CalculateUrgentBurstFactor(
2456 &display_cfg->plane_descriptors[k],
2457 mode_lib->ms.swath_width_luma_ub[k],
2458 mode_lib->ms.swath_width_chroma_ub[k],
2459 mode_lib->ms.SwathHeightY[k],
2460 mode_lib->ms.SwathHeightC[k],
2461 line_time_us,
2462 mode_lib->ms.UrgLatency,
2463 mode_lib->ms.VRatioPreY[k],
2464 mode_lib->ms.VRatioPreC[k],
2465 mode_lib->ms.BytePerPixelInDETY[k],
2466 mode_lib->ms.BytePerPixelInDETC[k],
2467 mode_lib->ms.DETBufferSizeY[k],
2468 mode_lib->ms.DETBufferSizeC[k],
2469 /* Output */
2470 &mode_lib->ms.UrgentBurstFactorLumaPre[k],
2471 &mode_lib->ms.UrgentBurstFactorChromaPre[k],
2472 &mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]);
2473 }
2474
2475 // Calculate urgent bandwidth required, both urg and non urg peak bandwidth
2476 // assume flip bw is 0 at this point
2477 for (k = 0; k < mode_lib->ms.num_active_planes; k++)
2478 mode_lib->ms.final_flip_bw[k] = 0;
2479
2480 calculate_peak_bandwidth_required(
2481 &mode_lib->scratch,
2482 mode_lib->ms.support.urg_vactive_bandwidth_required,
2483 mode_lib->ms.support.urg_bandwidth_required,
2484 mode_lib->ms.support.non_urg_bandwidth_required,
2485
2486 display_cfg,
2487 0, // inc_flip_bw
2488 mode_lib->ms.num_active_planes,
2489 mode_lib->ms.NoOfDPP,
2490 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0,
2491 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1,
2492 mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0,
2493 mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1,
2494 mode_lib->ms.mall_prefetch_sdp_overhead_factor,
2495 mode_lib->ms.mall_prefetch_dram_overhead_factor,
2496
2497 mode_lib->ms.SurfaceReadBandwidthLuma,
2498 mode_lib->ms.SurfaceReadBandwidthChroma,
2499 mode_lib->ms.RequiredPrefetchPixelDataBWLuma,
2500 mode_lib->ms.RequiredPrefetchPixelDataBWChroma,
2501 mode_lib->ms.cursor_bw,
2502 mode_lib->ms.dpte_row_bw,
2503 mode_lib->ms.meta_row_bw,
2504 mode_lib->ms.prefetch_cursor_bw,
2505 mode_lib->ms.prefetch_vmrow_bw,
2506 mode_lib->ms.final_flip_bw,
2507 mode_lib->ms.UrgentBurstFactorLuma,
2508 mode_lib->ms.UrgentBurstFactorChroma,
2509 mode_lib->ms.UrgentBurstFactorCursor,
2510 mode_lib->ms.UrgentBurstFactorLumaPre,
2511 mode_lib->ms.UrgentBurstFactorChromaPre,
2512 mode_lib->ms.UrgentBurstFactorCursorPre);
2513
2514 // Check urg peak bandwidth against available urg bw
2515 // check at SDP and DRAM, for all soc states (SVP prefetch and Sys Active)
2516 check_urgent_bandwidth_support(
2517 &s->dummy_single[0], // double* frac_urg_bandwidth
2518 &s->dummy_single[1], // double* frac_urg_bandwidth_mall
2519 &mode_lib->ms.support.UrgVactiveBandwidthSupport,
2520 &mode_lib->ms.support.PrefetchBandwidthSupported,
2521
2522 mode_lib->soc.mall_allocated_for_dcn_mbytes,
2523 mode_lib->ms.support.non_urg_bandwidth_required,
2524 mode_lib->ms.support.urg_vactive_bandwidth_required,
2525 mode_lib->ms.support.urg_bandwidth_required,
2526 mode_lib->ms.support.urg_bandwidth_available);
2527
2528 mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.PrefetchBandwidthSupported;
2529 dml2_printf("DML::%s: PrefetchBandwidthSupported=%0d\n", __func__, mode_lib->ms.support.PrefetchBandwidthSupported);
2530
2531 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
2532 if (mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]) {
2533 mode_lib->ms.support.PrefetchSupported = false;
2534 dml2_printf("DML::%s: k=%d, NotEnoughUrgentLatencyHidingPre=%d\n", __func__, k, mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]);
2535 }
2536 }
2537
2538
2539 // Both prefetch schedule and BW okay
2540 if (mode_lib->ms.support.PrefetchSupported == true && mode_lib->ms.support.VRatioInPrefetchSupported == true) {
2541 mode_lib->ms.BandwidthAvailableForImmediateFlip =
2542 get_bandwidth_available_for_immediate_flip(dml2_core_internal_soc_state_sys_active,
2543 mode_lib->ms.support.urg_bandwidth_required, // no flip
2544 mode_lib->ms.support.urg_bandwidth_available);
2545
2546 mode_lib->ms.TotImmediateFlipBytes = 0;
2547 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
2548 if (display_cfg->plane_descriptors[k].immediate_flip) {
2549 s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes(
2550 s->HostVMInefficiencyFactor,
2551 mode_lib->ms.vm_bytes[k],
2552 mode_lib->ms.DPTEBytesPerRow[k],
2553 mode_lib->ms.meta_row_bytes[k]);
2554 } else {
2555 s->per_pipe_flip_bytes[k] = 0;
2556 }
2557 mode_lib->ms.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->ms.NoOfDPP[k];
2558
2559 }
2560
2561 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
2562 CalculateFlipSchedule(
2563 &mode_lib->scratch,
2564 display_cfg->plane_descriptors[k].immediate_flip,
2565 1, // use_lb_flip_bw
2566 s->HostVMInefficiencyFactor,
2567 s->Tvm_trips_flip[k],
2568 s->Tr0_trips_flip[k],
2569 s->Tvm_trips_flip_rounded[k],
2570 s->Tr0_trips_flip_rounded[k],
2571 display_cfg->gpuvm_enable,
2572 mode_lib->ms.vm_bytes[k],
2573 mode_lib->ms.DPTEBytesPerRow[k],
2574 mode_lib->ms.BandwidthAvailableForImmediateFlip,
2575 mode_lib->ms.TotImmediateFlipBytes,
2576 display_cfg->plane_descriptors[k].pixel_format,
2577 (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)),
2578 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
2579 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
2580 mode_lib->ms.Tno_bw_flip[k],
2581 mode_lib->ms.dpte_row_height[k],
2582 mode_lib->ms.dpte_row_height_chroma[k],
2583 mode_lib->ms.use_one_row_for_frame_flip[k],
2584 mode_lib->ip.max_flip_time_us,
2585 s->per_pipe_flip_bytes[k],
2586 mode_lib->ms.meta_row_bytes[k],
2587 s->meta_row_height_luma[k],
2588 s->meta_row_height_chroma[k],
2589 mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable,
2590
2591 /* Output */
2592 &mode_lib->ms.dst_y_per_vm_flip[k],
2593 &mode_lib->ms.dst_y_per_row_flip[k],
2594 &mode_lib->ms.final_flip_bw[k],
2595 &mode_lib->ms.ImmediateFlipSupportedForPipe[k]);
2596 }
2597
2598 calculate_peak_bandwidth_required(
2599 &mode_lib->scratch,
2600 s->dummy_bw,
2601 mode_lib->ms.support.urg_bandwidth_required_flip,
2602 mode_lib->ms.support.non_urg_bandwidth_required_flip,
2603
2604 // Input
2605 display_cfg,
2606 1, // inc_flip_bw
2607 mode_lib->ms.num_active_planes,
2608 mode_lib->ms.NoOfDPP,
2609
2610 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0,
2611 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1,
2612 mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0,
2613 mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1,
2614 mode_lib->ms.mall_prefetch_sdp_overhead_factor,
2615 mode_lib->ms.mall_prefetch_dram_overhead_factor,
2616
2617 mode_lib->ms.SurfaceReadBandwidthLuma,
2618 mode_lib->ms.SurfaceReadBandwidthChroma,
2619 mode_lib->ms.RequiredPrefetchPixelDataBWLuma,
2620 mode_lib->ms.RequiredPrefetchPixelDataBWChroma,
2621 mode_lib->ms.cursor_bw,
2622 mode_lib->ms.dpte_row_bw,
2623 mode_lib->ms.meta_row_bw,
2624 mode_lib->ms.prefetch_cursor_bw,
2625 mode_lib->ms.prefetch_vmrow_bw,
2626 mode_lib->ms.final_flip_bw,
2627 mode_lib->ms.UrgentBurstFactorLuma,
2628 mode_lib->ms.UrgentBurstFactorChroma,
2629 mode_lib->ms.UrgentBurstFactorCursor,
2630 mode_lib->ms.UrgentBurstFactorLumaPre,
2631 mode_lib->ms.UrgentBurstFactorChromaPre,
2632 mode_lib->ms.UrgentBurstFactorCursorPre);
2633
2634 calculate_immediate_flip_bandwidth_support(
2635 &s->dummy_single[0], // double* frac_urg_bandwidth_flip
2636 &mode_lib->ms.support.ImmediateFlipSupport,
2637
2638 dml2_core_internal_soc_state_sys_active,
2639 mode_lib->ms.support.urg_bandwidth_required_flip,
2640 mode_lib->ms.support.non_urg_bandwidth_required_flip,
2641 mode_lib->ms.support.urg_bandwidth_available);
2642
2643 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
2644 if (display_cfg->plane_descriptors[k].immediate_flip == true && mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false)
2645 mode_lib->ms.support.ImmediateFlipSupport = false;
2646 }
2647
2648 } else { // if prefetch not support, assume iflip is not supported too
2649 mode_lib->ms.support.ImmediateFlipSupport = false;
2650 }
2651 } // prefetch schedule
2652 }
2653
2654 s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency;
2655 s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency;
2656 s->mSOCParameters.ExtraLatency_sr = mode_lib->ms.ExtraLatency_sr;
2657 s->mSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us;
2658 s->mSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
2659 s->mSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us;
2660 s->mSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us;
2661 s->mSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us;
2662 s->mSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us;
2663 s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us;
2664 s->mSOCParameters.USRRetrainingLatency = 0; // FIXME_STAGE2: no USR related bbox value
2665 s->mSOCParameters.SMNLatency = 0; // FIXME_STAGE2
2666
2667 CalculateWatermarks_params->display_cfg = display_cfg;
2668 CalculateWatermarks_params->USRRetrainingRequired = false /*FIXME_STAGE2 was: mode_lib->ms.policy.USRRetrainingRequired, no new dml2 replacement*/;
2669 CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
2670 CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines;
2671 CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits;
2672 CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes;
2673 CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLK;
2674 CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings;
2675 CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change;
2676 CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
2677 CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters;
2678 CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes;
2679 CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK;
2680 CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep;
2681 CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY;
2682 CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC;
2683 CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightY;
2684 CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightC;
2685 //CalculateWatermarks_params->LBBitPerPixel = 57; // FIXME_STAGE2, need a new ip param?
2686 CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthY;
2687 CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthC;
2688 CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPP;
2689 CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY;
2690 CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC;
2691 CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler;
2692 CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler;
2693 CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabled;
2694 CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByte;
2695 CalculateWatermarks_params->meta_row_height_l = s->meta_row_height_luma;
2696 CalculateWatermarks_params->meta_row_height_c = s->meta_row_height_chroma;
2697
2698 // Output
2699 CalculateWatermarks_params->Watermark = &s->dummy_watermark; // Watermarks *Watermark
2700 CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->ms.support.DRAMClockChangeSupport;
2701 CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->ms.support.global_dram_clock_change_supported;
2702 CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // double *MaxActiveDRAMClockChangeLatencySupported[]
2703 CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->ms.SubViewportLinesNeededInMALL; // unsigned int SubViewportLinesNeededInMALL[]
2704 CalculateWatermarks_params->FCLKChangeSupport = mode_lib->ms.support.FCLKChangeSupport;
2705 CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->ms.support.global_fclk_change_supported;
2706 CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // double *MaxActiveFCLKChangeLatencySupported
2707 CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport;
2708 CalculateWatermarks_params->VActiveLatencyHidingMargin = mode_lib->ms.VActiveLatencyHidingMargin;
2709 CalculateWatermarks_params->VActiveLatencyHidingUs = mode_lib->ms.VActiveLatencyHidingUs;
2710
2711 CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params);
2712 }
2713
2714 // End of Prefetch Check
2715
2716 dml2_printf("DML::%s: Done prefetch calculation\n", __func__);
2717
2718 //Re-ordering Buffer Support Check
2719 mode_lib->ms.support.max_urgent_latency_us
2720 = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
2721 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0)
2722 + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock
2723 + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock
2724 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0);
2725
2726 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
2727 if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024
2728 / mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= mode_lib->ms.support.max_urgent_latency_us) {
2729 mode_lib->ms.support.ROBSupport = true;
2730 } else {
2731 mode_lib->ms.support.ROBSupport = false;
2732 }
2733 } else {
2734 if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) {
2735 mode_lib->ms.support.ROBSupport = true;
2736 } else {
2737 mode_lib->ms.support.ROBSupport = false;
2738 }
2739 }
2740
2741 #ifdef __DML_VBA_DEBUG__
2742 dml2_printf("DML::%s: max_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.max_urgent_latency_us);
2743 dml2_printf("DML::%s: ROBSupport = %u\n", __func__, mode_lib->ms.support.ROBSupport);
2744 #endif
2745
2746 /*Mode Support, Voltage State and SOC Configuration*/
2747 {
2748 // s->dram_clock_change_support = 1;
2749 // s->f_clock_change_support = 1;
2750
2751 if (mode_lib->ms.support.ScaleRatioAndTapsSupport
2752 && mode_lib->ms.support.SourceFormatPixelAndScanSupport
2753 && mode_lib->ms.support.ViewportSizeSupport
2754 && !mode_lib->ms.support.LinkRateDoesNotMatchDPVersion
2755 && !mode_lib->ms.support.LinkRateForMultistreamNotIndicated
2756 && !mode_lib->ms.support.BPPForMultistreamNotIndicated
2757 && !mode_lib->ms.support.MultistreamWithHDMIOreDP
2758 && !mode_lib->ms.support.ExceededMultistreamSlots
2759 && !mode_lib->ms.support.MSOOrODMSplitWithNonDPLink
2760 && !mode_lib->ms.support.NotEnoughLanesForMSO
2761 //&& mode_lib->ms.support.LinkCapacitySupport == true // FIXME_STAGE2
2762 && !mode_lib->ms.support.P2IWith420
2763 && !mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP
2764 && !mode_lib->ms.support.DSC422NativeNotSupported
2765 && !mode_lib->ms.support.NotEnoughDSCUnits
2766 && !mode_lib->ms.support.NotEnoughDSCSlices
2767 && !mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe
2768 && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen
2769 && !mode_lib->ms.support.DSCCLKRequiredMoreThanSupported
2770 && mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport
2771 && !mode_lib->ms.support.DTBCLKRequiredMoreThanSupported
2772 && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPState
2773 && mode_lib->ms.support.ROBSupport
2774 && mode_lib->ms.support.OutstandingRequestsSupport
2775 && mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance
2776 && mode_lib->ms.support.DISPCLK_DPPCLK_Support
2777 && mode_lib->ms.support.TotalAvailablePipesSupport
2778 && mode_lib->ms.support.NumberOfOTGSupport
2779 && mode_lib->ms.support.NumberOfHDMIFRLSupport
2780 && mode_lib->ms.support.NumberOfDP2p0Support
2781 && mode_lib->ms.support.EnoughWritebackUnits
2782 && mode_lib->ms.support.WritebackLatencySupport
2783 && mode_lib->ms.support.WritebackScaleRatioAndTapsSupport
2784 && mode_lib->ms.support.CursorSupport
2785 && mode_lib->ms.support.PitchSupport
2786 && !mode_lib->ms.support.ViewportExceedsSurface
2787 && mode_lib->ms.support.PrefetchSupported
2788 && mode_lib->ms.support.EnoughUrgentLatencyHidingSupport
2789 && mode_lib->ms.support.AvgBandwidthSupport
2790 && mode_lib->ms.support.DynamicMetadataSupported
2791 && mode_lib->ms.support.VRatioInPrefetchSupported
2792 && mode_lib->ms.support.PTEBufferSizeNotExceeded
2793 && mode_lib->ms.support.DCCMetaBufferSizeNotExceeded
2794 && !mode_lib->ms.support.ExceededMALLSize
2795 && ((!display_cfg->hostvm_enable && !s->ImmediateFlipRequired) || mode_lib->ms.support.ImmediateFlipSupport)) {
2796 // && s->dram_clock_change_support == true
2797 // && s->f_clock_change_support == true
2798 // && (/*FIXME_STAGE2 was: mode_lib->ms.policy.USRRetrainingRequired, no new dml2 replacement || */ mode_lib->ms.support.USRRetrainingSupport)) {
2799 dml2_printf("DML::%s: mode is supported\n", __func__);
2800 mode_lib->ms.support.ModeSupport = true;
2801 } else {
2802 dml2_printf("DML::%s: mode is NOT supported\n", __func__);
2803 mode_lib->ms.support.ModeSupport = false;
2804 }
2805 }
2806
2807 // Since now the mode_support work on 1 particular power state, so there is only 1 state idx (index 0).
2808 dml2_printf("DML::%s: ModeSupport = %u\n", __func__, mode_lib->ms.support.ModeSupport);
2809 dml2_printf("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
2810
2811 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
2812 mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[k];
2813 mode_lib->ms.support.DPPPerSurface[k] = mode_lib->ms.NoOfDPP[k];
2814 }
2815
2816 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
2817 if (display_cfg->plane_descriptors[k].stream_index == k) {
2818 mode_lib->ms.support.ODMMode[k] = mode_lib->ms.ODMMode[k];
2819 } else {
2820 mode_lib->ms.support.ODMMode[k] = dml2_odm_mode_bypass;
2821 }
2822
2823 mode_lib->ms.support.DSCEnabled[k] = mode_lib->ms.RequiresDSC[k];
2824 mode_lib->ms.support.FECEnabled[k] = mode_lib->ms.RequiresFEC[k];
2825 mode_lib->ms.support.OutputBpp[k] = mode_lib->ms.OutputBpp[k];
2826 mode_lib->ms.support.OutputType[k] = mode_lib->ms.OutputType[k];
2827 mode_lib->ms.support.OutputRate[k] = mode_lib->ms.OutputRate[k];
2828
2829 #if defined(__DML_VBA_DEBUG__)
2830 dml2_printf("DML::%s: k=%d, ODMMode = %u\n", __func__, k, mode_lib->ms.support.ODMMode[k]);
2831 dml2_printf("DML::%s: k=%d, DSCEnabled = %u\n", __func__, k, mode_lib->ms.support.DSCEnabled[k]);
2832 #endif
2833 }
2834
2835 #if defined(__DML_VBA_DEBUG__)
2836 if (!mode_lib->ms.support.ModeSupport)
2837 dml2_print_dml_mode_support_info(&mode_lib->ms.support, true);
2838 dml2_printf("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, mode_lib->ms.support.ModeSupport, in_out_params->min_clk_index);
2839 dml2_printf("DML::%s: --- DONE --- \n", __func__);
2840 #endif
2841
2842 if (mode_lib->ms.support.ModeSupport) {
2843 *in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support;
2844 return true;
2845 } else {
2846 return false;
2847 }
2848 }
2849
dml2_print_dml_mode_support_info(const struct dml2_core_internal_mode_support_info * support,bool fail_only)2850 static void dml2_print_dml_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only)
2851 {
2852 dml2_printf("DML: ===================================== \n");
2853 dml2_printf("DML: DML_MODE_SUPPORT_INFO_ST\n");
2854 if (!fail_only || support->ImmediateFlipSupport == 0)
2855 dml2_printf("DML: support: ImmediateFlipSupport = 0x%x\n", support->ImmediateFlipSupport);
2856 if (!fail_only || support->WritebackLatencySupport == 0)
2857 dml2_printf("DML: support: WritebackLatencySupport = 0x%x\n", support->WritebackLatencySupport);
2858 if (!fail_only || support->ScaleRatioAndTapsSupport == 0)
2859 dml2_printf("DML: support: ScaleRatioAndTapsSupport = 0x%x\n", support->ScaleRatioAndTapsSupport);
2860 if (!fail_only || support->SourceFormatPixelAndScanSupport == 0)
2861 dml2_printf("DML: support: SourceFormatPixelAndScanSupport = 0x%x\n", support->SourceFormatPixelAndScanSupport);
2862 if (!fail_only || support->P2IWith420 == 1)
2863 dml2_printf("DML: support: P2IWith420 = 0x%x\n", support->P2IWith420);
2864 if (!fail_only || support->DSCOnlyIfNecessaryWithBPP == 1)
2865 dml2_printf("DML: support: DSCOnlyIfNecessaryWithBPP = 0x%x\n", support->DSCOnlyIfNecessaryWithBPP);
2866 if (!fail_only || support->DSC422NativeNotSupported == 1)
2867 dml2_printf("DML: support: DSC422NativeNotSupported = 0x%x\n", support->DSC422NativeNotSupported);
2868 if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1)
2869 dml2_printf("DML: support: LinkRateDoesNotMatchDPVersion = 0x%x\n", support->LinkRateDoesNotMatchDPVersion);
2870 if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1)
2871 dml2_printf("DML: support: LinkRateForMultistreamNotIndicated = 0x%x\n", support->LinkRateForMultistreamNotIndicated);
2872 if (!fail_only || support->BPPForMultistreamNotIndicated == 1)
2873 dml2_printf("DML: support: BPPForMultistreamNotIndicated = 0x%x\n", support->BPPForMultistreamNotIndicated);
2874 if (!fail_only || support->MultistreamWithHDMIOreDP == 1)
2875 dml2_printf("DML: support: MultistreamWithHDMIOreDP = 0x%x\n", support->MultistreamWithHDMIOreDP);
2876 if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1)
2877 dml2_printf("DML: support: MSOOrODMSplitWithNonDPLink = 0x%x\n", support->MSOOrODMSplitWithNonDPLink);
2878 if (!fail_only || support->NotEnoughLanesForMSO == 1)
2879 dml2_printf("DML: support: NotEnoughLanesForMSO = 0x%x\n", support->NotEnoughLanesForMSO);
2880 if (!fail_only || support->NumberOfOTGSupport == 0)
2881 dml2_printf("DML: support: NumberOfOTGSupport = 0x%x\n", support->NumberOfOTGSupport);
2882 if (!fail_only || support->NumberOfHDMIFRLSupport == 0)
2883 dml2_printf("DML: support: NumberOfHDMIFRLSupport = 0x%x\n", support->NumberOfHDMIFRLSupport);
2884 if (!fail_only || support->NumberOfDP2p0Support == 0)
2885 dml2_printf("DML: support: NumberOfDP2p0Support = 0x%x\n", support->NumberOfDP2p0Support);
2886 if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0)
2887 dml2_printf("DML: support: WritebackScaleRatioAndTapsSupport = 0x%x\n", support->WritebackScaleRatioAndTapsSupport);
2888 if (!fail_only || support->CursorSupport == 0)
2889 dml2_printf("DML: support: CursorSupport = 0x%x\n", support->CursorSupport);
2890 if (!fail_only || support->PitchSupport == 0)
2891 dml2_printf("DML: support: PitchSupport = 0x%x\n", support->PitchSupport);
2892 if (!fail_only || support->ViewportExceedsSurface == 1)
2893 dml2_printf("DML: support: ViewportExceedsSurface = 0x%x\n", support->ViewportExceedsSurface);
2894 if (!fail_only || support->ExceededMALLSize == 1)
2895 dml2_printf("DML: support: ExceededMALLSize = 0x%x\n", support->ExceededMALLSize);
2896 if (!fail_only || support->EnoughWritebackUnits == 0)
2897 dml2_printf("DML: support: EnoughWritebackUnits = 0x%x\n", support->EnoughWritebackUnits);
2898 if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1)
2899 dml2_printf("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = 0x%x\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe);
2900 if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1)
2901 dml2_printf("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = 0x%x\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen);
2902 if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1)
2903 dml2_printf("DML: support: InvalidCombinationOfMALLUseForPState = 0x%x\n", support->InvalidCombinationOfMALLUseForPState);
2904 if (!fail_only || support->ExceededMultistreamSlots == 1)
2905 dml2_printf("DML: support: ExceededMultistreamSlots = 0x%x\n", support->ExceededMultistreamSlots);
2906 if (!fail_only || support->NotEnoughDSCUnits == 1)
2907 dml2_printf("DML: support: NotEnoughDSCUnits = 0x%x\n", support->NotEnoughDSCUnits);
2908 if (!fail_only || support->NotEnoughDSCSlices == 1)
2909 dml2_printf("DML: support: NotEnoughDSCSlices = 0x%x\n", support->NotEnoughDSCSlices);
2910 if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0)
2911 dml2_printf("DML: support: PixelsPerLinePerDSCUnitSupport = 0x%x\n", support->PixelsPerLinePerDSCUnitSupport);
2912 if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1)
2913 dml2_printf("DML: support: DSCCLKRequiredMoreThanSupported = 0x%x\n", support->DSCCLKRequiredMoreThanSupported);
2914 if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1)
2915 dml2_printf("DML: support: DTBCLKRequiredMoreThanSupported = 0x%x\n", support->DTBCLKRequiredMoreThanSupported);
2916 if (!fail_only || support->LinkCapacitySupport == 0)
2917 dml2_printf("DML: support: LinkCapacitySupport = 0x%x\n", support->LinkCapacitySupport);
2918 if (!fail_only || support->ROBSupport == 0)
2919 dml2_printf("DML: support: ROBSupport = %d\n", support->ROBSupport);
2920 if (!fail_only || support->OutstandingRequestsSupport == 0)
2921 dml2_printf("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport);
2922 if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0)
2923 dml2_printf("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance);
2924 if (!fail_only || support->PTEBufferSizeNotExceeded == 0)
2925 dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded);
2926 if (!fail_only || support->AvgBandwidthSupport == 0)
2927 dml2_printf("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport);
2928 if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0)
2929 dml2_printf("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport);
2930 if (!fail_only || support->PrefetchSupported == 0)
2931 dml2_printf("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported);
2932 if (!fail_only || support->DynamicMetadataSupported == 0)
2933 dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported);
2934 if (!fail_only || support->VRatioInPrefetchSupported == 0)
2935 dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported);
2936 if (!fail_only || support->DISPCLK_DPPCLK_Support == 0)
2937 dml2_printf("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support);
2938 if (!fail_only || support->TotalAvailablePipesSupport == 0)
2939 dml2_printf("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport);
2940 if (!fail_only || support->ModeSupport == 0)
2941 dml2_printf("DML: support: ModeSupport = %d\n", support->ModeSupport);
2942 if (!fail_only || support->ViewportSizeSupport == 0)
2943 dml2_printf("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport);
2944 dml2_printf("DML: ===================================== \n");
2945 }
2946
get_stream_output_bpp(double * out_bpp,const struct dml2_display_cfg * display_cfg)2947 static void get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg)
2948 {
2949 for (unsigned int k = 0; k < display_cfg->num_planes; k++) {
2950 double bpc = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.bpc;
2951 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_disable) {
2952 switch (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format) {
2953 case dml2_444:
2954 out_bpp[k] = bpc * 3;
2955 break;
2956 case dml2_s422:
2957 out_bpp[k] = bpc * 2;
2958 break;
2959 case dml2_n422:
2960 out_bpp[k] = bpc * 2;
2961 break;
2962 case dml2_420:
2963 default:
2964 out_bpp[k] = bpc * 1.5;
2965 break;
2966 }
2967 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable) {
2968 out_bpp[k] = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.dsc_compressed_bpp_x16 / 16;
2969 } else {
2970 out_bpp[k] = 0;
2971 }
2972 #ifdef __DML_VBA_DEBUG__
2973 dml2_printf("DML::%s: k=%d bpc=%f\n", __func__, k, bpc);
2974 dml2_printf("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable);
2975 dml2_printf("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]);
2976 #endif
2977 }
2978 }
2979
/*
 * Round num to a multiple of "multiple".
 *
 * up == true rounds towards the next multiple, up == false towards the
 * previous one. num is returned unchanged when multiple is 0 or when num is
 * already a multiple.
 */
static unsigned int dml_round_to_multiple(unsigned int num, unsigned int multiple, bool up)
{
	/* Distance of num above the previous multiple; 0 when no rounding is needed. */
	const unsigned int excess = (multiple != 0) ? (num % multiple) : 0;

	if (excess == 0)
		return num;

	return up ? (num + multiple - excess) : (num - excess);
}
2996
dml_get_num_active_pipes(int unsigned num_planes,const struct core_display_cfg_support_info * cfg_support_info)2997 static unsigned int dml_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info)
2998 {
2999 unsigned int num_active_pipes = 0;
3000
3001 for (unsigned int k = 0; k < num_planes; k++) {
3002 num_active_pipes = num_active_pipes + (unsigned int)cfg_support_info->plane_support_info[k].dpps_used;
3003 }
3004
3005 #ifdef __DML_VBA_DEBUG__
3006 dml2_printf("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes);
3007 #endif
3008 return num_active_pipes;
3009 }
3010
dml_calc_pipe_plane_mapping(const struct core_display_cfg_support_info * cfg_support_info,unsigned int * pipe_plane)3011 static void dml_calc_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane)
3012 {
3013 unsigned int pipe_idx = 0;
3014
3015 for (unsigned int k = 0; k < DML2_MAX_PLANES; ++k) {
3016 pipe_plane[k] = __DML2_CALCS_PIPE_NO_PLANE__;
3017 }
3018
3019 for (unsigned int plane_idx = 0; plane_idx < DML2_MAX_PLANES; plane_idx++) {
3020 for (int i = 0; i < cfg_support_info->plane_support_info[plane_idx].dpps_used; i++) {
3021 pipe_plane[pipe_idx] = plane_idx;
3022 pipe_idx++;
3023 }
3024 }
3025 }
3026
dml_is_phantom_pipe(const struct dml2_plane_parameters * plane_cfg)3027 static bool dml_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg)
3028 {
3029 bool is_phantom = false;
3030
3031 if (plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe ||
3032 plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) {
3033 is_phantom = true;
3034 }
3035
3036 return is_phantom;
3037 }
3038
dml_get_is_phantom_pipe(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,unsigned int pipe_idx)3039 static bool dml_get_is_phantom_pipe(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx)
3040 {
3041 unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx];
3042
3043 bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_idx]);
3044 dml2_printf("DML::%s: pipe_idx=%d legacy_svp_config=%0d is_phantom=%d\n", __func__, pipe_idx, display_cfg->plane_descriptors[plane_idx].overrides.legacy_svp_config, is_phantom);
3045 return is_phantom;
3046 }
3047
/*
 * Derive the DET sizing outputs from the return-buffer configuration.
 *
 * Outputs:
 *   MaxTotalDETInKByte             - with an MRQ present: 4/5 of (config
 *                                    return buffer + ROB), passed through
 *                                    math_ceil2() with a granularity of 64;
 *                                    otherwise the config return buffer minus
 *                                    one segment.
 *   nomDETInKByte                  - MaxTotalDETInKByte / MaxNumDPP passed
 *                                    through math_floor2() with the segment
 *                                    size as granularity, or
 *                                    nomDETInKByteOverrideValue when
 *                                    nomDETInKByteOverrideEnable is non-zero.
 *   MinCompressedBufferSizeInKByte - config return buffer minus
 *                                    MaxTotalDETInKByte.
 */
static void CalculateMaxDETAndMinCompressedBufferSize(
	unsigned int ConfigReturnBufferSizeInKByte,
	unsigned int ConfigReturnBufferSegmentSizeInKByte,
	unsigned int ROBBufferSizeInKByte,
	unsigned int MaxNumDPP,
	unsigned int nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
	unsigned int nomDETInKByteOverrideValue, // VBA_DELTA
	bool is_mrq_present,

	// Output
	unsigned int *MaxTotalDETInKByte,
	unsigned int *nomDETInKByte,
	unsigned int *MinCompressedBufferSizeInKByte)
{
	*MaxTotalDETInKByte = is_mrq_present
		? (unsigned int)math_ceil2((double)(ConfigReturnBufferSizeInKByte + ROBBufferSizeInKByte) * 4 / 5, 64)
		: ConfigReturnBufferSizeInKByte - ConfigReturnBufferSegmentSizeInKByte;

	/* Whatever is not handed to the DET stays with the compressed buffer. */
	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;

	/* Even per-DPP share of the DET budget, at segment granularity. */
	*nomDETInKByte = (unsigned int)(math_floor2((double)*MaxTotalDETInKByte / (double)MaxNumDPP, ConfigReturnBufferSegmentSizeInKByte));

#if defined(__DML_VBA_DEBUG__)
	dml2_printf("DML::%s: is_mrq_present = %u\n", __func__, is_mrq_present);
	dml2_printf("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
	dml2_printf("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte);
	dml2_printf("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP);
	dml2_printf("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte);
	dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, *nomDETInKByte);
	dml2_printf("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte);
#endif

	if (!nomDETInKByteOverrideEnable)
		return;

	/* The override replaces the computed nominal DET size. */
	*nomDETInKByte = nomDETInKByteOverrideValue;
	dml2_printf("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte);
}
3085
PixelClockAdjustmentForProgressiveToInterlaceUnit(const struct dml2_display_cfg * display_cfg,bool ptoi_supported,double * PixelClockBackEnd)3086 static void PixelClockAdjustmentForProgressiveToInterlaceUnit(const struct dml2_display_cfg *display_cfg, bool ptoi_supported, double *PixelClockBackEnd)
3087 {
3088 //unsigned int num_active_planes = display_cfg->num_planes;
3089
3090 //Progressive To Interlace Unit Effect
3091 for (unsigned int k = 0; k < display_cfg->num_planes; ++k) {
3092 PixelClockBackEnd[k] = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
3093 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && ptoi_supported == true) {
3094 // FIXME_STAGE2... can sw pass the pixel rate for interlaced directly
3095 //display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz = 2 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz;
3096 }
3097 }
3098 }
3099
dml2_core_shared_is_420(enum dml2_source_format_class source_format)3100 bool dml2_core_shared_is_420(enum dml2_source_format_class source_format)
3101 {
3102 bool val = false;
3103
3104 switch (source_format) {
3105 case dml2_444_8:
3106 val = 0;
3107 break;
3108 case dml2_444_16:
3109 val = 0;
3110 break;
3111 case dml2_444_32:
3112 val = 0;
3113 break;
3114 case dml2_444_64:
3115 val = 0;
3116 break;
3117 case dml2_420_8:
3118 val = 1;
3119 break;
3120 case dml2_420_10:
3121 val = 1;
3122 break;
3123 case dml2_420_12:
3124 val = 1;
3125 break;
3126 case dml2_rgbe_alpha:
3127 val = 0;
3128 break;
3129 case dml2_rgbe:
3130 val = 0;
3131 break;
3132 case dml2_mono_8:
3133 val = 0;
3134 break;
3135 case dml2_mono_16:
3136 val = 0;
3137 break;
3138 default:
3139 DML2_ASSERT(0);
3140 break;
3141 }
3142 return val;
3143 }
3144
dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode)3145 static unsigned int dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode)
3146 {
3147 switch (sw_mode) {
3148 case (dml2_sw_linear):
3149 return 256;
3150 case (dml2_sw_256b_2d):
3151 return 256;
3152 case (dml2_sw_4kb_2d):
3153 return 4096;
3154 case (dml2_sw_64kb_2d):
3155 return 65536;
3156 case (dml2_sw_256kb_2d):
3157 return 262144;
3158 case (dml2_gfx11_sw_linear):
3159 return 256;
3160 case (dml2_gfx11_sw_64kb_d):
3161 return 65536;
3162 case (dml2_gfx11_sw_64kb_d_t):
3163 return 65536;
3164 case (dml2_gfx11_sw_64kb_d_x):
3165 return 65536;
3166 case (dml2_gfx11_sw_64kb_r_x):
3167 return 65536;
3168 case (dml2_gfx11_sw_256kb_d_x):
3169 return 262144;
3170 case (dml2_gfx11_sw_256kb_r_x):
3171 return 262144;
3172 default:
3173 DML2_ASSERT(0);
3174 return 256;
3175 }
3176 }
3177
dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type)3178 const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type)
3179 {
3180 switch (bw_type) {
3181 case (dml2_core_internal_bw_sdp):
3182 return("dml2_core_internal_bw_sdp");
3183 case (dml2_core_internal_bw_dram):
3184 return("dml2_core_internal_bw_dram");
3185 case (dml2_core_internal_bw_max):
3186 return("dml2_core_internal_bw_max");
3187 default:
3188 return("dml2_core_internal_bw_unknown");
3189 }
3190 }
3191
dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type)3192 const char *dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type)
3193 {
3194 switch (dml2_core_internal_soc_state_type) {
3195 case (dml2_core_internal_soc_state_sys_idle):
3196 return("dml2_core_internal_soc_state_sys_idle");
3197 case (dml2_core_internal_soc_state_sys_active):
3198 return("dml2_core_internal_soc_state_sys_active");
3199 case (dml2_core_internal_soc_state_svp_prefetch):
3200 return("dml2_core_internal_soc_state_svp_prefetch");
3201 case dml2_core_internal_soc_state_max:
3202 default:
3203 return("dml2_core_internal_soc_state_unknown");
3204 }
3205 }
3206
dml_is_vertical_rotation(enum dml2_rotation_angle Scan)3207 static bool dml_is_vertical_rotation(enum dml2_rotation_angle Scan)
3208 {
3209 bool is_vert = false;
3210 if (Scan == dml2_rotation_90 || Scan == dml2_rotation_270) {
3211 is_vert = true;
3212 } else {
3213 is_vert = false;
3214 }
3215 return is_vert;
3216 }
3217
dml_get_gfx_version(enum dml2_swizzle_mode sw_mode)3218 static int unsigned dml_get_gfx_version(enum dml2_swizzle_mode sw_mode)
3219 {
3220 int unsigned version = 0;
3221
3222 if (sw_mode == dml2_sw_linear ||
3223 sw_mode == dml2_sw_256b_2d ||
3224 sw_mode == dml2_sw_4kb_2d ||
3225 sw_mode == dml2_sw_64kb_2d ||
3226 sw_mode == dml2_sw_256kb_2d) {
3227 version = 12;
3228 } else if (sw_mode == dml2_gfx11_sw_linear ||
3229 sw_mode == dml2_gfx11_sw_64kb_d ||
3230 sw_mode == dml2_gfx11_sw_64kb_d_t ||
3231 sw_mode == dml2_gfx11_sw_64kb_d_x ||
3232 sw_mode == dml2_gfx11_sw_64kb_r_x ||
3233 sw_mode == dml2_gfx11_sw_256kb_d_x ||
3234 sw_mode == dml2_gfx11_sw_256kb_r_x) {
3235 version = 11;
3236 } else {
3237 dml2_printf("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode);
3238 DML2_ASSERT(0);
3239 }
3240
3241 return version;
3242 }
3243
CalculateBytePerPixelAndBlockSizes(enum dml2_source_format_class SourcePixelFormat,enum dml2_swizzle_mode SurfaceTiling,unsigned int pitch_y,unsigned int pitch_c,unsigned int * BytePerPixelY,unsigned int * BytePerPixelC,double * BytePerPixelDETY,double * BytePerPixelDETC,unsigned int * BlockHeight256BytesY,unsigned int * BlockHeight256BytesC,unsigned int * BlockWidth256BytesY,unsigned int * BlockWidth256BytesC,unsigned int * MacroTileHeightY,unsigned int * MacroTileHeightC,unsigned int * MacroTileWidthY,unsigned int * MacroTileWidthC,bool * surf_linear128_l,bool * surf_linear128_c)3244 static void CalculateBytePerPixelAndBlockSizes(
3245 enum dml2_source_format_class SourcePixelFormat,
3246 enum dml2_swizzle_mode SurfaceTiling,
3247 unsigned int pitch_y,
3248 unsigned int pitch_c,
3249
3250 // Output
3251 unsigned int *BytePerPixelY,
3252 unsigned int *BytePerPixelC,
3253 double *BytePerPixelDETY,
3254 double *BytePerPixelDETC,
3255 unsigned int *BlockHeight256BytesY,
3256 unsigned int *BlockHeight256BytesC,
3257 unsigned int *BlockWidth256BytesY,
3258 unsigned int *BlockWidth256BytesC,
3259 unsigned int *MacroTileHeightY,
3260 unsigned int *MacroTileHeightC,
3261 unsigned int *MacroTileWidthY,
3262 unsigned int *MacroTileWidthC,
3263 bool *surf_linear128_l,
3264 bool *surf_linear128_c)
3265 {
3266 *BytePerPixelDETY = 0;
3267 *BytePerPixelDETC = 0;
3268 *BytePerPixelY = 0;
3269 *BytePerPixelC = 0;
3270
3271 if (SourcePixelFormat == dml2_444_64) {
3272 *BytePerPixelDETY = 8;
3273 *BytePerPixelDETC = 0;
3274 *BytePerPixelY = 8;
3275 *BytePerPixelC = 0;
3276 } else if (SourcePixelFormat == dml2_444_32 || SourcePixelFormat == dml2_rgbe) {
3277 *BytePerPixelDETY = 4;
3278 *BytePerPixelDETC = 0;
3279 *BytePerPixelY = 4;
3280 *BytePerPixelC = 0;
3281 } else if (SourcePixelFormat == dml2_444_16 || SourcePixelFormat == dml2_mono_16) {
3282 *BytePerPixelDETY = 2;
3283 *BytePerPixelDETC = 0;
3284 *BytePerPixelY = 2;
3285 *BytePerPixelC = 0;
3286 } else if (SourcePixelFormat == dml2_444_8 || SourcePixelFormat == dml2_mono_8) {
3287 *BytePerPixelDETY = 1;
3288 *BytePerPixelDETC = 0;
3289 *BytePerPixelY = 1;
3290 *BytePerPixelC = 0;
3291 } else if (SourcePixelFormat == dml2_rgbe_alpha) {
3292 *BytePerPixelDETY = 4;
3293 *BytePerPixelDETC = 1;
3294 *BytePerPixelY = 4;
3295 *BytePerPixelC = 1;
3296 } else if (SourcePixelFormat == dml2_420_8) {
3297 *BytePerPixelDETY = 1;
3298 *BytePerPixelDETC = 2;
3299 *BytePerPixelY = 1;
3300 *BytePerPixelC = 2;
3301 } else if (SourcePixelFormat == dml2_420_12) {
3302 *BytePerPixelDETY = 2;
3303 *BytePerPixelDETC = 4;
3304 *BytePerPixelY = 2;
3305 *BytePerPixelC = 4;
3306 } else if (SourcePixelFormat == dml2_420_10) {
3307 *BytePerPixelDETY = (double)(4.0 / 3);
3308 *BytePerPixelDETC = (double)(8.0 / 3);
3309 *BytePerPixelY = 2;
3310 *BytePerPixelC = 4;
3311 } else {
3312 dml2_printf("ERROR: DML::%s: SourcePixelFormat = %u not supported!\n", __func__, SourcePixelFormat);
3313 DML2_ASSERT(0);
3314 }
3315
3316 #ifdef __DML_VBA_DEBUG__
3317 dml2_printf("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat);
3318 dml2_printf("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
3319 dml2_printf("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
3320 dml2_printf("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY);
3321 dml2_printf("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC);
3322 dml2_printf("DML::%s: pitch_y = %u\n", __func__, pitch_y);
3323 dml2_printf("DML::%s: pitch_c = %u\n", __func__, pitch_c);
3324 dml2_printf("DML::%s: surf_linear128_l = %u\n", __func__, *surf_linear128_l);
3325 dml2_printf("DML::%s: surf_linear128_c = %u\n", __func__, *surf_linear128_c);
3326 #endif
3327
3328 if (dml_get_gfx_version(SurfaceTiling) == 11) {
3329 *surf_linear128_l = 0;
3330 *surf_linear128_c = 0;
3331 } else {
3332 if (SurfaceTiling == dml2_sw_linear) {
3333 *surf_linear128_l = (((pitch_y * *BytePerPixelY) % 256) != 0);
3334
3335 if (dml2_core_shared_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha)
3336 *surf_linear128_c = (((pitch_c * *BytePerPixelC) % 256) != 0);
3337 }
3338 }
3339
3340 if (!(dml2_core_shared_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha)) {
3341 if (SurfaceTiling == dml2_sw_linear) {
3342 *BlockHeight256BytesY = 1;
3343 } else if (SourcePixelFormat == dml2_444_64) {
3344 *BlockHeight256BytesY = 4;
3345 } else if (SourcePixelFormat == dml2_444_8) {
3346 *BlockHeight256BytesY = 16;
3347 } else {
3348 *BlockHeight256BytesY = 8;
3349 }
3350 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3351 *BlockHeight256BytesC = 0;
3352 *BlockWidth256BytesC = 0;
3353 } else { // dual plane
3354 if (SurfaceTiling == dml2_sw_linear) {
3355 *BlockHeight256BytesY = 1;
3356 *BlockHeight256BytesC = 1;
3357 } else if (SourcePixelFormat == dml2_rgbe_alpha) {
3358 *BlockHeight256BytesY = 8;
3359 *BlockHeight256BytesC = 16;
3360 } else if (SourcePixelFormat == dml2_420_8) {
3361 *BlockHeight256BytesY = 16;
3362 *BlockHeight256BytesC = 8;
3363 } else {
3364 *BlockHeight256BytesY = 8;
3365 *BlockHeight256BytesC = 8;
3366 }
3367 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3368 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
3369 }
3370 #ifdef __DML_VBA_DEBUG__
3371 dml2_printf("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY);
3372 dml2_printf("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY);
3373 dml2_printf("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC);
3374 dml2_printf("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC);
3375 #endif
3376
3377 if (dml_get_gfx_version(SurfaceTiling) == 11) {
3378 if (SurfaceTiling == dml2_gfx11_sw_linear) {
3379 *MacroTileHeightY = *BlockHeight256BytesY;
3380 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
3381 *MacroTileHeightC = *BlockHeight256BytesC;
3382 if (*MacroTileHeightC == 0) {
3383 *MacroTileWidthC = 0;
3384 } else {
3385 *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
3386 }
3387 } else if (SurfaceTiling == dml2_gfx11_sw_64kb_d || SurfaceTiling == dml2_gfx11_sw_64kb_d_t || SurfaceTiling == dml2_gfx11_sw_64kb_d_x || SurfaceTiling == dml2_gfx11_sw_64kb_r_x) {
3388 *MacroTileHeightY = 16 * *BlockHeight256BytesY;
3389 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
3390 *MacroTileHeightC = 16 * *BlockHeight256BytesC;
3391 if (*MacroTileHeightC == 0) {
3392 *MacroTileWidthC = 0;
3393 } else {
3394 *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
3395 }
3396 } else {
3397 *MacroTileHeightY = 32 * *BlockHeight256BytesY;
3398 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
3399 *MacroTileHeightC = 32 * *BlockHeight256BytesC;
3400 if (*MacroTileHeightC == 0) {
3401 *MacroTileWidthC = 0;
3402 } else {
3403 *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
3404 }
3405 }
3406 } else {
3407 unsigned int macro_tile_size_bytes = dml_get_tile_block_size_bytes(SurfaceTiling);
3408 unsigned int macro_tile_scale = 1; // macro tile to 256B req scaling
3409
3410 if (SurfaceTiling == dml2_sw_linear) {
3411 macro_tile_scale = 1;
3412 } else if (SurfaceTiling == dml2_sw_4kb_2d) {
3413 macro_tile_scale = 4;
3414 } else if (SurfaceTiling == dml2_sw_64kb_2d) {
3415 macro_tile_scale = 16;
3416 } else if (SurfaceTiling == dml2_sw_256kb_2d) {
3417 macro_tile_scale = 32;
3418 } else {
3419 dml2_printf("ERROR: Invalid SurfaceTiling setting! val=%u\n", SurfaceTiling);
3420 DML2_ASSERT(0);
3421 }
3422
3423 *MacroTileHeightY = macro_tile_scale * *BlockHeight256BytesY;
3424 *MacroTileWidthY = macro_tile_size_bytes / *BytePerPixelY / *MacroTileHeightY;
3425 *MacroTileHeightC = macro_tile_scale * *BlockHeight256BytesC;
3426 if (*MacroTileHeightC == 0) {
3427 *MacroTileWidthC = 0;
3428 } else {
3429 *MacroTileWidthC = macro_tile_size_bytes / *BytePerPixelC / *MacroTileHeightC;
3430 }
3431 }
3432
3433 #ifdef __DML_VBA_DEBUG__
3434 dml2_printf("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY);
3435 dml2_printf("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY);
3436 dml2_printf("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC);
3437 dml2_printf("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC);
3438 #endif
3439 }
3440
CalculateSinglePipeDPPCLKAndSCLThroughput(double HRatio,double HRatioChroma,double VRatio,double VRatioChroma,double MaxDCHUBToPSCLThroughput,double MaxPSCLToLBThroughput,double PixelClock,enum dml2_source_format_class SourcePixelFormat,unsigned int HTaps,unsigned int HTapsChroma,unsigned int VTaps,unsigned int VTapsChroma,double * PSCL_THROUGHPUT,double * PSCL_THROUGHPUT_CHROMA,double * DPPCLKUsingSingleDPP)3441 static void CalculateSinglePipeDPPCLKAndSCLThroughput(
3442 double HRatio,
3443 double HRatioChroma,
3444 double VRatio,
3445 double VRatioChroma,
3446 double MaxDCHUBToPSCLThroughput,
3447 double MaxPSCLToLBThroughput,
3448 double PixelClock,
3449 enum dml2_source_format_class SourcePixelFormat,
3450 unsigned int HTaps,
3451 unsigned int HTapsChroma,
3452 unsigned int VTaps,
3453 unsigned int VTapsChroma,
3454
3455 // Output
3456 double *PSCL_THROUGHPUT,
3457 double *PSCL_THROUGHPUT_CHROMA,
3458 double *DPPCLKUsingSingleDPP)
3459 {
3460 if (HRatio > 1) {
3461 *PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / math_ceil2((double)HTaps / 6.0, 1.0));
3462 } else {
3463 *PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
3464 }
3465
3466 double DPPCLKUsingSingleDPPLuma;
3467 double DPPCLKUsingSingleDPPChroma;
3468
3469 DPPCLKUsingSingleDPPLuma = PixelClock * math_max3(VTaps / 6 * math_min2(1, HRatio), HRatio * VRatio / *PSCL_THROUGHPUT, 1);
3470
3471 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
3472 DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
3473
3474 if (!dml2_core_shared_is_420(SourcePixelFormat) && SourcePixelFormat != dml2_rgbe_alpha) {
3475 *PSCL_THROUGHPUT_CHROMA = 0;
3476 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
3477 } else {
3478 if (HRatioChroma > 1) {
3479 *PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatioChroma / math_ceil2((double)HTapsChroma / 6.0, 1.0));
3480 } else {
3481 *PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
3482 }
3483 DPPCLKUsingSingleDPPChroma = PixelClock * math_max3(VTapsChroma / 6 * math_min2(1, HRatioChroma),
3484 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
3485 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
3486 DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
3487 *DPPCLKUsingSingleDPP = math_max2(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
3488 }
3489 }
3490
CalculateSwathWidth(const struct dml2_display_cfg * display_cfg,bool ForceSingleDPP,unsigned int NumberOfActiveSurfaces,enum dml2_odm_mode ODMMode[],unsigned int BytePerPixY[],unsigned int BytePerPixC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],bool surf_linear128_l[],bool surf_linear128_c[],unsigned int DPPPerSurface[],unsigned int req_per_swath_ub_l[],unsigned int req_per_swath_ub_c[],unsigned int SwathWidthSingleDPPY[],unsigned int SwathWidthSingleDPPC[],unsigned int SwathWidthY[],unsigned int SwathWidthC[],unsigned int MaximumSwathHeightY[],unsigned int MaximumSwathHeightC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[])3491 static void CalculateSwathWidth(
3492 const struct dml2_display_cfg *display_cfg,
3493 bool ForceSingleDPP,
3494 unsigned int NumberOfActiveSurfaces,
3495 enum dml2_odm_mode ODMMode[],
3496 unsigned int BytePerPixY[],
3497 unsigned int BytePerPixC[],
3498 unsigned int Read256BytesBlockHeightY[],
3499 unsigned int Read256BytesBlockHeightC[],
3500 unsigned int Read256BytesBlockWidthY[],
3501 unsigned int Read256BytesBlockWidthC[],
3502 bool surf_linear128_l[],
3503 bool surf_linear128_c[],
3504 unsigned int DPPPerSurface[],
3505
3506 // Output
3507 unsigned int req_per_swath_ub_l[],
3508 unsigned int req_per_swath_ub_c[],
3509 unsigned int SwathWidthSingleDPPY[],
3510 unsigned int SwathWidthSingleDPPC[],
3511 unsigned int SwathWidthY[], // per-pipe
3512 unsigned int SwathWidthC[], // per-pipe
3513 unsigned int MaximumSwathHeightY[],
3514 unsigned int MaximumSwathHeightC[],
3515 unsigned int swath_width_luma_ub[], // per-pipe
3516 unsigned int swath_width_chroma_ub[]) // per-pipe
3517 {
3518 enum dml2_odm_mode MainSurfaceODMMode;
3519 double odm_hactive_factor = 1.0;
3520
3521 #ifdef __DML_VBA_DEBUG__
3522 dml2_printf("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
3523 dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
3524 #endif
3525
3526 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
3527 if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
3528 SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
3529 } else {
3530 SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
3531 }
3532
3533 #ifdef __DML_VBA_DEBUG__
3534 dml2_printf("DML::%s: k=%u ViewportWidth=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width);
3535 dml2_printf("DML::%s: k=%u ViewportHeight=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height);
3536 dml2_printf("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
3537 #endif
3538
3539 MainSurfaceODMMode = ODMMode[k];
3540 for (unsigned int j = 0; j < NumberOfActiveSurfaces; ++j) {
3541 if (display_cfg->plane_descriptors[k].stream_index == j) {
3542 MainSurfaceODMMode = ODMMode[j];
3543 }
3544 }
3545
3546 if (ForceSingleDPP) {
3547 SwathWidthY[k] = SwathWidthSingleDPPY[k];
3548 } else {
3549 if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1)
3550 odm_hactive_factor = 4.0;
3551 else if (MainSurfaceODMMode == dml2_odm_mode_combine_3to1)
3552 odm_hactive_factor = 3.0;
3553 else if (MainSurfaceODMMode == dml2_odm_mode_combine_2to1)
3554 odm_hactive_factor = 2.0;
3555
3556 if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1 || MainSurfaceODMMode == dml2_odm_mode_combine_3to1 || MainSurfaceODMMode == dml2_odm_mode_combine_2to1) {
3557 SwathWidthY[k] = (unsigned int)(math_min2((double)SwathWidthSingleDPPY[k], math_round((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active / odm_hactive_factor * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio)));
3558 } else if (DPPPerSurface[k] == 2) {
3559 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
3560 } else {
3561 SwathWidthY[k] = SwathWidthSingleDPPY[k];
3562 }
3563 }
3564
3565 #ifdef __DML_VBA_DEBUG__
3566 dml2_printf("DML::%s: k=%u HActive=%u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active);
3567 dml2_printf("DML::%s: k=%u HRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
3568 dml2_printf("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode);
3569 dml2_printf("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", __func__, k, SwathWidthSingleDPPY[k]);
3570 dml2_printf("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]);
3571 #endif
3572
3573 if (dml2_core_shared_is_420(display_cfg->plane_descriptors[k].pixel_format)) {
3574 SwathWidthC[k] = SwathWidthY[k] / 2;
3575 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
3576 } else {
3577 SwathWidthC[k] = SwathWidthY[k];
3578 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
3579 }
3580
3581 if (ForceSingleDPP == true) {
3582 SwathWidthY[k] = SwathWidthSingleDPPY[k];
3583 SwathWidthC[k] = SwathWidthSingleDPPC[k];
3584 }
3585
3586 unsigned int req_width_horz_y = Read256BytesBlockWidthY[k];
3587 unsigned int req_width_horz_c = Read256BytesBlockWidthC[k];
3588
3589 if (surf_linear128_l[k])
3590 req_width_horz_y = req_width_horz_y / 2;
3591
3592 if (surf_linear128_c[k])
3593 req_width_horz_c = req_width_horz_c / 2;
3594
3595 unsigned int surface_width_ub_l = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.width, req_width_horz_y);
3596 unsigned int surface_height_ub_l = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.height, Read256BytesBlockHeightY[k]);
3597 unsigned int surface_width_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.width, req_width_horz_c);
3598 unsigned int surface_height_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.height, Read256BytesBlockHeightC[k]);
3599
3600 #ifdef __DML_VBA_DEBUG__
3601 dml2_printf("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l);
3602 dml2_printf("DML::%s: k=%u surface_height_ub_l=%u\n", __func__, k, surface_height_ub_l);
3603 dml2_printf("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c);
3604 dml2_printf("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c);
3605 dml2_printf("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y);
3606 dml2_printf("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c);
3607 dml2_printf("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]);
3608 dml2_printf("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]);
3609 dml2_printf("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]);
3610 dml2_printf("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]);
3611 dml2_printf("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y);
3612 dml2_printf("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c);
3613 dml2_printf("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.stationary);
3614 dml2_printf("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
3615 #endif
3616
3617 req_per_swath_ub_l[k] = 0;
3618 req_per_swath_ub_c[k] = 0;
3619 if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
3620 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
3621 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
3622 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
3623 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start + SwathWidthY[k] + req_width_horz_y - 1, req_width_horz_y) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start, req_width_horz_y)));
3624 } else {
3625 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, math_ceil2((double)SwathWidthY[k] - 1, req_width_horz_y) + req_width_horz_y));
3626 }
3627 req_per_swath_ub_l[k] = swath_width_luma_ub[k] / req_width_horz_y;
3628
3629 if (BytePerPixC[k] > 0) {
3630 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
3631 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start + SwathWidthC[k] + req_width_horz_c - 1, req_width_horz_c) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start, req_width_horz_c)));
3632 } else {
3633 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_ceil2((double)SwathWidthC[k] - 1, req_width_horz_c) + req_width_horz_c));
3634 }
3635 req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / req_width_horz_c;
3636 } else {
3637 swath_width_chroma_ub[k] = 0;
3638 }
3639 } else {
3640 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
3641 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
3642
3643 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
3644 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start + SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, Read256BytesBlockHeightY[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start, Read256BytesBlockHeightY[k])));
3645 } else {
3646 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_ceil2((double)SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]));
3647 }
3648 req_per_swath_ub_l[k] = swath_width_luma_ub[k] / Read256BytesBlockHeightY[k];
3649 if (BytePerPixC[k] > 0) {
3650 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
3651 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start + SwathWidthC[k] + Read256BytesBlockHeightC[k] - 1, Read256BytesBlockHeightC[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start, Read256BytesBlockHeightC[k])));
3652 } else {
3653 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_ceil2((double)SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]));
3654 }
3655 req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / Read256BytesBlockHeightC[k];
3656 } else {
3657 swath_width_chroma_ub[k] = 0;
3658 }
3659 }
3660
3661 #ifdef __DML_VBA_DEBUG__
3662 dml2_printf("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]);
3663 dml2_printf("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]);
3664 dml2_printf("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, MaximumSwathHeightY[k]);
3665 dml2_printf("DML::%s: k=%u MaximumSwathHeightC=%u\n", __func__, k, MaximumSwathHeightC[k]);
3666 dml2_printf("DML::%s: k=%u req_per_swath_ub_l=%u\n", __func__, k, req_per_swath_ub_l[k]);
3667 dml2_printf("DML::%s: k=%u req_per_swath_ub_c=%u\n", __func__, k, req_per_swath_ub_c[k]);
3668 #endif
3669
3670 }
3671 }
3672
/*
 * Decide whether unbounded request mode is enabled.
 *
 * The mode is permitted only when exactly one DPP is active and
 * NoChromaOrLinear is set. When unb_req_force_en is set, the result is
 * additionally gated by unb_req_force_val, so the override can switch the
 * mode off but cannot switch it on when it is not permitted.
 *
 * Returns true when unbounded request is enabled.
 */
static bool UnboundedRequest(bool unb_req_force_en, bool unb_req_force_val, unsigned int TotalNumberOfActiveDPP, bool NoChromaOrLinear)
{
	const bool permitted = NoChromaOrLinear && (TotalNumberOfActiveDPP == 1);
	const bool enabled = unb_req_force_en ? (permitted && unb_req_force_val) : permitted;

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: unb_req_force_en = %u\n", __func__, unb_req_force_en);
	dml2_printf("DML::%s: unb_req_force_val = %u\n", __func__, unb_req_force_val);
	dml2_printf("DML::%s: unb_req_ok = %u\n", __func__, permitted);
	dml2_printf("DML::%s: unb_req_en = %u\n", __func__, enabled);
#endif
	return enabled;
}
3692
CalculateDETBufferSize(struct dml2_core_shared_calculate_det_buffer_size_params * p)3693 static void CalculateDETBufferSize(struct dml2_core_shared_calculate_det_buffer_size_params *p)
3694 {
3695 unsigned int DETBufferSizePoolInKByte;
3696 unsigned int NextDETBufferPieceInKByte;
3697 bool DETPieceAssignedToThisSurfaceAlready[DML2_MAX_PLANES];
3698 bool NextPotentialSurfaceToAssignDETPieceFound;
3699 unsigned int NextSurfaceToAssignDETPiece;
3700 double TotalBandwidth;
3701 double BandwidthOfSurfacesNotAssignedDETPiece;
3702 unsigned int max_minDET;
3703 unsigned int minDET;
3704 unsigned int minDET_pipe;
3705 unsigned int TotalBandwidthPerStream[DML2_MAX_PLANES] = { 0 };
3706 unsigned int TotalPixelRate = 0;
3707 unsigned int DETBudgetPerStream[DML2_MAX_PLANES] = { 0 };
3708 unsigned int RemainingDETBudgetPerStream[DML2_MAX_PLANES] = { 0 };
3709 unsigned int IdealDETBudget, DeltaDETBudget;
3710 bool MinimizeReallocationSuccess = false;
3711
3712 #ifdef __DML_VBA_DEBUG__
3713 dml2_printf("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP);
3714 dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte);
3715 dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, p->NumberOfActiveSurfaces);
3716 dml2_printf("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled);
3717 dml2_printf("DML::%s: MaxTotalDETInKByte = %u\n", __func__, p->MaxTotalDETInKByte);
3718 dml2_printf("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte);
3719 dml2_printf("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, p->MinCompressedBufferSizeInKByte);
3720 dml2_printf("DML::%s: CompressedBufferSegmentSizeInkByte = %u\n", __func__, p->CompressedBufferSegmentSizeInkByte);
3721 #endif
3722
3723 // Note: Will use default det size if that fits 2 swaths
3724 if (p->UnboundedRequestEnabled) {
3725 if (p->display_cfg->plane_descriptors[0].overrides.det_size_override_kb > 0) {
3726 p->DETBufferSizeInKByte[0] = p->display_cfg->plane_descriptors[0].overrides.det_size_override_kb;
3727 } else {
3728 p->DETBufferSizeInKByte[0] = (unsigned int)math_max2(128.0, math_ceil2(2.0 * ((double)p->full_swath_bytes_l[0] + (double)p->full_swath_bytes_c[0]) / 1024.0, p->ConfigReturnBufferSegmentSizeInkByte));
3729 }
3730 *p->CompressedBufferSizeInkByte = p->ConfigReturnBufferSizeInKByte - p->DETBufferSizeInKByte[0];
3731 } else {
3732 DETBufferSizePoolInKByte = p->MaxTotalDETInKByte;
3733 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3734 p->DETBufferSizeInKByte[k] = 0;
3735 if (dml2_core_shared_is_420(p->display_cfg->plane_descriptors[k].pixel_format)) {
3736 max_minDET = p->nomDETInKByte - p->ConfigReturnBufferSegmentSizeInkByte;
3737 } else {
3738 max_minDET = p->nomDETInKByte;
3739 }
3740 minDET = 128;
3741 minDET_pipe = 0;
3742
3743 // add DET resource until can hold 2 full swaths
3744 while (minDET <= max_minDET && minDET_pipe == 0) {
3745 if (2.0 * ((double)p->full_swath_bytes_l[k] + (double)p->full_swath_bytes_c[k]) / 1024.0 <= minDET)
3746 minDET_pipe = minDET;
3747 minDET = minDET + p->ConfigReturnBufferSegmentSizeInkByte;
3748 }
3749
3750 #ifdef __DML_VBA_DEBUG__
3751 dml2_printf("DML::%s: k=%u minDET = %u\n", __func__, k, minDET);
3752 dml2_printf("DML::%s: k=%u max_minDET = %u\n", __func__, k, max_minDET);
3753 dml2_printf("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, minDET_pipe);
3754 dml2_printf("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]);
3755 dml2_printf("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]);
3756 #endif
3757
3758 if (minDET_pipe == 0) {
3759 minDET_pipe = (unsigned int)(math_max2(128, math_ceil2(((double)p->full_swath_bytes_l[k] + (double)p->full_swath_bytes_c[k]) / 1024.0, p->ConfigReturnBufferSegmentSizeInkByte)));
3760 #ifdef __DML_VBA_DEBUG__
3761 dml2_printf("DML::%s: k=%u minDET_pipe = %u (assume each plane take half DET)\n", __func__, k, minDET_pipe);
3762 #endif
3763 }
3764
3765 if (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
3766 p->DETBufferSizeInKByte[k] = 0;
3767 } else if (p->display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0) {
3768 p->DETBufferSizeInKByte[k] = p->display_cfg->plane_descriptors[k].overrides.det_size_override_kb;
3769 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - (p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]) * p->display_cfg->plane_descriptors[k].overrides.det_size_override_kb;
3770 } else if ((p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
3771 p->DETBufferSizeInKByte[k] = minDET_pipe;
3772 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - (p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]) * minDET_pipe;
3773 }
3774
3775 #ifdef __DML_VBA_DEBUG__
3776 dml2_printf("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]);
3777 dml2_printf("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, p->display_cfg->plane_descriptors[k].overrides.det_size_override_kb);
3778 dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]);
3779 dml2_printf("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, DETBufferSizePoolInKByte);
3780 #endif
3781 }
3782
3783 if (p->display_cfg->minimize_det_reallocation) {
3784 MinimizeReallocationSuccess = true;
3785 // To minimize det reallocation, we don't distribute based on each surfaces bandwidth proportional to the global
3786 // but rather distribute DET across streams proportionally based on pixel rate, and only distribute based on
3787 // bandwidth between the planes on the same stream. This ensures that large scale re-distribution only on a
3788 // stream count and/or pixel rate change, which is must less likely then general bandwidth changes per plane.
3789
3790 // Calculate total pixel rate
3791 for (unsigned int k = 0; k < p->display_cfg->num_streams; ++k) {
3792 TotalPixelRate += p->display_cfg->stream_descriptors[k].timing.pixel_clock_khz;
3793 }
3794
3795 // Calculate per stream DET budget
3796 for (unsigned int k = 0; k < p->display_cfg->num_streams; ++k) {
3797 DETBudgetPerStream[k] = (unsigned int)((double)p->display_cfg->stream_descriptors[k].timing.pixel_clock_khz * p->MaxTotalDETInKByte / TotalPixelRate);
3798 RemainingDETBudgetPerStream[k] = DETBudgetPerStream[k];
3799 }
3800
3801 // Calculate the per stream total bandwidth
3802 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3803 if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
3804 TotalBandwidthPerStream[p->display_cfg->plane_descriptors[k].stream_index] += (unsigned int)(p->ReadBandwidthLuma[k] + p->ReadBandwidthChroma[k]);
3805
3806 // Check the minimum can be satisfied by budget
3807 if (RemainingDETBudgetPerStream[p->display_cfg->plane_descriptors[k].stream_index] >= p->DETBufferSizeInKByte[k]) {
3808 RemainingDETBudgetPerStream[p->display_cfg->plane_descriptors[k].stream_index] -= p->DETBufferSizeInKByte[k];
3809 } else {
3810 MinimizeReallocationSuccess = false;
3811 break;
3812 }
3813 }
3814 }
3815
3816 if (MinimizeReallocationSuccess) {
3817 // Since a fixed budget per stream is sufficient to satisfy the minimums, just re-distribute each streams
3818 // budget proportionally across its planes
3819
3820 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3821 if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
3822 IdealDETBudget = (unsigned int)(((p->ReadBandwidthLuma[k] + p->ReadBandwidthChroma[k]) / TotalBandwidthPerStream[p->display_cfg->plane_descriptors[k].stream_index])
3823 * DETBudgetPerStream[p->display_cfg->plane_descriptors[k].stream_index]);
3824
3825 if (IdealDETBudget > p->DETBufferSizeInKByte[k]) {
3826 DeltaDETBudget = IdealDETBudget - p->DETBufferSizeInKByte[k];
3827 if (DeltaDETBudget > RemainingDETBudgetPerStream[p->display_cfg->plane_descriptors[k].stream_index])
3828 DeltaDETBudget = RemainingDETBudgetPerStream[p->display_cfg->plane_descriptors[k].stream_index];
3829
3830 p->DETBufferSizeInKByte[k] += DeltaDETBudget;
3831 RemainingDETBudgetPerStream[p->display_cfg->plane_descriptors[k].stream_index] -= DeltaDETBudget;
3832 }
3833
3834 // Split among the pipes per the plane
3835 p->DETBufferSizeInKByte[k] = (unsigned int)((double)p->DETBufferSizeInKByte[k] / (p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]));
3836
3837 // Round down to segment size
3838 p->DETBufferSizeInKByte[k] = (p->DETBufferSizeInKByte[k] / p->CompressedBufferSegmentSizeInkByte) * p->CompressedBufferSegmentSizeInkByte;
3839 }
3840 }
3841 }
3842 }
3843
3844 if (!MinimizeReallocationSuccess) {
3845 TotalBandwidth = 0;
3846 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3847 if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
3848 TotalBandwidth = TotalBandwidth + p->ReadBandwidthLuma[k] + p->ReadBandwidthChroma[k];
3849 }
3850 }
3851 #ifdef __DML_VBA_DEBUG__
3852 dml2_printf("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
3853 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3854 dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]);
3855 }
3856 dml2_printf("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
3857 #endif
3858 dml2_printf("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
3859 BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
3860 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3861
3862 if (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
3863 DETPieceAssignedToThisSurfaceAlready[k] = true;
3864 } else if (p->display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0 || (((double)(p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]) * (double)p->DETBufferSizeInKByte[k] / (double)p->MaxTotalDETInKByte) >= ((p->ReadBandwidthLuma[k] + p->ReadBandwidthChroma[k]) / TotalBandwidth))) {
3865 DETPieceAssignedToThisSurfaceAlready[k] = true;
3866 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - p->ReadBandwidthLuma[k] - p->ReadBandwidthChroma[k];
3867 } else {
3868 DETPieceAssignedToThisSurfaceAlready[k] = false;
3869 }
3870 #ifdef __DML_VBA_DEBUG__
3871 dml2_printf("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, DETPieceAssignedToThisSurfaceAlready[k]);
3872 dml2_printf("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, BandwidthOfSurfacesNotAssignedDETPiece);
3873 #endif
3874 }
3875
3876 for (unsigned int j = 0; j < p->NumberOfActiveSurfaces; ++j) {
3877 NextPotentialSurfaceToAssignDETPieceFound = false;
3878 NextSurfaceToAssignDETPiece = 0;
3879
3880 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3881 #ifdef __DML_VBA_DEBUG__
3882 dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, p->ReadBandwidthLuma[k]);
3883 dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, p->ReadBandwidthChroma[k]);
3884 dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, p->ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
3885 dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, p->ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
3886 dml2_printf("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, NextSurfaceToAssignDETPiece);
3887 #endif
3888 if (!DETPieceAssignedToThisSurfaceAlready[k] && (!NextPotentialSurfaceToAssignDETPieceFound ||
3889 p->ReadBandwidthLuma[k] + p->ReadBandwidthChroma[k] < p->ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + p->ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
3890 NextSurfaceToAssignDETPiece = k;
3891 NextPotentialSurfaceToAssignDETPieceFound = true;
3892 }
3893 #ifdef __DML_VBA_DEBUG__
3894 dml2_printf("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
3895 dml2_printf("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
3896 #endif
3897 }
3898
3899 if (NextPotentialSurfaceToAssignDETPieceFound) {
3900 NextDETBufferPieceInKByte = (unsigned int)(math_min2(
3901 math_round((double)DETBufferSizePoolInKByte * (p->ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + p->ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / BandwidthOfSurfacesNotAssignedDETPiece /
3902 ((p->ForceSingleDPP ? 1 : p->DPPPerSurface[NextSurfaceToAssignDETPiece]) * p->ConfigReturnBufferSegmentSizeInkByte))
3903 * (p->ForceSingleDPP ? 1 : p->DPPPerSurface[NextSurfaceToAssignDETPiece]) * p->ConfigReturnBufferSegmentSizeInkByte,
3904 math_floor2((double)DETBufferSizePoolInKByte, (p->ForceSingleDPP ? 1 : p->DPPPerSurface[NextSurfaceToAssignDETPiece]) * p->ConfigReturnBufferSegmentSizeInkByte)));
3905
3906 #ifdef __DML_VBA_DEBUG__
3907 dml2_printf("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, DETBufferSizePoolInKByte);
3908 dml2_printf("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, NextSurfaceToAssignDETPiece);
3909 dml2_printf("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, NextSurfaceToAssignDETPiece, p->ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
3910 dml2_printf("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, NextSurfaceToAssignDETPiece, p->ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
3911 dml2_printf("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
3912 dml2_printf("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, NextDETBufferPieceInKByte);
3913 dml2_printf("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, NextSurfaceToAssignDETPiece, p->DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
3914 #endif
3915
3916 p->DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] = p->DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] + NextDETBufferPieceInKByte / (p->ForceSingleDPP ? 1 : p->DPPPerSurface[NextSurfaceToAssignDETPiece]);
3917 #ifdef __DML_VBA_DEBUG__
3918 dml2_printf("to %u\n", p->DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
3919 #endif
3920
3921 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
3922 DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
3923 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - (p->ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + p->ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
3924 }
3925 }
3926 }
3927 *p->CompressedBufferSizeInkByte = p->MinCompressedBufferSizeInKByte;
3928 }
3929 *p->CompressedBufferSizeInkByte = *p->CompressedBufferSizeInkByte * p->CompressedBufferSegmentSizeInkByte / p->ConfigReturnBufferSegmentSizeInkByte;
3930
3931 #ifdef __DML_VBA_DEBUG__
3932 dml2_printf("DML::%s: --- After bandwidth adjustment ---\n", __func__);
3933 dml2_printf("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte);
3934 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3935 dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, p->DETBufferSizeInKByte[k], p->ReadBandwidthLuma[k] + p->ReadBandwidthChroma[k]);
3936 }
3937 #endif
3938 }
3939
CalculateRequiredDispclk(enum dml2_odm_mode ODMMode,double PixelClock)3940 static double CalculateRequiredDispclk(
3941 enum dml2_odm_mode ODMMode,
3942 double PixelClock)
3943 {
3944
3945 if (ODMMode == dml2_odm_mode_combine_4to1) {
3946 return PixelClock / 4.0;
3947 } else if (ODMMode == dml2_odm_mode_combine_3to1) {
3948 return PixelClock / 3.0;
3949 } else if (ODMMode == dml2_odm_mode_combine_2to1) {
3950 return PixelClock / 2.0;
3951 } else {
3952 return PixelClock;
3953 }
3954 }
3955
TruncToValidBPP(struct dml2_core_shared_TruncToValidBPP_locals * l,double LinkBitRate,unsigned int Lanes,unsigned int HTotal,unsigned int HActive,double PixelClock,double DesiredBPP,bool DSCEnable,enum dml2_output_encoder_class Output,enum dml2_output_format_class Format,unsigned int DSCInputBitPerComponent,unsigned int DSCSlices,unsigned int AudioRate,unsigned int AudioLayout,enum dml2_odm_mode ODMModeNoDSC,enum dml2_odm_mode ODMModeDSC,unsigned int * RequiredSlots)3956 static double TruncToValidBPP(
3957 struct dml2_core_shared_TruncToValidBPP_locals *l,
3958 double LinkBitRate,
3959 unsigned int Lanes,
3960 unsigned int HTotal,
3961 unsigned int HActive,
3962 double PixelClock,
3963 double DesiredBPP,
3964 bool DSCEnable,
3965 enum dml2_output_encoder_class Output,
3966 enum dml2_output_format_class Format,
3967 unsigned int DSCInputBitPerComponent,
3968 unsigned int DSCSlices,
3969 unsigned int AudioRate,
3970 unsigned int AudioLayout,
3971 enum dml2_odm_mode ODMModeNoDSC,
3972 enum dml2_odm_mode ODMModeDSC,
3973
3974 // Output
3975 unsigned int *RequiredSlots)
3976 {
3977 double MaxLinkBPP;
3978 unsigned int MinDSCBPP;
3979 double MaxDSCBPP;
3980 unsigned int NonDSCBPP0;
3981 unsigned int NonDSCBPP1;
3982 unsigned int NonDSCBPP2;
3983 enum dml2_odm_mode ODMMode;
3984
3985 if (Format == dml2_420) {
3986 NonDSCBPP0 = 12;
3987 NonDSCBPP1 = 15;
3988 NonDSCBPP2 = 18;
3989 MinDSCBPP = 6;
3990 MaxDSCBPP = 16;
3991 } else if (Format == dml2_444) {
3992 NonDSCBPP0 = 24;
3993 NonDSCBPP1 = 30;
3994 NonDSCBPP2 = 36;
3995 MinDSCBPP = 8;
3996 MaxDSCBPP = 16;
3997 } else {
3998 if (Output == dml2_hdmi || Output == dml2_hdmifrl) {
3999 NonDSCBPP0 = 24;
4000 NonDSCBPP1 = 24;
4001 NonDSCBPP2 = 24;
4002 } else {
4003 NonDSCBPP0 = 16;
4004 NonDSCBPP1 = 20;
4005 NonDSCBPP2 = 24;
4006 }
4007 if (Format == dml2_n422 || Output == dml2_hdmifrl) {
4008 MinDSCBPP = 7;
4009 MaxDSCBPP = 16;
4010 } else {
4011 MinDSCBPP = 8;
4012 MaxDSCBPP = 16;
4013 }
4014 }
4015 if (Output == dml2_dp2p0) {
4016 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128.0 / 132.0 * 383.0 / 384.0 * 65536.0 / 65540.0;
4017 } else if (DSCEnable && Output == dml2_dp) {
4018 MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock * (1 - 2.4 / 100);
4019 } else {
4020 MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock;
4021 }
4022
4023 ODMMode = DSCEnable ? ODMModeDSC : ODMModeNoDSC;
4024
4025 if (ODMMode == dml2_odm_mode_split_1to2) {
4026 MaxLinkBPP = 2 * MaxLinkBPP;
4027 }
4028
4029 if (DesiredBPP == 0) {
4030 if (DSCEnable) {
4031 if (MaxLinkBPP < MinDSCBPP) {
4032 return __DML2_CALCS_DPP_INVALID__;
4033 } else if (MaxLinkBPP >= MaxDSCBPP) {
4034 return MaxDSCBPP;
4035 } else {
4036 return math_floor2(16.0 * MaxLinkBPP, 1.0) / 16.0;
4037 }
4038 } else {
4039 if (MaxLinkBPP >= NonDSCBPP2) {
4040 return NonDSCBPP2;
4041 } else if (MaxLinkBPP >= NonDSCBPP1) {
4042 return NonDSCBPP1;
4043 } else if (MaxLinkBPP >= NonDSCBPP0) {
4044 return NonDSCBPP0;
4045 } else {
4046 return __DML2_CALCS_DPP_INVALID__;
4047 }
4048 }
4049 } else {
4050 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0)) ||
4051 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
4052 return __DML2_CALCS_DPP_INVALID__;
4053 } else {
4054 return DesiredBPP;
4055 }
4056 }
4057 }
4058
4059 // updated for dcn4
dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum dml2_output_format_class pixelFormat,enum dml2_output_encoder_class Output)4060 static unsigned int dscceComputeDelay(
4061 unsigned int bpc,
4062 double BPP,
4063 unsigned int sliceWidth,
4064 unsigned int numSlices,
4065 enum dml2_output_format_class pixelFormat,
4066 enum dml2_output_encoder_class Output)
4067 {
4068 // valid bpc = source bits per component in the set of {8, 10, 12}
4069 // valid bpp = increments of 1/16 of a bit
4070 // min = 6/7/8 in N420/N422/444, respectively
4071 // max = such that compression is 1:1
4072 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
4073 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
4074 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
4075
4076 // fixed value
4077 unsigned int rcModelSize = 8192;
4078
4079 // N422/N420 operate at 2 pixels per clock
4080 unsigned int pixelsPerClock, padding_pixels, ssm_group_priming_delay, ssm_pipeline_delay, obsm_pipeline_delay, slice_padded_pixels, ixd_plus_padding, ixd_plus_padding_groups, cycles_per_group, group_delay, pipeline_delay, pixels, additional_group_delay, lines_to_reach_ixd, groups_to_reach_ixd, slice_width_groups, initial_xmit_delay, number_of_lines_to_reach_ixd, slice_width_modified;
4081
4082
4083 if (pixelFormat == dml2_420)
4084 pixelsPerClock = 2;
4085 // #all other modes operate at 1 pixel per clock
4086 else if (pixelFormat == dml2_444)
4087 pixelsPerClock = 1;
4088 else if (pixelFormat == dml2_n422 || Output == dml2_hdmifrl)
4089 pixelsPerClock = 2;
4090 else
4091 pixelsPerClock = 1;
4092
4093 //initial transmit delay as per PPS
4094 initial_xmit_delay = (unsigned int)(math_round(rcModelSize / 2.0 / BPP / pixelsPerClock));
4095
4096 //slice width as seen by dscc_bcl in pixels or pixels pairs (depending on number of pixels per pixel container based on pixel format)
4097 slice_width_modified = (pixelFormat == dml2_444 || pixelFormat == dml2_420 || Output == dml2_hdmifrl) ? sliceWidth / 2 : sliceWidth;
4098
4099 padding_pixels = ((slice_width_modified % 3) != 0) ? (3 - (slice_width_modified % 3)) * (initial_xmit_delay / slice_width_modified) : 0;
4100
4101 if ((3.0 * pixelsPerClock * BPP) >= ((double)((initial_xmit_delay + 2) / 3) * (double)(3 + (pixelFormat == dml2_n422)))) {
4102 if ((initial_xmit_delay + padding_pixels) % 3 == 1) {
4103 initial_xmit_delay++;
4104 }
4105 }
4106
4107
4108 //sub-stream multiplexer balance fifo priming delay in groups as per dsc standard
4109 if (bpc == 8)
4110 ssm_group_priming_delay = 83;
4111 else if (bpc == 10)
4112 ssm_group_priming_delay = 91;
4113 else if (bpc == 12)
4114 ssm_group_priming_delay = 115;
4115 else if (bpc == 14)
4116 ssm_group_priming_delay = 123;
4117 else
4118 ssm_group_priming_delay = 128;
4119
4120 //slice width in groups is rounded up to the nearest group as DSC adds padded pixels such that there are an integer number of groups per slice
4121 slice_width_groups = (slice_width_modified + 2) / 3;
4122
4123 //determine number of padded pixels in the last group of a slice line, computed as
4124 slice_padded_pixels = 3 * slice_width_groups - slice_width_modified;
4125
4126
4127
4128
4129 //determine integer number of complete slice lines required to reach initial transmit delay without ssm delay considered
4130 number_of_lines_to_reach_ixd = initial_xmit_delay / slice_width_modified;
4131
4132 //increase initial transmit delay by the number of padded pixels added to a slice line multipled by the integer number of complete lines to reach initial transmit delay
4133 //this step is necessary as each padded pixel added takes up a clock cycle and, therefore, adds to the overall delay
4134 ixd_plus_padding = initial_xmit_delay + slice_padded_pixels * number_of_lines_to_reach_ixd;
4135
4136 //convert the padded initial transmit delay from pixels to groups by rounding up to the nearest group as DSC processes in groups of pixels
4137 ixd_plus_padding_groups = (ixd_plus_padding + 2) / 3;
4138
4139 //number of groups required for a slice to reach initial transmit delay is the sum of the padded initial transmit delay plus the ssm group priming delay
4140 groups_to_reach_ixd = ixd_plus_padding_groups + ssm_group_priming_delay;
4141
4142
4143 //number of lines required to reach padded initial transmit delay in groups in slices to the left of the last horizontal slice
4144 //needs to be rounded up as a complete slice lines are buffered prior to initial transmit delay being reached in the last horizontal slice
4145 lines_to_reach_ixd = (groups_to_reach_ixd + slice_width_groups - 1) / slice_width_groups; //round up lines to reach ixd to next
4146
4147 //determine if there are non-zero number of pixels reached in the group where initial transmit delay is reached
4148 //an additional group time (i.e., 3 pixel times) is required before the first output if there are no additional pixels beyond initial transmit delay
4149 additional_group_delay = ((initial_xmit_delay - number_of_lines_to_reach_ixd * slice_width_modified) % 3) == 0 ? 1 : 0;
4150
4151 //number of pipeline delay cycles in the ssm block (can be determined empirically or analytically by inspecting the ssm block)
4152 ssm_pipeline_delay = 2;
4153
4154 //number of pipe delay cycles in the obsm block (can be determined empirically or analytically by inspecting the obsm block)
4155 obsm_pipeline_delay = 1;
4156
4157 //a group of pixels is worth 6 pixels in N422/N420 mode or 3 pixels in all other modes
4158 if (pixelFormat == dml2_420 || pixelFormat == dml2_444 || pixelFormat == dml2_n422 || Output == dml2_hdmifrl)
4159 cycles_per_group = 6;
4160 else
4161 cycles_per_group = 3;
4162 //delay of the bit stream contruction layer in pixels is the sum of:
4163 //1. number of pixel containers in a slice line multipled by the number of lines required to reach initial transmit delay multipled by number of slices to the left of the last horizontal slice
4164 //2. number of pixel containers required to reach initial transmit delay (specifically, in the last horizontal slice)
4165 //3. additional group of delay if initial transmit delay is reached exactly in a group
4166 //4. ssm and obsm pipeline delay (i.e., clock cycles of delay)
4167 group_delay = (lines_to_reach_ixd * slice_width_groups * (numSlices - 1)) + groups_to_reach_ixd + additional_group_delay;
4168 pipeline_delay = ssm_pipeline_delay + obsm_pipeline_delay;
4169
4170 //pixel delay is group_delay (converted to pixels) + pipeline, however, first group is a special case since it is processed as soon as it arrives (i.e., in 3 cycles regardless of pixel format)
4171 pixels = (group_delay - 1) * cycles_per_group + 3 + pipeline_delay;
4172
4173 #ifdef __DML_VBA_DEBUG__
4174 dml2_printf("DML::%s: bpc: %u\n", __func__, bpc);
4175 dml2_printf("DML::%s: BPP: %f\n", __func__, BPP);
4176 dml2_printf("DML::%s: sliceWidth: %u\n", __func__, sliceWidth);
4177 dml2_printf("DML::%s: numSlices: %u\n", __func__, numSlices);
4178 dml2_printf("DML::%s: pixelFormat: %u\n", __func__, pixelFormat);
4179 dml2_printf("DML::%s: Output: %u\n", __func__, Output);
4180 dml2_printf("DML::%s: pixels: %u\n", __func__, pixels);
4181 #endif
4182 return pixels;
4183 }
4184
4185
4186 //updated in dcn4
dscComputeDelay(enum dml2_output_format_class pixelFormat,enum dml2_output_encoder_class Output)4187 static unsigned int dscComputeDelay(enum dml2_output_format_class pixelFormat, enum dml2_output_encoder_class Output)
4188 {
4189 unsigned int Delay = 0;
4190 unsigned int dispclk_per_dscclk = 3;
4191
4192 // sfr
4193 Delay = Delay + 2;
4194
4195 if (pixelFormat == dml2_420 || pixelFormat == dml2_n422 || (Output == dml2_hdmifrl && pixelFormat != dml2_444)) {
4196 dispclk_per_dscclk = 3 * 2;
4197 }
4198
4199 if (pixelFormat == dml2_420) {
4200 //dscc top delay for pixel compression layer
4201 Delay = Delay + 16 * dispclk_per_dscclk;
4202
4203 // dscc - input deserializer
4204 Delay = Delay + 5;
4205
4206 // dscc - input cdc fifo
4207 Delay = Delay + 1 + 4 * dispclk_per_dscclk;
4208
4209 // dscc - output cdc fifo
4210 Delay = Delay + 3 + 1 * dispclk_per_dscclk;
4211
4212 // dscc - cdc uncertainty
4213 Delay = Delay + 3 + 3 * dispclk_per_dscclk;
4214 } else if (pixelFormat == dml2_n422 || (Output == dml2_hdmifrl && pixelFormat != dml2_444)) {
4215 //dscc top delay for pixel compression layer
4216 Delay = Delay + 16 * dispclk_per_dscclk;
4217 // dsccif
4218 Delay = Delay + 1;
4219 // dscc - input deserializer
4220 Delay = Delay + 5;
4221 // dscc - input cdc fifo
4222 Delay = Delay + 1 + 4 * dispclk_per_dscclk;
4223
4224
4225 // dscc - output cdc fifo
4226 Delay = Delay + 3 + 1 * dispclk_per_dscclk;
4227 // dscc - cdc uncertainty
4228 Delay = Delay + 3 + 3 * dispclk_per_dscclk;
4229 } else if (pixelFormat == dml2_s422) {
4230 //dscc top delay for pixel compression layer
4231 Delay = Delay + 17 * dispclk_per_dscclk;
4232
4233 // dscc - input deserializer
4234 Delay = Delay + 3;
4235 // dscc - input cdc fifo
4236 Delay = Delay + 1 + 4 * dispclk_per_dscclk;
4237 // dscc - output cdc fifo
4238 Delay = Delay + 3 + 1 * dispclk_per_dscclk;
4239 // dscc - cdc uncertainty
4240 Delay = Delay + 3 + 3 * dispclk_per_dscclk;
4241 } else {
4242 //dscc top delay for pixel compression layer
4243 Delay = Delay + 16 * dispclk_per_dscclk;
4244 // dscc - input deserializer
4245 Delay = Delay + 3;
4246 // dscc - input cdc fifo
4247 Delay = Delay + 1 + 4 * dispclk_per_dscclk;
4248 // dscc - output cdc fifo
4249 Delay = Delay + 3 + 1 * dispclk_per_dscclk;
4250
4251 // dscc - cdc uncertainty
4252 Delay = Delay + 3 + 3 * dispclk_per_dscclk;
4253 }
4254
4255 // sft
4256 Delay = Delay + 1;
4257 #ifdef __DML_VBA_DEBUG__
4258 dml2_printf("DML::%s: pixelFormat = %u\n", __func__, pixelFormat);
4259 dml2_printf("DML::%s: Delay = %u\n", __func__, Delay);
4260 #endif
4261
4262 return Delay;
4263 }
4264
/*
 * Returns the number of dynamic host VM page table levels.
 *
 * The result is 0 unless both GPUVMEnable and HostVMEnable are set. Otherwise it is
 * HostVMMaxNonCachedPageTableLevels reduced according to HostVMMinPageSize:
 *   below 2048            - not reduced
 *   2048 up to 1048575    - reduced by 1
 *   1048576 and above     - reduced by 2
 * with the reduced value clamped so it never goes below 0.
 */
static unsigned int CalculateHostVMDynamicLevels(
	bool GPUVMEnable,
	bool HostVMEnable,
	unsigned int HostVMMinPageSize,
	unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int levels_skipped;

	if (!(GPUVMEnable && HostVMEnable))
		return 0;

	if (HostVMMinPageSize < 2048)
		return HostVMMaxNonCachedPageTableLevels;

	levels_skipped = (HostVMMinPageSize < 1048576) ? 1 : 2;

	// subtract in double so the clamp sees a negative value instead of an unsigned wrap
	return (unsigned int)math_max2(0, (double)HostVMMaxNonCachedPageTableLevels - levels_skipped);
}
4285
CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_and_row_bytes_params * p)4286 static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_and_row_bytes_params *p)
4287 {
4288 unsigned int extra_dpde_bytes;
4289 unsigned int extra_mpde_bytes;
4290 unsigned int MacroTileSizeBytes;
4291 unsigned int vp_height_dpte_ub;
4292
4293 unsigned int meta_surface_bytes;
4294 unsigned int vm_bytes;
4295 unsigned int vp_height_meta_ub;
4296
4297 *p->MetaRequestHeight = 8 * p->BlockHeight256Bytes;
4298 *p->MetaRequestWidth = 8 * p->BlockWidth256Bytes;
4299 if (p->SurfaceTiling == dml2_sw_linear) {
4300 *p->meta_row_height = 32;
4301 *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth));
4302 *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0); // FIXME_DCN4SW missing in old code but no dcc for linear anyways?
4303 } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
4304 *p->meta_row_height = *p->MetaRequestHeight;
4305 if (p->ViewportStationary && p->NumberOfDPPs == 1) {
4306 *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth));
4307 } else {
4308 *p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestWidth) + *p->MetaRequestWidth);
4309 }
4310 *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0);
4311 } else {
4312 *p->meta_row_height = *p->MetaRequestWidth;
4313 if (p->ViewportStationary && p->NumberOfDPPs == 1) {
4314 *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->MetaRequestHeight - 1, *p->MetaRequestHeight) - math_floor2(p->ViewportYStart, *p->MetaRequestHeight));
4315 } else {
4316 *p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestHeight) + *p->MetaRequestHeight);
4317 }
4318 *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestWidth * p->BytePerPixel / 256.0);
4319 }
4320
4321 if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) {
4322 vp_height_meta_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + 64 * p->BlockHeight256Bytes - 1, 64 * p->BlockHeight256Bytes) - math_floor2(p->ViewportYStart, 64 * p->BlockHeight256Bytes));
4323 } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
4324 vp_height_meta_ub = (unsigned int)(math_ceil2(p->ViewportHeight - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes);
4325 } else {
4326 vp_height_meta_ub = (unsigned int)(math_ceil2(p->SwathWidth - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes);
4327 }
4328
4329 meta_surface_bytes = (unsigned int)(p->DCCMetaPitch * vp_height_meta_ub * p->BytePerPixel / 256.0);
4330 #ifdef __DML_VBA_DEBUG__
4331 dml2_printf("DML::%s: DCCMetaPitch = %u\n", __func__, p->DCCMetaPitch);
4332 dml2_printf("DML::%s: meta_surface_bytes = %u\n", __func__, meta_surface_bytes);
4333 #endif
4334 if (p->GPUVMEnable == true) {
4335 double meta_vmpg_bytes = 4.0 * 1024.0;
4336 *p->meta_pte_bytes_per_frame_ub = (unsigned int)((math_ceil2((double)(meta_surface_bytes - meta_vmpg_bytes) / (8 * meta_vmpg_bytes), 1) + 1) * 64);
4337 extra_mpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 1);
4338 } else {
4339 *p->meta_pte_bytes_per_frame_ub = 0;
4340 extra_mpde_bytes = 0;
4341 }
4342
4343 if (!p->DCCEnable || !p->mrq_present) {
4344 *p->meta_pte_bytes_per_frame_ub = 0;
4345 extra_mpde_bytes = 0;
4346 *p->meta_row_bytes = 0;
4347 }
4348
4349 if (!p->GPUVMEnable) {
4350 *p->PixelPTEBytesPerRow = 0;
4351 *p->PixelPTEBytesPerRowStorage = 0;
4352 *p->dpte_row_width_ub = 0;
4353 *p->dpte_row_height = 0;
4354 *p->dpte_row_height_linear = 0;
4355 *p->PixelPTEBytesPerRow_one_row_per_frame = 0;
4356 *p->dpte_row_width_ub_one_row_per_frame = 0;
4357 *p->dpte_row_height_one_row_per_frame = 0;
4358 *p->vmpg_width = 0;
4359 *p->vmpg_height = 0;
4360 *p->PixelPTEReqWidth = 0;
4361 *p->PixelPTEReqHeight = 0;
4362 *p->PTERequestSize = 0;
4363 *p->dpde0_bytes_per_frame_ub = 0;
4364 return 0;
4365 }
4366
4367 MacroTileSizeBytes = p->MacroTileWidth * p->BytePerPixel * p->MacroTileHeight;
4368
4369 if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) {
4370 vp_height_dpte_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + p->MacroTileHeight - 1, p->MacroTileHeight) - math_floor2(p->ViewportYStart, p->MacroTileHeight));
4371 } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
4372 vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->ViewportHeight - 1, p->MacroTileHeight) + p->MacroTileHeight);
4373 } else {
4374 vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->SwathWidth - 1, p->MacroTileHeight) + p->MacroTileHeight);
4375 }
4376
4377 if (p->GPUVMEnable == true && p->GPUVMMaxPageTableLevels > 1) {
4378 *p->dpde0_bytes_per_frame_ub = (unsigned int)(64 * (math_ceil2((double)(p->Pitch * vp_height_dpte_ub * p->BytePerPixel - MacroTileSizeBytes) / (double)(8 * 2097152), 1) + 1));
4379 extra_dpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 2);
4380 } else {
4381 *p->dpde0_bytes_per_frame_ub = 0;
4382 extra_dpde_bytes = 0;
4383 }
4384
4385 vm_bytes = *p->meta_pte_bytes_per_frame_ub + extra_mpde_bytes + *p->dpde0_bytes_per_frame_ub + extra_dpde_bytes;
4386
4387 #ifdef __DML_VBA_DEBUG__
4388 dml2_printf("DML::%s: DCCEnable = %u\n", __func__, p->DCCEnable);
4389 dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable);
4390 dml2_printf("DML::%s: SwModeLinear = %u\n", __func__, p->SurfaceTiling == dml2_sw_linear);
4391 dml2_printf("DML::%s: BytePerPixel = %u\n", __func__, p->BytePerPixel);
4392 dml2_printf("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, p->GPUVMMaxPageTableLevels);
4393 dml2_printf("DML::%s: BlockHeight256Bytes = %u\n", __func__, p->BlockHeight256Bytes);
4394 dml2_printf("DML::%s: BlockWidth256Bytes = %u\n", __func__, p->BlockWidth256Bytes);
4395 dml2_printf("DML::%s: MacroTileHeight = %u\n", __func__, p->MacroTileHeight);
4396 dml2_printf("DML::%s: MacroTileWidth = %u\n", __func__, p->MacroTileWidth);
4397 dml2_printf("DML::%s: meta_pte_bytes_per_frame_ub = %u\n", __func__, *p->meta_pte_bytes_per_frame_ub);
4398 dml2_printf("DML::%s: dpde0_bytes_per_frame_ub = %u\n", __func__, *p->dpde0_bytes_per_frame_ub);
4399 dml2_printf("DML::%s: extra_mpde_bytes = %u\n", __func__, extra_mpde_bytes);
4400 dml2_printf("DML::%s: extra_dpde_bytes = %u\n", __func__, extra_dpde_bytes);
4401 dml2_printf("DML::%s: vm_bytes = %u\n", __func__, vm_bytes);
4402 dml2_printf("DML::%s: ViewportHeight = %u\n", __func__, p->ViewportHeight);
4403 dml2_printf("DML::%s: SwathWidth = %u\n", __func__, p->SwathWidth);
4404 dml2_printf("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub);
4405 #endif
4406
4407 unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
4408
4409 if (p->SurfaceTiling == dml2_sw_linear) {
4410 *p->PixelPTEReqHeight = 1;
4411 *p->PixelPTEReqWidth = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel;
4412 PixelPTEReqWidth_linear = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel;
4413 *p->PTERequestSize = 64;
4414
4415 *p->vmpg_height = 1;
4416 *p->vmpg_width = p->GPUVMMinPageSizeKBytes * 1024 / p->BytePerPixel;
4417 } else if (p->GPUVMMinPageSizeKBytes * 1024 >= dml_get_tile_block_size_bytes(p->SurfaceTiling)) { // 1 64B 8x1 PTE
4418 *p->PixelPTEReqHeight = p->MacroTileHeight;
4419 *p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
4420 *p->PTERequestSize = 64;
4421
4422 *p->vmpg_height = p->MacroTileHeight;
4423 *p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
4424
4425 } else if (p->GPUVMMinPageSizeKBytes == 4 && dml_get_tile_block_size_bytes(p->SurfaceTiling) == 65536) { // 2 64B PTE requests to get 16 PTEs to cover the 64K tile
4426 // one 64KB tile, is 16x16x256B req
4427 *p->PixelPTEReqHeight = 16 * p->BlockHeight256Bytes;
4428 *p->PixelPTEReqWidth = 16 * p->BlockWidth256Bytes;
4429 *p->PTERequestSize = 128;
4430
4431 *p->vmpg_height = *p->PixelPTEReqHeight;
4432 *p->vmpg_width = *p->PixelPTEReqWidth;
4433 } else {
4434 // default for rest of calculation to go through, when vm is disable, the calulated pte related values shouldnt be used anyways
4435 *p->PixelPTEReqHeight = p->MacroTileHeight;
4436 *p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
4437 *p->PTERequestSize = 64;
4438
4439 *p->vmpg_height = p->MacroTileHeight;
4440 *p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
4441
4442 if (p->GPUVMEnable == true) {
4443 dml2_printf("DML::%s: GPUVMMinPageSizeKBytes=%u and sw_mode=%u (tile_size=%d) not supported!\n",
4444 __func__, p->GPUVMMinPageSizeKBytes, p->SurfaceTiling, dml_get_tile_block_size_bytes(p->SurfaceTiling));
4445 DML2_ASSERT(0);
4446 }
4447 }
4448
4449 #ifdef __DML_VBA_DEBUG__
4450 dml2_printf("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes);
4451 dml2_printf("DML::%s: PixelPTEReqHeight = %u\n", __func__, *p->PixelPTEReqHeight);
4452 dml2_printf("DML::%s: PixelPTEReqWidth = %u\n", __func__, *p->PixelPTEReqWidth);
4453 dml2_printf("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear);
4454 dml2_printf("DML::%s: PTERequestSize = %u\n", __func__, *p->PTERequestSize);
4455 dml2_printf("DML::%s: Pitch = %u\n", __func__, p->Pitch);
4456 dml2_printf("DML::%s: vmpg_width = %u\n", __func__, *p->vmpg_width);
4457 dml2_printf("DML::%s: vmpg_height = %u\n", __func__, *p->vmpg_height);
4458 #endif
4459
4460 *p->dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
4461 *p->dpte_row_width_ub_one_row_per_frame = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height_one_row_per_frame / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * (double)*p->PixelPTEReqWidth);
4462 *p->PixelPTEBytesPerRow_one_row_per_frame = (unsigned int)((double)*p->dpte_row_width_ub_one_row_per_frame / (double)*p->PixelPTEReqWidth * *p->PTERequestSize);
4463
4464 if (p->SurfaceTiling == dml2_sw_linear) {
4465 *p->dpte_row_height = (unsigned int)(math_min2(128, (double)(1ULL << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * *p->PixelPTEReqWidth / p->Pitch), 2.0), 1))));
4466 *p->dpte_row_width_ub = (unsigned int)(math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height - 1), (double)*p->PixelPTEReqWidth) + *p->PixelPTEReqWidth);
4467 *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqWidth * *p->PTERequestSize);
4468 *p->dpte_row_height_linear = 0;
4469
4470 // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
4471 *p->dpte_row_height_linear = (unsigned int)1 << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * PixelPTEReqWidth_linear / p->Pitch), 2.0), 1);
4472 if (*p->dpte_row_height_linear > 128)
4473 *p->dpte_row_height_linear = 128;
4474
4475 #ifdef __DML_VBA_DEBUG__
4476 dml2_printf("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *p->dpte_row_width_ub);
4477 #endif
4478
4479 } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
4480 *p->dpte_row_height = *p->PixelPTEReqHeight;
4481
4482 if (p->GPUVMMinPageSizeKBytes > 64) {
4483 *p->dpte_row_width_ub = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * *p->PixelPTEReqWidth);
4484 } else if (p->ViewportStationary && (p->NumberOfDPPs == 1)) {
4485 *p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->PixelPTEReqWidth - 1, *p->PixelPTEReqWidth) - math_floor2(p->ViewportXStart, *p->PixelPTEReqWidth));
4486 } else {
4487 *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqWidth, 1) + 1.0) * *p->PixelPTEReqWidth);
4488 }
4489 #ifdef __DML_VBA_DEBUG__
4490 dml2_printf("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *p->dpte_row_width_ub);
4491 #endif
4492
4493 *p->PixelPTEBytesPerRow = *p->dpte_row_width_ub / *p->PixelPTEReqWidth * *p->PTERequestSize;
4494 } else {
4495 *p->dpte_row_height = (unsigned int)(math_min2(*p->PixelPTEReqWidth, p->MacroTileWidth));
4496
4497 if (p->ViewportStationary && (p->NumberOfDPPs == 1)) {
4498 *p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->PixelPTEReqHeight - 1, *p->PixelPTEReqHeight) - math_floor2(p->ViewportYStart, *p->PixelPTEReqHeight));
4499 } else {
4500 *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqHeight, 1) + 1) * *p->PixelPTEReqHeight);
4501 }
4502
4503 *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqHeight * *p->PTERequestSize);
4504 #ifdef __DML_VBA_DEBUG__
4505 dml2_printf("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *p->dpte_row_width_ub);
4506 #endif
4507 }
4508
4509 if (p->GPUVMEnable != true) {
4510 *p->PixelPTEBytesPerRow = 0;
4511 *p->PixelPTEBytesPerRow_one_row_per_frame = 0;
4512 }
4513
4514 *p->PixelPTEBytesPerRowStorage = *p->PixelPTEBytesPerRow;
4515
4516 #ifdef __DML_VBA_DEBUG__
4517 dml2_printf("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes);
4518 dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable);
4519 dml2_printf("DML::%s: dpte_row_height = %u\n", __func__, *p->dpte_row_height);
4520 dml2_printf("DML::%s: dpte_row_height_linear = %u\n", __func__, *p->dpte_row_height_linear);
4521 dml2_printf("DML::%s: dpte_row_width_ub = %u\n", __func__, *p->dpte_row_width_ub);
4522 dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *p->PixelPTEBytesPerRow);
4523 dml2_printf("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *p->PixelPTEBytesPerRowStorage);
4524 dml2_printf("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, p->PTEBufferSizeInRequests);
4525 dml2_printf("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *p->dpte_row_height_one_row_per_frame);
4526 dml2_printf("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *p->dpte_row_width_ub_one_row_per_frame);
4527 dml2_printf("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, *p->PixelPTEBytesPerRow_one_row_per_frame);
4528 #endif
4529
4530 return vm_bytes;
4531 } // CalculateVMAndRowBytes
4532
CalculatePrefetchSourceLines(double VRatio,unsigned int VTaps,bool Interlace,bool ProgressiveToInterlaceUnitInOPP,unsigned int SwathHeight,enum dml2_rotation_angle RotationAngle,bool mirrored,bool ViewportStationary,unsigned int SwathWidth,unsigned int ViewportHeight,unsigned int ViewportXStart,unsigned int ViewportYStart,unsigned int * VInitPreFill,unsigned int * MaxNumSwath)4533 static unsigned int CalculatePrefetchSourceLines(
4534 double VRatio,
4535 unsigned int VTaps,
4536 bool Interlace,
4537 bool ProgressiveToInterlaceUnitInOPP,
4538 unsigned int SwathHeight,
4539 enum dml2_rotation_angle RotationAngle,
4540 bool mirrored,
4541 bool ViewportStationary,
4542 unsigned int SwathWidth,
4543 unsigned int ViewportHeight,
4544 unsigned int ViewportXStart,
4545 unsigned int ViewportYStart,
4546
4547 // Output
4548 unsigned int *VInitPreFill,
4549 unsigned int *MaxNumSwath)
4550 {
4551
4552 unsigned int vp_start_rot = 0;
4553 unsigned int sw0_tmp = 0;
4554 unsigned int MaxPartialSwath = 0;
4555 double numLines = 0;
4556
4557 #ifdef __DML_VBA_DEBUG__
4558 dml2_printf("DML::%s: VRatio = %f\n", __func__, VRatio);
4559 dml2_printf("DML::%s: VTaps = %u\n", __func__, VTaps);
4560 dml2_printf("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart);
4561 dml2_printf("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart);
4562 dml2_printf("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary);
4563 dml2_printf("DML::%s: SwathHeight = %u\n", __func__, SwathHeight);
4564 #endif
4565 if (ProgressiveToInterlaceUnitInOPP)
4566 *VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1) / 2.0, 1));
4567 else
4568 *VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1 + (Interlace ? 1 : 0) * 0.5 * VRatio) / 2.0, 1));
4569
4570 if (ViewportStationary) {
4571 if (RotationAngle == dml2_rotation_180) {
4572 vp_start_rot = SwathHeight - (((unsigned int)(ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
4573 } else if ((RotationAngle == dml2_rotation_270 && !mirrored) || (RotationAngle == dml2_rotation_90 && mirrored)) {
4574 vp_start_rot = ViewportXStart;
4575 } else if ((RotationAngle == dml2_rotation_90 && !mirrored) || (RotationAngle == dml2_rotation_270 && mirrored)) {
4576 vp_start_rot = SwathHeight - (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
4577 } else {
4578 vp_start_rot = ViewportYStart;
4579 }
4580 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
4581 if (sw0_tmp < *VInitPreFill) {
4582 *MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - sw0_tmp) / (double)SwathHeight, 1) + 1);
4583 } else {
4584 *MaxNumSwath = 1;
4585 }
4586 MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(vp_start_rot + *VInitPreFill - 1) % SwathHeight));
4587 } else {
4588 *MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - 1.0) / (double)SwathHeight, 1) + 1);
4589 if (*VInitPreFill > 1) {
4590 MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(*VInitPreFill - 2) % SwathHeight));
4591 } else {
4592 MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(*VInitPreFill + SwathHeight - 2) % SwathHeight));
4593 }
4594 }
4595 numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
4596
4597 #ifdef __DML_VBA_DEBUG__
4598 dml2_printf("DML::%s: vp_start_rot = %u\n", __func__, vp_start_rot);
4599 dml2_printf("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill);
4600 dml2_printf("DML::%s: MaxPartialSwath = %u\n", __func__, MaxPartialSwath);
4601 dml2_printf("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath);
4602 dml2_printf("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
4603 #endif
4604 return (unsigned int)(numLines);
4605
4606 }
4607
CalculateRowBandwidth(bool GPUVMEnable,bool use_one_row_for_frame,enum dml2_source_format_class SourcePixelFormat,double VRatio,double VRatioChroma,bool DCCEnable,double LineTime,unsigned int PixelPTEBytesPerRowLuma,unsigned int PixelPTEBytesPerRowChroma,unsigned int dpte_row_height_luma,unsigned int dpte_row_height_chroma,bool mrq_present,unsigned int meta_row_bytes_per_row_ub_l,unsigned int meta_row_bytes_per_row_ub_c,unsigned int meta_row_height_luma,unsigned int meta_row_height_chroma,double * dpte_row_bw,double * meta_row_bw)4608 static void CalculateRowBandwidth(
4609 bool GPUVMEnable,
4610 bool use_one_row_for_frame,
4611 enum dml2_source_format_class SourcePixelFormat,
4612 double VRatio,
4613 double VRatioChroma,
4614 bool DCCEnable,
4615 double LineTime,
4616 unsigned int PixelPTEBytesPerRowLuma,
4617 unsigned int PixelPTEBytesPerRowChroma,
4618 unsigned int dpte_row_height_luma,
4619 unsigned int dpte_row_height_chroma,
4620
4621 bool mrq_present,
4622 unsigned int meta_row_bytes_per_row_ub_l,
4623 unsigned int meta_row_bytes_per_row_ub_c,
4624 unsigned int meta_row_height_luma,
4625 unsigned int meta_row_height_chroma,
4626
4627 // Output
4628 double *dpte_row_bw,
4629 double *meta_row_bw)
4630 {
4631 if (!DCCEnable || !mrq_present) {
4632 *meta_row_bw = 0;
4633 } else if (dml2_core_shared_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) {
4634 *meta_row_bw = VRatio * meta_row_bytes_per_row_ub_l / (meta_row_height_luma * LineTime)
4635 + VRatioChroma * meta_row_bytes_per_row_ub_c / (meta_row_height_chroma * LineTime);
4636 } else {
4637 *meta_row_bw = VRatio * meta_row_bytes_per_row_ub_l / (meta_row_height_luma * LineTime);
4638 }
4639
4640 if (GPUVMEnable != true) {
4641 *dpte_row_bw = 0;
4642 } else if (dml2_core_shared_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) {
4643 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
4644 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
4645 } else {
4646 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
4647 }
4648 }
4649
CalculateMALLUseForStaticScreen(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int MALLAllocatedForDCN,unsigned int SurfaceSizeInMALL[],bool one_row_per_frame_fits_in_buffer[],bool is_using_mall_for_ss[])4650 static void CalculateMALLUseForStaticScreen(
4651 const struct dml2_display_cfg *display_cfg,
4652 unsigned int NumberOfActiveSurfaces,
4653 unsigned int MALLAllocatedForDCN,
4654 unsigned int SurfaceSizeInMALL[],
4655 bool one_row_per_frame_fits_in_buffer[],
4656
4657 // Output
4658 bool is_using_mall_for_ss[])
4659 {
4660
4661 unsigned int SurfaceToAddToMALL;
4662 bool CanAddAnotherSurfaceToMALL;
4663 unsigned int TotalSurfaceSizeInMALL;
4664
4665 TotalSurfaceSizeInMALL = 0;
4666 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
4667 is_using_mall_for_ss[k] = (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable);
4668 if (is_using_mall_for_ss[k])
4669 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
4670 #ifdef __DML_VBA_DEBUG__
4671 dml2_printf("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, is_using_mall_for_ss[k]);
4672 dml2_printf("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, TotalSurfaceSizeInMALL);
4673 #endif
4674 }
4675
4676 SurfaceToAddToMALL = 0;
4677 CanAddAnotherSurfaceToMALL = true;
4678 while (CanAddAnotherSurfaceToMALL) {
4679 CanAddAnotherSurfaceToMALL = false;
4680 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
4681 if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCN * 1024 * 1024 &&
4682 !is_using_mall_for_ss[k] && display_cfg->plane_descriptors[k].overrides.refresh_from_mall != dml2_refresh_from_mall_mode_override_force_disable && one_row_per_frame_fits_in_buffer[k] &&
4683 (!CanAddAnotherSurfaceToMALL || SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
4684 CanAddAnotherSurfaceToMALL = true;
4685 SurfaceToAddToMALL = k;
4686 dml2_printf("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, display_cfg->plane_descriptors[k].overrides.refresh_from_mall);
4687 }
4688 }
4689 if (CanAddAnotherSurfaceToMALL) {
4690 is_using_mall_for_ss[SurfaceToAddToMALL] = true;
4691 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
4692
4693 #ifdef __DML_VBA_DEBUG__
4694 dml2_printf("DML::%s: SurfaceToAddToMALL = %u\n", __func__, SurfaceToAddToMALL);
4695 dml2_printf("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, TotalSurfaceSizeInMALL);
4696 #endif
4697 }
4698 }
4699 }
4700
CalculateDCCConfiguration(bool DCCEnabled,bool DCCProgrammingAssumesScanDirectionUnknown,enum dml2_source_format_class SourcePixelFormat,unsigned int SurfaceWidthLuma,unsigned int SurfaceWidthChroma,unsigned int SurfaceHeightLuma,unsigned int SurfaceHeightChroma,unsigned int nomDETInKByte,unsigned int RequestHeight256ByteLuma,unsigned int RequestHeight256ByteChroma,enum dml2_swizzle_mode TilingFormat,unsigned int BytePerPixelY,unsigned int BytePerPixelC,double BytePerPixelDETY,double BytePerPixelDETC,enum dml2_rotation_angle RotationAngle,enum dml2_core_internal_request_type * RequestLuma,enum dml2_core_internal_request_type * RequestChroma,unsigned int * MaxUncompressedBlockLuma,unsigned int * MaxUncompressedBlockChroma,unsigned int * MaxCompressedBlockLuma,unsigned int * MaxCompressedBlockChroma,unsigned int * IndependentBlockLuma,unsigned int * IndependentBlockChroma)4701 static void CalculateDCCConfiguration(
4702 bool DCCEnabled,
4703 bool DCCProgrammingAssumesScanDirectionUnknown,
4704 enum dml2_source_format_class SourcePixelFormat,
4705 unsigned int SurfaceWidthLuma,
4706 unsigned int SurfaceWidthChroma,
4707 unsigned int SurfaceHeightLuma,
4708 unsigned int SurfaceHeightChroma,
4709 unsigned int nomDETInKByte,
4710 unsigned int RequestHeight256ByteLuma,
4711 unsigned int RequestHeight256ByteChroma,
4712 enum dml2_swizzle_mode TilingFormat,
4713 unsigned int BytePerPixelY,
4714 unsigned int BytePerPixelC,
4715 double BytePerPixelDETY,
4716 double BytePerPixelDETC,
4717 enum dml2_rotation_angle RotationAngle,
4718
4719 // Output
4720 enum dml2_core_internal_request_type *RequestLuma,
4721 enum dml2_core_internal_request_type *RequestChroma,
4722 unsigned int *MaxUncompressedBlockLuma,
4723 unsigned int *MaxUncompressedBlockChroma,
4724 unsigned int *MaxCompressedBlockLuma,
4725 unsigned int *MaxCompressedBlockChroma,
4726 unsigned int *IndependentBlockLuma,
4727 unsigned int *IndependentBlockChroma)
4728 {
4729 unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
4730
4731 unsigned int yuv420;
4732 unsigned int horz_div_l;
4733 unsigned int horz_div_c;
4734 unsigned int vert_div_l;
4735 unsigned int vert_div_c;
4736
4737 unsigned int swath_buf_size;
4738 double detile_buf_vp_horz_limit;
4739 double detile_buf_vp_vert_limit;
4740
4741 yuv420 = dml2_core_shared_is_420(SourcePixelFormat);
4742 horz_div_l = 1;
4743 horz_div_c = 1;
4744 vert_div_l = 1;
4745 vert_div_c = 1;
4746
4747 if (BytePerPixelY == 1)
4748 vert_div_l = 0;
4749 if (BytePerPixelC == 1)
4750 vert_div_c = 0;
4751
4752 if (BytePerPixelC == 0) {
4753 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
4754 detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
4755 detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
4756 } else {
4757 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
4758 detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) + (double)RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
4759 detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
4760 }
4761
4762 if (SourcePixelFormat == dml2_420_10) {
4763 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
4764 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
4765 }
4766
4767 detile_buf_vp_horz_limit = math_floor2(detile_buf_vp_horz_limit - 1, 16);
4768 detile_buf_vp_vert_limit = math_floor2(detile_buf_vp_vert_limit - 1, 16);
4769
4770 unsigned int MAS_vp_horz_limit;
4771 unsigned int MAS_vp_vert_limit;
4772 unsigned int max_vp_horz_width;
4773 unsigned int max_vp_vert_height;
4774 unsigned int eff_surf_width_l;
4775 unsigned int eff_surf_width_c;
4776 unsigned int eff_surf_height_l;
4777 unsigned int eff_surf_height_c;
4778
4779 unsigned int full_swath_bytes_horz_wc_l;
4780 unsigned int full_swath_bytes_horz_wc_c;
4781 unsigned int full_swath_bytes_vert_wc_l;
4782 unsigned int full_swath_bytes_vert_wc_c;
4783
4784 MAS_vp_horz_limit = SourcePixelFormat == dml2_rgbe_alpha ? 3840 : 6144;
4785 MAS_vp_vert_limit = SourcePixelFormat == dml2_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
4786 max_vp_horz_width = (unsigned int)(math_min2((double)MAS_vp_horz_limit, detile_buf_vp_horz_limit));
4787 max_vp_vert_height = (unsigned int)(math_min2((double)MAS_vp_vert_limit, detile_buf_vp_vert_limit));
4788 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
4789 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
4790 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
4791 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
4792
4793 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
4794 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
4795 if (BytePerPixelC > 0) {
4796 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
4797 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
4798 } else {
4799 full_swath_bytes_horz_wc_c = 0;
4800 full_swath_bytes_vert_wc_c = 0;
4801 }
4802
4803 if (SourcePixelFormat == dml2_420_10) {
4804 full_swath_bytes_horz_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0));
4805 full_swath_bytes_horz_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0));
4806 full_swath_bytes_vert_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0));
4807 full_swath_bytes_vert_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0));
4808 }
4809
4810 unsigned int req128_horz_wc_l;
4811 unsigned int req128_horz_wc_c;
4812 unsigned int req128_vert_wc_l;
4813 unsigned int req128_vert_wc_c;
4814
4815 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
4816 req128_horz_wc_l = 0;
4817 req128_horz_wc_c = 0;
4818 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
4819 req128_horz_wc_l = 0;
4820 req128_horz_wc_c = 1;
4821 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
4822 req128_horz_wc_l = 1;
4823 req128_horz_wc_c = 0;
4824 } else {
4825 req128_horz_wc_l = 1;
4826 req128_horz_wc_c = 1;
4827 }
4828
4829 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
4830 req128_vert_wc_l = 0;
4831 req128_vert_wc_c = 0;
4832 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
4833 req128_vert_wc_l = 0;
4834 req128_vert_wc_c = 1;
4835 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
4836 req128_vert_wc_l = 1;
4837 req128_vert_wc_c = 0;
4838 } else {
4839 req128_vert_wc_l = 1;
4840 req128_vert_wc_c = 1;
4841 }
4842
4843 unsigned int segment_order_horz_contiguous_luma;
4844 unsigned int segment_order_horz_contiguous_chroma;
4845 unsigned int segment_order_vert_contiguous_luma;
4846 unsigned int segment_order_vert_contiguous_chroma;
4847
4848 if (BytePerPixelY == 2) {
4849 segment_order_horz_contiguous_luma = 0;
4850 segment_order_vert_contiguous_luma = 1;
4851 } else {
4852 segment_order_horz_contiguous_luma = 1;
4853 segment_order_vert_contiguous_luma = 0;
4854 }
4855
4856 if (BytePerPixelC == 2) {
4857 segment_order_horz_contiguous_chroma = 0;
4858 segment_order_vert_contiguous_chroma = 1;
4859 } else {
4860 segment_order_horz_contiguous_chroma = 1;
4861 segment_order_vert_contiguous_chroma = 0;
4862 }
4863 #ifdef __DML_VBA_DEBUG__
4864 dml2_printf("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled);
4865 dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
4866 dml2_printf("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC);
4867 dml2_printf("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l);
4868 dml2_printf("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c);
4869 dml2_printf("DML::%s: full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l);
4870 dml2_printf("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c);
4871 dml2_printf("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma);
4872 dml2_printf("DML::%s: segment_order_horz_contiguous_chroma = %u\n", __func__, segment_order_horz_contiguous_chroma);
4873 #endif
4874 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
4875 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
4876 *RequestLuma = dml2_core_internal_request_type_256_bytes;
4877 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
4878 *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous;
4879 } else {
4880 *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous;
4881 }
4882 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
4883 *RequestChroma = dml2_core_internal_request_type_256_bytes;
4884 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
4885 *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous;
4886 } else {
4887 *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous;
4888 }
4889 } else if (!dml_is_vertical_rotation(RotationAngle)) {
4890 if (req128_horz_wc_l == 0) {
4891 *RequestLuma = dml2_core_internal_request_type_256_bytes;
4892 } else if (segment_order_horz_contiguous_luma == 0) {
4893 *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous;
4894 } else {
4895 *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous;
4896 }
4897 if (req128_horz_wc_c == 0) {
4898 *RequestChroma = dml2_core_internal_request_type_256_bytes;
4899 } else if (segment_order_horz_contiguous_chroma == 0) {
4900 *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous;
4901 } else {
4902 *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous;
4903 }
4904 } else {
4905 if (req128_vert_wc_l == 0) {
4906 *RequestLuma = dml2_core_internal_request_type_256_bytes;
4907 } else if (segment_order_vert_contiguous_luma == 0) {
4908 *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous;
4909 } else {
4910 *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous;
4911 }
4912 if (req128_vert_wc_c == 0) {
4913 *RequestChroma = dml2_core_internal_request_type_256_bytes;
4914 } else if (segment_order_vert_contiguous_chroma == 0) {
4915 *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous;
4916 } else {
4917 *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous;
4918 }
4919 }
4920
4921 if (*RequestLuma == dml2_core_internal_request_type_256_bytes) {
4922 *MaxUncompressedBlockLuma = 256;
4923 *MaxCompressedBlockLuma = 256;
4924 *IndependentBlockLuma = 0;
4925 } else if (*RequestLuma == dml2_core_internal_request_type_128_bytes_contiguous) {
4926 *MaxUncompressedBlockLuma = 256;
4927 *MaxCompressedBlockLuma = 128;
4928 *IndependentBlockLuma = 128;
4929 } else {
4930 *MaxUncompressedBlockLuma = 256;
4931 *MaxCompressedBlockLuma = 64;
4932 *IndependentBlockLuma = 64;
4933 }
4934
4935 if (*RequestChroma == dml2_core_internal_request_type_256_bytes) {
4936 *MaxUncompressedBlockChroma = 256;
4937 *MaxCompressedBlockChroma = 256;
4938 *IndependentBlockChroma = 0;
4939 } else if (*RequestChroma == dml2_core_internal_request_type_128_bytes_contiguous) {
4940 *MaxUncompressedBlockChroma = 256;
4941 *MaxCompressedBlockChroma = 128;
4942 *IndependentBlockChroma = 128;
4943 } else {
4944 *MaxUncompressedBlockChroma = 256;
4945 *MaxCompressedBlockChroma = 64;
4946 *IndependentBlockChroma = 64;
4947 }
4948
4949 if (DCCEnabled != true || BytePerPixelC == 0) {
4950 *MaxUncompressedBlockChroma = 0;
4951 *MaxCompressedBlockChroma = 0;
4952 *IndependentBlockChroma = 0;
4953 }
4954
4955 if (DCCEnabled != true) {
4956 *MaxUncompressedBlockLuma = 0;
4957 *MaxCompressedBlockLuma = 0;
4958 *IndependentBlockLuma = 0;
4959 }
4960
4961 #ifdef __DML_VBA_DEBUG__
4962 dml2_printf("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma);
4963 dml2_printf("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma);
4964 dml2_printf("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma);
4965 dml2_printf("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma);
4966 dml2_printf("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma);
4967 dml2_printf("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma);
4968 #endif
4969
4970 }
4971
calculate_mcache_row_bytes(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_calculate_mcache_row_bytes_params * p)4972 static void calculate_mcache_row_bytes(
4973 struct dml2_core_internal_scratch *scratch,
4974 struct dml2_core_calcs_calculate_mcache_row_bytes_params *p)
4975 {
4976 unsigned int vmpg_bytes = 0;
4977 unsigned int blk_bytes = 0;
4978 float meta_per_mvmpg_per_channel = 0;
4979 unsigned int est_blk_per_vmpg = 2;
4980 unsigned int mvmpg_per_row_ub = 0;
4981 unsigned int full_vp_width_mvmpg_aligned = 0;
4982 unsigned int full_vp_height_mvmpg_aligned = 0;
4983
4984 #ifdef __DML_VBA_DEBUG__
4985 dml2_printf("DML::%s: num_chans = %u\n", __func__, p->num_chans);
4986 dml2_printf("DML::%s: mem_word_bytes = %u\n", __func__, p->mem_word_bytes);
4987 dml2_printf("DML::%s: mcache_line_size_bytes = %u\n", __func__, p->mcache_line_size_bytes);
4988 dml2_printf("DML::%s: mcache_size_bytes = %u\n", __func__, p->mcache_size_bytes);
4989 dml2_printf("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable);
4990 dml2_printf("DML::%s: gpuvm_page_size_kbytes = %u\n", __func__, p->gpuvm_page_size_kbytes);
4991 dml2_printf("DML::%s: vp_stationary = %u\n", __func__, p->vp_stationary);
4992 dml2_printf("DML::%s: tiling_mode = %u\n", __func__, p->tiling_mode);
4993 dml2_printf("DML::%s: vp_start_x = %u\n", __func__, p->vp_start_x);
4994 dml2_printf("DML::%s: vp_start_y = %u\n", __func__, p->vp_start_y);
4995 dml2_printf("DML::%s: full_vp_width = %u\n", __func__, p->full_vp_width);
4996 dml2_printf("DML::%s: full_vp_height = %u\n", __func__, p->full_vp_height);
4997 dml2_printf("DML::%s: blk_width = %u\n", __func__, p->blk_width);
4998 dml2_printf("DML::%s: blk_height = %u\n", __func__, p->blk_height);
4999 dml2_printf("DML::%s: vmpg_width = %u\n", __func__, p->vmpg_width);
5000 dml2_printf("DML::%s: vmpg_height = %u\n", __func__, p->vmpg_height);
5001 dml2_printf("DML::%s: full_swath_bytes = %u\n", __func__, p->full_swath_bytes);
5002 #endif
5003 DML2_ASSERT(p->mcache_line_size_bytes != 0);
5004 DML2_ASSERT(p->mcache_size_bytes != 0);
5005
5006 *p->mvmpg_width = 0;
5007 *p->mvmpg_height = 0;
5008
5009 if (p->full_vp_height == 0 && p->full_vp_width == 0) {
5010 *p->num_mcaches = 0;
5011 *p->mcache_row_bytes = 0;
5012 } else {
5013 blk_bytes = dml_get_tile_block_size_bytes(p->tiling_mode);
5014
5015 // if gpuvm is not enable, the alignment boundary should be in terms of tiling block size
5016 vmpg_bytes = p->gpuvm_page_size_kbytes * 1024;
5017
5018 //With vmpg_bytes >= tile blk_bytes, the meta_row_width alignment equations are relative to the vmpg_width/height.
5019 // But for 4KB page with 64KB tile block, we need the meta for all pages in the tile block.
5020 // Therefore, the alignment is relative to the blk_width/height. The factor of 16 vmpg per 64KB tile block is applied at the end.
5021 *p->mvmpg_width = p->blk_width;
5022 *p->mvmpg_height = p->blk_height;
5023 if (p->gpuvm_enable) {
5024 if (vmpg_bytes >= blk_bytes) {
5025 *p->mvmpg_width = p->vmpg_width;
5026 *p->mvmpg_height = p->vmpg_height;
5027 } else if (!((blk_bytes == 65536) && (vmpg_bytes == 4096))) {
5028 dml2_printf("ERROR: DML::%s: Tiling size and vm page size combination not supported\n", __func__);
5029 DML2_ASSERT(0);
5030 }
5031 }
5032
5033 //For plane0 & 1, first calculate full_vp_width/height_l/c aligned to vmpg_width/height_l/c
5034 full_vp_width_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_x + p->full_vp_width) + *p->mvmpg_width - 1, *p->mvmpg_width) - math_floor2(p->vp_start_x, *p->mvmpg_width));
5035 full_vp_height_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_y + p->full_vp_height) + *p->mvmpg_height - 1, *p->mvmpg_height) - math_floor2(p->vp_start_y, *p->mvmpg_height));
5036
5037 *p->full_vp_access_width_mvmpg_aligned = p->surf_vert ? full_vp_height_mvmpg_aligned : full_vp_width_mvmpg_aligned;
5038
5039 //Use the equation for the exact alignment when possible. Note that the exact alignment cannot be used for horizontal access if vmpg_bytes > blk_bytes.
5040 if (!p->surf_vert) { //horizontal access
5041 if (p->vp_stationary == 1 && vmpg_bytes <= blk_bytes)
5042 *p->meta_row_width_ub = full_vp_width_mvmpg_aligned;
5043 else
5044 *p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_width - 1, *p->mvmpg_width) + *p->mvmpg_width;
5045 mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_width;
5046 } else { //vertical access
5047 if (p->vp_stationary == 1)
5048 *p->meta_row_width_ub = full_vp_height_mvmpg_aligned;
5049 else
5050 *p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_height - 1, *p->mvmpg_height) + *p->mvmpg_height;
5051 mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_height;
5052 }
5053
5054 unsigned int meta_per_mvmpg_per_channel_ub = 0;
5055
5056 if (p->gpuvm_enable) {
5057 meta_per_mvmpg_per_channel = (float)vmpg_bytes / (float)256 / p->num_chans;
5058
5059 //but using the est_blk_per_vmpg between 2 and 4, to be not as pessimestic
5060 if (p->surf_vert && vmpg_bytes > blk_bytes) {
5061 meta_per_mvmpg_per_channel = (float)est_blk_per_vmpg * blk_bytes / 256 / p->num_chans;
5062 }
5063
5064 *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel)); // dcc_dr_oh_nom
5065 } else {
5066 meta_per_mvmpg_per_channel = (float)blk_bytes / (float)256 / p->num_chans;
5067
5068 if (!p->surf_vert)
5069 *p->dcc_dram_bw_nom_overhead_factor = 1 + 1.0 / 256.0;
5070 else
5071 *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel));
5072 }
5073
5074 meta_per_mvmpg_per_channel_ub = (unsigned int)math_ceil2((double)meta_per_mvmpg_per_channel, p->mcache_line_size_bytes);
5075
5076 //but for 4KB vmpg with 64KB tile blk
5077 if (p->gpuvm_enable && (blk_bytes == 65536) && (vmpg_bytes == 4096))
5078 meta_per_mvmpg_per_channel_ub = 16 * meta_per_mvmpg_per_channel_ub;
5079
5080 // If this mcache_row_bytes for the full viewport of the surface is less than or equal to mcache_bytes,
5081 // then one mcache can be used for this request stream. If not, it is useful to know the width of the viewport that can be supported in the mcache_bytes.
5082 if (p->gpuvm_enable || !p->surf_vert) {
5083 *p->mcache_row_bytes = mvmpg_per_row_ub * meta_per_mvmpg_per_channel_ub;
5084 } else { // horizontal and gpuvm disable
5085 *p->mcache_row_bytes = *p->meta_row_width_ub * p->blk_height * p->bytes_per_pixel / 256;
5086 *p->mcache_row_bytes = (unsigned int)math_ceil2((double)*p->mcache_row_bytes / p->num_chans, p->mcache_line_size_bytes);
5087 }
5088
5089 *p->dcc_dram_bw_pref_overhead_factor = 1 + math_max2(1.0 / 256.0, *p->mcache_row_bytes / p->full_swath_bytes); // dcc_dr_oh_pref
5090 *p->num_mcaches = (unsigned int)math_ceil2((double)*p->mcache_row_bytes / p->mcache_size_bytes, 1);
5091
5092 unsigned int mvmpg_per_mcache = p->mcache_size_bytes / meta_per_mvmpg_per_channel_ub;
5093 *p->mvmpg_per_mcache_lb = (unsigned int)math_floor2(mvmpg_per_mcache, 1);
5094
5095 DML2_ASSERT(*p->num_mcaches > 0);
5096
5097 #ifdef __DML_VBA_DEBUG__
5098 dml2_printf("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable);
5099 dml2_printf("DML::%s: vmpg_bytes = %u\n", __func__, vmpg_bytes);
5100 dml2_printf("DML::%s: blk_bytes = %u\n", __func__, blk_bytes);
5101 dml2_printf("DML::%s: meta_per_mvmpg_per_channel = %f\n", __func__, meta_per_mvmpg_per_channel);
5102 dml2_printf("DML::%s: mvmpg_per_row_ub = %u\n", __func__, mvmpg_per_row_ub);
5103 dml2_printf("DML::%s: meta_row_width_ub = %u\n", __func__, *p->meta_row_width_ub);
5104 dml2_printf("DML::%s: mvmpg_width = %u\n", __func__, *p->mvmpg_width);
5105 dml2_printf("DML::%s: mvmpg_height = %u\n", __func__, *p->mvmpg_height);
5106 dml2_printf("DML::%s: num_mcaches = %u\n", __func__, *p->num_mcaches);
5107 dml2_printf("DML::%s: dcc_dram_bw_nom_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_nom_overhead_factor);
5108 dml2_printf("DML::%s: dcc_dram_bw_pref_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_pref_overhead_factor);
5109 #endif
5110 }
5111
5112 #ifdef __DML_VBA_DEBUG__
5113 dml2_printf("DML::%s: mcache_row_bytes = %u\n", __func__, *p->mcache_row_bytes);
5114 dml2_printf("DML::%s: num_mcaches = %u\n", __func__, *p->num_mcaches);
5115 #endif
5116 }
5117
calculate_mcache_setting(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_calculate_mcache_setting_params * p)5118 static void calculate_mcache_setting(
5119 struct dml2_core_internal_scratch *scratch,
5120 struct dml2_core_calcs_calculate_mcache_setting_params *p)
5121 {
5122 unsigned int n;
5123
5124 struct dml2_core_shared_calculate_mcache_setting_locals *l = &scratch->calculate_mcache_setting_locals;
5125 memset(l, 0, sizeof(struct dml2_core_shared_calculate_mcache_setting_locals));
5126
5127 *p->num_mcaches_l = 0;
5128 *p->mcache_row_bytes_l = 0;
5129 *p->dcc_dram_bw_nom_overhead_factor_l = 1.0;
5130 *p->dcc_dram_bw_pref_overhead_factor_l = 1.0;
5131
5132 *p->num_mcaches_c = 0;
5133 *p->mcache_row_bytes_c = 0;
5134 *p->dcc_dram_bw_nom_overhead_factor_c = 1.0;
5135 *p->dcc_dram_bw_pref_overhead_factor_c = 1.0;
5136
5137 *p->mall_comb_mcache_l = 0;
5138 *p->mall_comb_mcache_c = 0;
5139 *p->lc_comb_mcache = 0;
5140
5141 if (!p->dcc_enable)
5142 return;
5143
5144 l->is_dual_plane = dml2_core_shared_is_420(p->source_format) || p->source_format == dml2_rgbe_alpha;
5145
5146 l->l_p.num_chans = p->num_chans;
5147 l->l_p.mem_word_bytes = p->mem_word_bytes;
5148 l->l_p.mcache_size_bytes = p->mcache_size_bytes;
5149 l->l_p.mcache_line_size_bytes = p->mcache_line_size_bytes;
5150 l->l_p.gpuvm_enable = p->gpuvm_enable;
5151 l->l_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes;
5152 l->l_p.surf_vert = p->surf_vert;
5153 l->l_p.vp_stationary = p->vp_stationary;
5154 l->l_p.tiling_mode = p->tiling_mode;
5155 l->l_p.vp_start_x = p->vp_start_x_l;
5156 l->l_p.vp_start_y = p->vp_start_y_l;
5157 l->l_p.full_vp_width = p->full_vp_width_l;
5158 l->l_p.full_vp_height = p->full_vp_height_l;
5159 l->l_p.blk_width = p->blk_width_l;
5160 l->l_p.blk_height = p->blk_height_l;
5161 l->l_p.vmpg_width = p->vmpg_width_l;
5162 l->l_p.vmpg_height = p->vmpg_height_l;
5163 l->l_p.full_swath_bytes = p->full_swath_bytes_l;
5164 l->l_p.bytes_per_pixel = p->bytes_per_pixel_l;
5165
5166 // output
5167 l->l_p.num_mcaches = p->num_mcaches_l;
5168 l->l_p.mcache_row_bytes = p->mcache_row_bytes_l;
5169 l->l_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_l;
5170 l->l_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_l;
5171 l->l_p.mvmpg_width = &l->mvmpg_width_l;
5172 l->l_p.mvmpg_height = &l->mvmpg_height_l;
5173 l->l_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_l;
5174 l->l_p.meta_row_width_ub = &l->meta_row_width_l;
5175 l->l_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_l;
5176
5177 calculate_mcache_row_bytes(scratch, &l->l_p);
5178 dml2_assert(*p->num_mcaches_l > 0);
5179
5180 if (l->is_dual_plane) {
5181 l->c_p.num_chans = p->num_chans;
5182 l->c_p.mem_word_bytes = p->mem_word_bytes;
5183 l->c_p.mcache_size_bytes = p->mcache_size_bytes;
5184 l->c_p.mcache_line_size_bytes = p->mcache_line_size_bytes;
5185 l->c_p.gpuvm_enable = p->gpuvm_enable;
5186 l->c_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes;
5187 l->c_p.surf_vert = p->surf_vert;
5188 l->c_p.vp_stationary = p->vp_stationary;
5189 l->c_p.tiling_mode = p->tiling_mode;
5190 l->c_p.vp_start_x = p->vp_start_x_c;
5191 l->c_p.vp_start_y = p->vp_start_y_c;
5192 l->c_p.full_vp_width = p->full_vp_width_c;
5193 l->c_p.full_vp_height = p->full_vp_height_c;
5194 l->c_p.blk_width = p->blk_width_c;
5195 l->c_p.blk_height = p->blk_height_c;
5196 l->c_p.vmpg_width = p->vmpg_width_c;
5197 l->c_p.vmpg_height = p->vmpg_height_c;
5198 l->c_p.full_swath_bytes = p->full_swath_bytes_c;
5199 l->c_p.bytes_per_pixel = p->bytes_per_pixel_c;
5200
5201 // output
5202 l->c_p.num_mcaches = p->num_mcaches_c;
5203 l->c_p.mcache_row_bytes = p->mcache_row_bytes_c;
5204 l->c_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_c;
5205 l->c_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_c;
5206 l->c_p.mvmpg_width = &l->mvmpg_width_c;
5207 l->c_p.mvmpg_height = &l->mvmpg_height_c;
5208 l->c_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_c;
5209 l->c_p.meta_row_width_ub = &l->meta_row_width_c;
5210 l->c_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_c;
5211
5212 calculate_mcache_row_bytes(scratch, &l->c_p);
5213 dml2_assert(*p->num_mcaches_c > 0);
5214 }
5215
5216 // Sharing for iMALL access
5217 l->mcache_remainder_l = *p->mcache_row_bytes_l % p->mcache_size_bytes;
5218 l->mcache_remainder_c = *p->mcache_row_bytes_c % p->mcache_size_bytes;
5219 l->mvmpg_access_width_l = p->surf_vert ? l->mvmpg_height_l : l->mvmpg_width_l;
5220 l->mvmpg_access_width_c = p->surf_vert ? l->mvmpg_height_c : l->mvmpg_width_c;
5221
5222 if (p->imall_enable) {
5223 *p->mall_comb_mcache_l = (2 * l->mcache_remainder_l <= p->mcache_size_bytes);
5224
5225 if (l->is_dual_plane)
5226 *p->mall_comb_mcache_c = (2 * l->mcache_remainder_c <= p->mcache_size_bytes);
5227 }
5228
5229 if (!p->surf_vert) // horizonatal access
5230 l->luma_time_factor = (double)l->mvmpg_height_c / l->mvmpg_height_l * 2;
5231 else // vertical access
5232 l->luma_time_factor = (double)l->mvmpg_width_c / l->mvmpg_width_l * 2;
5233
5234 // The algorithm starts with computing a non-integer, avg_mcache_element_size_l/c:
5235 l->avg_mcache_element_size_l = l->meta_row_width_l / *p->num_mcaches_l;
5236 if (l->is_dual_plane) {
5237 l->avg_mcache_element_size_c = l->meta_row_width_c / *p->num_mcaches_c;
5238
5239 if (!p->imall_enable || (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c)) {
5240 l->lc_comb_last_mcache_size = (unsigned int)((l->mcache_remainder_l * (*p->mall_comb_mcache_l ? 2 : 1) * l->luma_time_factor) +
5241 (l->mcache_remainder_c * (*p->mall_comb_mcache_c ? 2 : 1)));
5242 }
5243 *p->lc_comb_mcache = (l->lc_comb_last_mcache_size <= p->mcache_size_bytes) && (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c);
5244 }
5245
5246 #ifdef __DML_VBA_DEBUG__
5247 dml2_printf("DML::%s: imall_enable = %u\n", __func__, p->imall_enable);
5248 dml2_printf("DML::%s: is_dual_plane = %u\n", __func__, l->is_dual_plane);
5249 dml2_printf("DML::%s: surf_vert = %u\n", __func__, p->surf_vert);
5250 dml2_printf("DML::%s: mvmpg_width_l = %u\n", __func__, l->mvmpg_width_l);
5251 dml2_printf("DML::%s: mvmpg_height_l = %u\n", __func__, l->mvmpg_height_l);
5252 dml2_printf("DML::%s: mcache_remainder_l = %f\n", __func__, l->mcache_remainder_l);
5253 dml2_printf("DML::%s: num_mcaches_l = %u\n", __func__, *p->num_mcaches_l);
5254 dml2_printf("DML::%s: avg_mcache_element_size_l = %u\n", __func__, l->avg_mcache_element_size_l);
5255 dml2_printf("DML::%s: mvmpg_access_width_l = %u\n", __func__, l->mvmpg_access_width_l);
5256 dml2_printf("DML::%s: mall_comb_mcache_l = %u\n", __func__, *p->mall_comb_mcache_l);
5257
5258 if (l->is_dual_plane) {
5259 dml2_printf("DML::%s: mvmpg_width_c = %u\n", __func__, l->mvmpg_width_c);
5260 dml2_printf("DML::%s: mvmpg_height_c = %u\n", __func__, l->mvmpg_height_c);
5261 dml2_printf("DML::%s: mcache_remainder_c = %f\n", __func__, l->mcache_remainder_c);
5262 dml2_printf("DML::%s: luma_time_factor = %f\n", __func__, l->luma_time_factor);
5263 dml2_printf("DML::%s: num_mcaches_c = %u\n", __func__, *p->num_mcaches_c);
5264 dml2_printf("DML::%s: avg_mcache_element_size_c = %u\n", __func__, l->avg_mcache_element_size_c);
5265 dml2_printf("DML::%s: mvmpg_access_width_c = %u\n", __func__, l->mvmpg_access_width_c);
5266 dml2_printf("DML::%s: mall_comb_mcache_c = %u\n", __func__, *p->mall_comb_mcache_c);
5267 dml2_printf("DML::%s: lc_comb_last_mcache_size = %u\n", __func__, l->lc_comb_last_mcache_size);
5268 dml2_printf("DML::%s: lc_comb_mcache = %u\n", __func__, *p->lc_comb_mcache);
5269 }
5270 #endif
5271 // calculate split_coordinate
5272 l->full_vp_access_width_l = p->surf_vert ? p->full_vp_height_l : p->full_vp_width_l;
5273 l->full_vp_access_width_c = p->surf_vert ? p->full_vp_height_c : p->full_vp_width_c;
5274
5275 for (n = 0; n < *p->num_mcaches_l - 1; n++) {
5276 p->mcache_offsets_l[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_l / l->mvmpg_access_width_l, 1)) * l->mvmpg_access_width_l;
5277 }
5278 p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l;
5279
5280 if (l->is_dual_plane) {
5281 for (n = 0; n < *p->num_mcaches_c - 1; n++) {
5282 p->mcache_offsets_c[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_c / l->mvmpg_access_width_c, 1)) * l->mvmpg_access_width_c;
5283 }
5284 p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c;
5285 }
5286 #ifdef __DML_VBA_DEBUG__
5287 for (n = 0; n < *p->num_mcaches_l; n++)
5288 dml2_printf("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]);
5289
5290 if (l->is_dual_plane) {
5291 for (n = 0; n < *p->num_mcaches_c; n++)
5292 dml2_printf("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]);
5293 }
5294 #endif
5295
5296 // Luma/Chroma combine in the last mcache
5297 // In the case of Luma/Chroma combine-mCache (with lc_comb_mcache==1), all mCaches except the last segment are filled as much as possible, when stay aligned to mvmpg boundary
5298 if (*p->lc_comb_mcache && l->is_dual_plane) {
5299 for (n = 0; n < *p->num_mcaches_l - 1; n++)
5300 p->mcache_offsets_l[n] = (n + 1) * l->mvmpg_per_mcache_lb_l * l->mvmpg_access_width_l;
5301 p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l;
5302
5303 for (n = 0; n < *p->num_mcaches_c - 1; n++)
5304 p->mcache_offsets_c[n] = (n + 1) * l->mvmpg_per_mcache_lb_c * l->mvmpg_access_width_c;
5305 p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c;
5306
5307 #ifdef __DML_VBA_DEBUG__
5308 for (n = 0; n < *p->num_mcaches_l; n++)
5309 dml2_printf("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]);
5310
5311 for (n = 0; n < *p->num_mcaches_c; n++)
5312 dml2_printf("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]);
5313 #endif
5314 }
5315
5316 *p->mcache_shift_granularity_l = l->mvmpg_access_width_l;
5317 *p->mcache_shift_granularity_c = l->mvmpg_access_width_c;
5318 }
5319
calculate_mall_bw_overhead_factor(double mall_prefetch_sdp_overhead_factor[],double mall_prefetch_dram_overhead_factor[],const struct dml2_display_cfg * display_cfg,unsigned int num_active_planes)5320 static void calculate_mall_bw_overhead_factor(
5321 double mall_prefetch_sdp_overhead_factor[], //mall_sdp_oh_nom/pref
5322 double mall_prefetch_dram_overhead_factor[], //mall_dram_oh_nom/pref
5323
5324 // input
5325 const struct dml2_display_cfg *display_cfg,
5326 unsigned int num_active_planes)
5327 {
5328 for (unsigned int k = 0; k < num_active_planes; ++k) {
5329 mall_prefetch_sdp_overhead_factor[k] = 1.0;
5330 mall_prefetch_dram_overhead_factor[k] = 1.0;
5331
5332 // SDP - on the return side
5333 if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall) // always no data return
5334 mall_prefetch_sdp_overhead_factor[k] = 1.25;
5335 else if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return)
5336 mall_prefetch_sdp_overhead_factor[k] = 0.25;
5337
5338 // DRAM
5339 if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall)
5340 mall_prefetch_dram_overhead_factor[k] = 2.0;
5341
5342 #ifdef __DML_VBA_DEBUG__
5343 dml2_printf("DML::%s: k=%u, mall_prefetch_sdp_overhead_factor = %f\n", __func__, k, mall_prefetch_sdp_overhead_factor[k]);
5344 dml2_printf("DML::%s: k=%u, mall_prefetch_dram_overhead_factor = %f\n", __func__, k, mall_prefetch_dram_overhead_factor[k]);
5345 #endif
5346 }
5347 }
5348
dml_get_return_bandwidth_available(const struct dml2_soc_bb * soc,enum dml2_core_internal_soc_state_type state_type,enum dml2_core_internal_bw_type bw_type,bool is_avg_bw,bool is_hvm_en,bool is_hvm_only,double dcflk_mhz,double fclk_mhz,double dram_bw_mbps)5349 static double dml_get_return_bandwidth_available(
5350 const struct dml2_soc_bb *soc,
5351 enum dml2_core_internal_soc_state_type state_type,
5352 enum dml2_core_internal_bw_type bw_type,
5353 bool is_avg_bw,
5354 bool is_hvm_en,
5355 bool is_hvm_only,
5356 double dcflk_mhz,
5357 double fclk_mhz,
5358 double dram_bw_mbps)
5359 {
5360 double return_bw_mbps = 0.;
5361 double ideal_sdp_bandwidth = (double)soc->return_bus_width_bytes * dcflk_mhz;
5362 double ideal_fabric_bandwidth = fclk_mhz * (double)soc->fabric_datapath_to_dcn_data_return_bytes;
5363 double ideal_dram_bandwidth = dram_bw_mbps; //dram_speed_mts * soc->clk_table.dram_config.channel_count * soc->clk_table.dram_config.channel_width_bytes;
5364
5365 double derate_sdp_factor = 1;
5366 double derate_fabric_factor = 1;
5367 double derate_dram_factor = 1;
5368
5369 if (is_avg_bw) {
5370 if (state_type == dml2_core_internal_soc_state_svp_prefetch) {
5371 derate_sdp_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dcfclk_derate_percent / 100.0;
5372 derate_fabric_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.fclk_derate_percent / 100.0;
5373 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dram_derate_percent_pixel / 100.0;
5374 } else { // just assume sys_active
5375 derate_sdp_factor = soc->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100.0;
5376 derate_fabric_factor = soc->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100.0;
5377 derate_dram_factor = soc->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100.0;
5378 }
5379 } else { // urgent bw
5380 if (state_type == dml2_core_internal_soc_state_svp_prefetch) {
5381 derate_sdp_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dcfclk_derate_percent / 100.0;
5382 derate_fabric_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.fclk_derate_percent / 100.0;
5383 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100.0;
5384
5385 if (is_hvm_en) {
5386 if (is_hvm_only)
5387 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_vm / 100.0;
5388 else
5389 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel_and_vm / 100.0;
5390 } else {
5391 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100.0;
5392 }
5393 } else { // just assume sys_active
5394 derate_sdp_factor = soc->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0;
5395 derate_fabric_factor = soc->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100.0;
5396
5397 if (is_hvm_en) {
5398 if (is_hvm_only)
5399 derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_vm / 100.0;
5400 else
5401 derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel_and_vm / 100.0;
5402 } else {
5403 derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100.0;
5404 }
5405 }
5406 }
5407
5408 double derate_sdp_bandwidth = ideal_sdp_bandwidth * derate_sdp_factor;
5409 double derate_fabric_bandwidth = ideal_fabric_bandwidth * derate_fabric_factor;
5410 double derate_dram_bandwidth = ideal_dram_bandwidth * derate_dram_factor;
5411
5412 if (bw_type == dml2_core_internal_bw_sdp)
5413 return_bw_mbps = math_min2(derate_sdp_bandwidth, derate_fabric_bandwidth);
5414 else // dml2_core_internal_bw_dram
5415 return_bw_mbps = derate_dram_bandwidth;
5416
5417 #ifdef __DML_VBA_DEBUG__
5418 dml2_printf("DML::%s: is_avg_bw = %u\n", __func__, is_avg_bw);
5419 dml2_printf("DML::%s: is_hvm_en = %u\n", __func__, is_hvm_en);
5420 dml2_printf("DML::%s: is_hvm_only = %u\n", __func__, is_hvm_only);
5421 dml2_printf("DML::%s: state_type = %s\n", __func__, dml2_core_internal_soc_state_type_str(state_type));
5422 dml2_printf("DML::%s: bw_type = %s\n", __func__, dml2_core_internal_bw_type_str(bw_type));
5423 dml2_printf("DML::%s: dcflk_mhz = %f\n", __func__, dcflk_mhz);
5424 dml2_printf("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz);
5425 dml2_printf("DML::%s: ideal_sdp_bandwidth = %f\n", __func__, ideal_sdp_bandwidth);
5426 dml2_printf("DML::%s: ideal_fabric_bandwidth = %f\n", __func__, ideal_fabric_bandwidth);
5427 dml2_printf("DML::%s: ideal_dram_bandwidth = %f\n", __func__, ideal_dram_bandwidth);
5428 dml2_printf("DML::%s: derate_sdp_bandwidth = %f (derate %f)\n", __func__, derate_sdp_bandwidth, derate_sdp_factor);
5429 dml2_printf("DML::%s: derate_fabric_bandwidth = %f (derate %f)\n", __func__, derate_fabric_bandwidth, derate_fabric_factor);
5430 dml2_printf("DML::%s: derate_dram_bandwidth = %f (derate %f)\n", __func__, derate_dram_bandwidth, derate_dram_factor);
5431 dml2_printf("DML::%s: return_bw_mbps = %f\n", __func__, return_bw_mbps);
5432 #endif
5433 return return_bw_mbps;
5434 }
5435
calculate_bandwidth_available(double avg_bandwidth_available_min[dml2_core_internal_soc_state_max],double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available_min[dml2_core_internal_soc_state_max],double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max],double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max],const struct dml2_soc_bb * soc,bool HostVMEnable,double dcfclk_mhz,double fclk_mhz,double dram_bw_mbps)5436 static void calculate_bandwidth_available(
5437 double avg_bandwidth_available_min[dml2_core_internal_soc_state_max],
5438 double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
5439 double urg_bandwidth_available_min[dml2_core_internal_soc_state_max], // min between SDP and DRAM
5440 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
5441 double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max],
5442 double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max],
5443
5444 const struct dml2_soc_bb *soc,
5445 bool HostVMEnable,
5446 double dcfclk_mhz,
5447 double fclk_mhz,
5448 double dram_bw_mbps)
5449 {
5450 unsigned int n, m;
5451
5452 dml2_printf("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz);
5453 dml2_printf("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz);
5454 dml2_printf("DML::%s: dram_bw_mbps = %f\n", __func__, dram_bw_mbps);
5455
5456 // Calculate all the bandwidth availabe
5457 for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
5458 for (n = 0; n < dml2_core_internal_bw_max; n++) {
5459 avg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc,
5460 m, // soc_state
5461 n, // bw_type
5462 1, // avg_bw
5463 HostVMEnable,
5464 0, // hvm_only
5465 dcfclk_mhz,
5466 fclk_mhz,
5467 dram_bw_mbps);
5468
5469 urg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
5470
5471
5472 dml2_printf("DML::%s: avg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), avg_bandwidth_available[m][n]);
5473 dml2_printf("DML::%s: urg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_available[m][n]);
5474
5475 // urg_bandwidth_available_vm_only is indexed by soc_state
5476 if (n == dml2_core_internal_bw_dram) {
5477 urg_bandwidth_available_vm_only[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 1, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
5478 urg_bandwidth_available_pixel_and_vm[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
5479 }
5480 }
5481
5482 avg_bandwidth_available_min[m] = math_min2(avg_bandwidth_available[m][dml2_core_internal_bw_dram], avg_bandwidth_available[m][dml2_core_internal_bw_sdp]);
5483 urg_bandwidth_available_min[m] = math_min2(urg_bandwidth_available[m][dml2_core_internal_bw_dram], urg_bandwidth_available[m][dml2_core_internal_bw_sdp]);
5484
5485 #ifdef __DML_VBA_DEBUG__
5486 dml2_printf("DML::%s: avg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), avg_bandwidth_available_min[m]);
5487 dml2_printf("DML::%s: urg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_min[m]);
5488 dml2_printf("DML::%s: urg_bandwidth_available_vm_only[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_vm_only[n]);
5489 #endif
5490 }
5491 }
5492
calculate_avg_bandwidth_required(double avg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],const struct dml2_display_cfg * display_cfg,unsigned int num_active_planes,double ReadBandwidthLuma[],double ReadBandwidthChroma[],double cursor_bw[],double dcc_dram_bw_nom_overhead_factor_p0[],double dcc_dram_bw_nom_overhead_factor_p1[],double mall_prefetch_dram_overhead_factor[],double mall_prefetch_sdp_overhead_factor[])5493 static void calculate_avg_bandwidth_required(
5494 double avg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
5495
5496 // input
5497 const struct dml2_display_cfg *display_cfg,
5498 unsigned int num_active_planes,
5499 double ReadBandwidthLuma[],
5500 double ReadBandwidthChroma[],
5501 double cursor_bw[],
5502 double dcc_dram_bw_nom_overhead_factor_p0[],
5503 double dcc_dram_bw_nom_overhead_factor_p1[],
5504 double mall_prefetch_dram_overhead_factor[],
5505 double mall_prefetch_sdp_overhead_factor[])
5506 {
5507 unsigned int n, m, k;
5508
5509 // Average BW support check
5510 for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
5511 for (n = 0; n < dml2_core_internal_bw_max; n++) { // sdp, dram
5512 avg_bandwidth_required[m][n] = 0;
5513 }
5514 }
5515
5516 // SysActive and SVP Prefetch AVG bandwidth Check
5517 for (k = 0; k < num_active_planes; ++k) {
5518 #ifdef __DML_VBA_DEBUG__
5519 dml2_printf("DML::%s: plane %0d\n", __func__, k);
5520 dml2_printf("DML::%s: ReadBandwidthLuma=%f\n", __func__, ReadBandwidthLuma[k]);
5521 dml2_printf("DML::%s: ReadBandwidthChroma=%f\n", __func__, ReadBandwidthChroma[k]);
5522 dml2_printf("DML::%s: dcc_dram_bw_nom_overhead_factor_p0=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p0[k]);
5523 dml2_printf("DML::%s: dcc_dram_bw_nom_overhead_factor_p1=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p1[k]);
5524 dml2_printf("DML::%s: mall_prefetch_dram_overhead_factor=%f\n", __func__, mall_prefetch_dram_overhead_factor[k]);
5525 dml2_printf("DML::%s: mall_prefetch_sdp_overhead_factor=%f\n", __func__, mall_prefetch_sdp_overhead_factor[k]);
5526 #endif
5527
5528 double sdp_overhead_factor = mall_prefetch_sdp_overhead_factor[k];
5529 double dram_overhead_factor_p0 = dcc_dram_bw_nom_overhead_factor_p0[k] * mall_prefetch_dram_overhead_factor[k];
5530 double dram_overhead_factor_p1 = dcc_dram_bw_nom_overhead_factor_p1[k] * mall_prefetch_dram_overhead_factor[k];
5531
5532 // FIXME_DCN4, was missing cursor_bw in here, but do I actually need that and tdlut bw for average bandwidth calculation?
5533 // active avg bw not include phantom, but svp_prefetch avg bw should include phantom pipes
5534 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
5535 avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] += sdp_overhead_factor * (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) + cursor_bw[k];
5536 avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] += dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k];
5537 }
5538 avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] += sdp_overhead_factor * (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) + cursor_bw[k];
5539 avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] += dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k];
5540
5541 #ifdef __DML_VBA_DEBUG__
5542 dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
5543 dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
5544 dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
5545 dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
5546 #endif
5547 }
5548 }
5549
CalculateVMRowAndSwath(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_CalculateVMRowAndSwath_params * p)5550 static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch,
5551 struct dml2_core_calcs_CalculateVMRowAndSwath_params *p)
5552 {
5553 struct dml2_core_calcs_CalculateVMRowAndSwath_locals *s = &scratch->CalculateVMRowAndSwath_locals;
5554
5555 s->HostVMDynamicLevels = CalculateHostVMDynamicLevels(p->display_cfg->gpuvm_enable, p->display_cfg->hostvm_enable, p->HostVMMinPageSize, p->display_cfg->hostvm_max_non_cached_page_table_levels);
5556
5557 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
5558 if (p->display_cfg->hostvm_enable == true) {
5559 p->vm_group_bytes[k] = 512;
5560 p->dpte_group_bytes[k] = 512;
5561 } else if (p->display_cfg->gpuvm_enable == true) {
5562 p->vm_group_bytes[k] = 2048;
5563 if (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes >= 64 && dml_is_vertical_rotation(p->myPipe[k].RotationAngle)) {
5564 p->dpte_group_bytes[k] = 512;
5565 } else {
5566 p->dpte_group_bytes[k] = 2048;
5567 }
5568 } else {
5569 p->vm_group_bytes[k] = 0;
5570 p->dpte_group_bytes[k] = 0;
5571 }
5572
5573 if (dml2_core_shared_is_420(p->myPipe[k].SourcePixelFormat) || p->myPipe[k].SourcePixelFormat == dml2_rgbe_alpha) {
5574 if ((p->myPipe[k].SourcePixelFormat == dml2_420_10 || p->myPipe[k].SourcePixelFormat == dml2_420_12) && !dml_is_vertical_rotation(p->myPipe[k].RotationAngle)) {
5575 s->PTEBufferSizeInRequestsForLuma[k] = (p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma) / 2;
5576 s->PTEBufferSizeInRequestsForChroma[k] = s->PTEBufferSizeInRequestsForLuma[k];
5577 } else {
5578 s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma;
5579 s->PTEBufferSizeInRequestsForChroma[k] = p->PTEBufferSizeInRequestsChroma;
5580 }
5581
5582 scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary;
5583 scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable;
5584 scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface;
5585 scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesC;
5586 scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesC;
5587 scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat;
5588 scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling;
5589 scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelC;
5590 scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle;
5591 scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthC[k];
5592 scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeightC;
5593 scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStartC;
5594 scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStartC;
5595 scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable;
5596 scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels;
5597 scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
5598 scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = s->PTEBufferSizeInRequestsForChroma[k];
5599 scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchC;
5600 scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = p->myPipe[k].BlockWidthC;
5601 scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightC;
5602 scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]);
5603 scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchC;
5604 scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present;
5605
5606 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowC[k];
5607 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageC[k];
5608 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_chroma_ub[k];
5609 scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_chroma[k];
5610 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_chroma[k];
5611 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowC_one_row_per_frame[k];
5612 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_chroma_ub_one_row_per_frame[k];
5613 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_chroma_one_row_per_frame[k];
5614 scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_c[k];
5615 scratch->calculate_vm_and_row_bytes_params.vmpg_height = &p->vmpg_height_c[k];
5616 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthC[k];
5617 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightC[k];
5618 scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeC[k];
5619 scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_c[k];
5620
5621 scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_c[k];
5622 scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_chroma[k];
5623 scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_chroma[k];
5624 scratch->calculate_vm_and_row_bytes_params.meta_row_width = &p->meta_row_width_chroma[k];
5625 scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_chroma[k];
5626 scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_c[k];
5627
5628 s->vm_bytes_c = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params);
5629
5630 p->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
5631 p->myPipe[k].VRatioChroma,
5632 p->myPipe[k].VTapsChroma,
5633 p->myPipe[k].InterlaceEnable,
5634 p->myPipe[k].ProgressiveToInterlaceUnitInOPP,
5635 p->myPipe[k].SwathHeightC,
5636 p->myPipe[k].RotationAngle,
5637 p->myPipe[k].mirrored,
5638 p->myPipe[k].ViewportStationary,
5639 p->SwathWidthC[k],
5640 p->myPipe[k].ViewportHeightC,
5641 p->myPipe[k].ViewportXStartC,
5642 p->myPipe[k].ViewportYStartC,
5643
5644 // Output
5645 &p->VInitPreFillC[k],
5646 &p->MaxNumSwathC[k]);
5647 } else {
5648 s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma;
5649 s->PTEBufferSizeInRequestsForChroma[k] = 0;
5650 s->PixelPTEBytesPerRowC[k] = 0;
5651 s->PixelPTEBytesPerRowStorageC[k] = 0;
5652 s->vm_bytes_c = 0;
5653 p->MaxNumSwathC[k] = 0;
5654 p->PrefetchSourceLinesC[k] = 0;
5655 s->dpte_row_height_chroma_one_row_per_frame[k] = 0;
5656 s->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
5657 s->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
5658 }
5659
5660 scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary;
5661 scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable;
5662 scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface;
5663 scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesY;
5664 scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesY;
5665 scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat;
5666 scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling;
5667 scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelY;
5668 scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle;
5669 scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthY[k];
5670 scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeight;
5671 scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStart;
5672 scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStart;
5673 scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable;
5674 scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels;
5675 scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
5676 scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = s->PTEBufferSizeInRequestsForLuma[k];
5677 scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchY;
5678 scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = p->myPipe[k].BlockWidthY;
5679 scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightY;
5680 scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]);
5681 scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchY;
5682 scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present;
5683
5684 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowY[k];
5685 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageY[k];
5686 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_luma_ub[k];
5687 scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_luma[k];
5688 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_luma[k];
5689 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowY_one_row_per_frame[k];
5690 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_luma_ub_one_row_per_frame[k];
5691 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_luma_one_row_per_frame[k];
5692 scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_y[k];
5693 scratch->calculate_vm_and_row_bytes_params.vmpg_height = &p->vmpg_height_y[k];
5694 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthY[k];
5695 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightY[k];
5696 scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeY[k];
5697 scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_l[k];
5698
5699 scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_l[k];
5700 scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_luma[k];
5701 scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_luma[k];
5702 scratch->calculate_vm_and_row_bytes_params.meta_row_width = &p->meta_row_width_luma[k];
5703 scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_luma[k];
5704 scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_l[k];
5705
5706 s->vm_bytes_l = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params);
5707
5708 p->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
5709 p->myPipe[k].VRatio,
5710 p->myPipe[k].VTaps,
5711 p->myPipe[k].InterlaceEnable,
5712 p->myPipe[k].ProgressiveToInterlaceUnitInOPP,
5713 p->myPipe[k].SwathHeightY,
5714 p->myPipe[k].RotationAngle,
5715 p->myPipe[k].mirrored,
5716 p->myPipe[k].ViewportStationary,
5717 p->SwathWidthY[k],
5718 p->myPipe[k].ViewportHeight,
5719 p->myPipe[k].ViewportXStart,
5720 p->myPipe[k].ViewportYStart,
5721
5722 // Output
5723 &p->VInitPreFillY[k],
5724 &p->MaxNumSwathY[k]);
5725
5726 #ifdef __DML_VBA_DEBUG__
5727 dml2_printf("DML::%s: k=%u, vm_bytes_l = %u (before hvm level)\n", __func__, k, s->vm_bytes_l);
5728 dml2_printf("DML::%s: k=%u, vm_bytes_c = %u (before hvm level)\n", __func__, k, s->vm_bytes_c);
5729 dml2_printf("DML::%s: k=%u, meta_row_bytes_per_row_ub_l = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_l[k]);
5730 dml2_printf("DML::%s: k=%u, meta_row_bytes_per_row_ub_c = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_c[k]);
5731 #endif
5732 p->vm_bytes[k] = (s->vm_bytes_l + s->vm_bytes_c) * (1 + 8 * s->HostVMDynamicLevels);
5733 p->meta_row_bytes[k] = s->meta_row_bytes_per_row_ub_l[k] + s->meta_row_bytes_per_row_ub_c[k];
5734
5735 #ifdef __DML_VBA_DEBUG__
5736 dml2_printf("DML::%s: k=%u, meta_row_bytes = %u\n", __func__, k, p->meta_row_bytes[k]);
5737 dml2_printf("DML::%s: k=%u, vm_bytes = %u (after hvm level)\n", __func__, k, p->vm_bytes[k]);
5738 #endif
5739 if (s->PixelPTEBytesPerRowStorageY[k] <= 64 * s->PTEBufferSizeInRequestsForLuma[k] && s->PixelPTEBytesPerRowStorageC[k] <= 64 * s->PTEBufferSizeInRequestsForChroma[k]) {
5740 p->PTEBufferSizeNotExceeded[k] = true;
5741 } else {
5742 p->PTEBufferSizeNotExceeded[k] = false;
5743 }
5744
5745 s->one_row_per_frame_fits_in_buffer[k] = (s->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForLuma[k] &&
5746 s->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForChroma[k]);
5747 #ifdef __DML_VBA_DEBUG__
5748 if (p->PTEBufferSizeNotExceeded[k] == 0 || s->one_row_per_frame_fits_in_buffer[k] == 0) {
5749 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]);
5750 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]);
5751 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]);
5752 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]);
5753 dml2_printf("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]);
5754 dml2_printf("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]);
5755 dml2_printf("DML::%s: k=%u, PTEBufferSizeNotExceeded (not one_row_per_frame) = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]);
5756
5757 dml2_printf("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels);
5758 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]);
5759 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]);
5760 dml2_printf("DML::%s: k=%u, one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]);
5761 }
5762 #endif
5763 }
5764
5765 CalculateMALLUseForStaticScreen(
5766 p->display_cfg,
5767 p->NumberOfActiveSurfaces,
5768 p->MALLAllocatedForDCN,
5769 p->SurfaceSizeInMALL,
5770 s->one_row_per_frame_fits_in_buffer,
5771 // Output
5772 p->is_using_mall_for_ss);
5773
5774 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
5775 if (p->display_cfg->gpuvm_enable) {
5776 if (p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.enable == 1) {
5777 p->PTE_BUFFER_MODE[k] = p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.value;
5778 }
5779 p->PTE_BUFFER_MODE[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) ||
5780 dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64);
5781 p->BIGK_FRAGMENT_SIZE[k] = (unsigned int)(math_log((float)p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes * 1024, 2) - 12);
5782 } else {
5783 p->PTE_BUFFER_MODE[k] = 0;
5784 p->BIGK_FRAGMENT_SIZE[k] = 0;
5785 }
5786 }
5787
5788 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
5789 p->DCCMetaBufferSizeNotExceeded[k] = true;
5790 #ifdef __DML_VBA_DEBUG__
5791 dml2_printf("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]);
5792 dml2_printf("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, p->is_using_mall_for_ss[k]);
5793 #endif
5794 p->use_one_row_for_frame[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) ||
5795 (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64 && dml_is_vertical_rotation(p->myPipe[k].RotationAngle));
5796
5797 p->use_one_row_for_frame_flip[k] = p->use_one_row_for_frame[k] && !(p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame);
5798
5799 if (p->use_one_row_for_frame[k]) {
5800 p->dpte_row_height_luma[k] = s->dpte_row_height_luma_one_row_per_frame[k];
5801 p->dpte_row_width_luma_ub[k] = s->dpte_row_width_luma_ub_one_row_per_frame[k];
5802 s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY_one_row_per_frame[k];
5803 p->dpte_row_height_chroma[k] = s->dpte_row_height_chroma_one_row_per_frame[k];
5804 p->dpte_row_width_chroma_ub[k] = s->dpte_row_width_chroma_ub_one_row_per_frame[k];
5805 s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC_one_row_per_frame[k];
5806 p->PTEBufferSizeNotExceeded[k] = s->one_row_per_frame_fits_in_buffer[k];
5807 }
5808
5809 if (p->meta_row_bytes[k] <= p->DCCMetaBufferSizeBytes) {
5810 p->DCCMetaBufferSizeNotExceeded[k] = true;
5811 } else {
5812 p->DCCMetaBufferSizeNotExceeded[k] = false;
5813
5814 #ifdef __DML_VBA_DEBUG__
5815 dml2_printf("DML::%s: k=%d, meta_row_bytes = %d\n", __func__, k, p->meta_row_bytes[k]);
5816 dml2_printf("DML::%s: k=%d, DCCMetaBufferSizeBytes = %d\n", __func__, k, p->DCCMetaBufferSizeBytes);
5817 dml2_printf("DML::%s: k=%d, DCCMetaBufferSizeNotExceeded = %d\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]);
5818 #endif
5819 }
5820
5821 s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY[k] * (1 + 8 * s->HostVMDynamicLevels);
5822 s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC[k] * (1 + 8 * s->HostVMDynamicLevels);
5823 p->PixelPTEBytesPerRow[k] = s->PixelPTEBytesPerRowY[k] + s->PixelPTEBytesPerRowC[k];
5824
5825 // if one row of dPTEs is meant to span the entire frame, then for these calculations, we will pretend like that one big row is fetched in two halfs
5826 if (p->use_one_row_for_frame[k])
5827 p->PixelPTEBytesPerRow[k] = p->PixelPTEBytesPerRow[k] / 2;
5828
5829 CalculateRowBandwidth(
5830 p->display_cfg->gpuvm_enable,
5831 p->use_one_row_for_frame[k],
5832 p->myPipe[k].SourcePixelFormat,
5833 p->myPipe[k].VRatio,
5834 p->myPipe[k].VRatioChroma,
5835 p->myPipe[k].DCCEnable,
5836 p->myPipe[k].HTotal / p->myPipe[k].PixelClock,
5837 s->PixelPTEBytesPerRowY[k],
5838 s->PixelPTEBytesPerRowC[k],
5839 p->dpte_row_height_luma[k],
5840 p->dpte_row_height_chroma[k],
5841
5842 p->mrq_present,
5843 s->meta_row_bytes_per_row_ub_l[k],
5844 s->meta_row_bytes_per_row_ub_c[k],
5845 p->meta_row_height_luma[k],
5846 p->meta_row_height_chroma[k],
5847
5848 // Output
5849 &p->dpte_row_bw[k],
5850 &p->meta_row_bw[k]);
5851 #ifdef __DML_VBA_DEBUG__
5852 dml2_printf("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]);
5853 dml2_printf("DML::%s: k=%u, use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]);
5854 dml2_printf("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config);
5855 dml2_printf("DML::%s: k=%u, dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]);
5856 dml2_printf("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]);
5857 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]);
5858 dml2_printf("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]);
5859 dml2_printf("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]);
5860 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]);
5861 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]);
5862 dml2_printf("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]);
5863 dml2_printf("DML::%s: k=%u, gpuvm_enable = %u\n", __func__, k, p->display_cfg->gpuvm_enable);
5864 dml2_printf("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]);
5865 dml2_printf("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]);
5866 #endif
5867 }
5868 }
5869
CalculateUrgentLatency(double UrgentLatencyPixelDataOnly,double UrgentLatencyPixelMixedWithVMData,double UrgentLatencyVMDataOnly,bool DoUrgentLatencyAdjustment,double UrgentLatencyAdjustmentFabricClockComponent,double UrgentLatencyAdjustmentFabricClockReference,double FabricClock,double uclk_freq_mhz,enum dml2_qos_param_type qos_type,unsigned int urgent_ramp_uclk_cycles,unsigned int df_qos_response_time_fclk_cycles,unsigned int max_round_trip_to_furthest_cs_fclk_cycles,unsigned int mall_overhead_fclk_cycles,double umc_urgent_ramp_latency_margin,double fabric_max_transport_latency_margin)5870 static double CalculateUrgentLatency(
5871 double UrgentLatencyPixelDataOnly,
5872 double UrgentLatencyPixelMixedWithVMData,
5873 double UrgentLatencyVMDataOnly,
5874 bool DoUrgentLatencyAdjustment,
5875 double UrgentLatencyAdjustmentFabricClockComponent,
5876 double UrgentLatencyAdjustmentFabricClockReference,
5877 double FabricClock,
5878 double uclk_freq_mhz,
5879 enum dml2_qos_param_type qos_type,
5880 unsigned int urgent_ramp_uclk_cycles,
5881 unsigned int df_qos_response_time_fclk_cycles,
5882 unsigned int max_round_trip_to_furthest_cs_fclk_cycles,
5883 unsigned int mall_overhead_fclk_cycles,
5884 double umc_urgent_ramp_latency_margin,
5885 double fabric_max_transport_latency_margin)
5886 {
5887 double urgent_latency = 0;
5888 if (qos_type == dml2_qos_param_type_dcn4x) {
5889 urgent_latency = (df_qos_response_time_fclk_cycles + mall_overhead_fclk_cycles) / FabricClock
5890 + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1 + fabric_max_transport_latency_margin / 100.0)
5891 + urgent_ramp_uclk_cycles / uclk_freq_mhz * (1 + umc_urgent_ramp_latency_margin / 100.0);
5892 } else {
5893 urgent_latency = math_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
5894 if (DoUrgentLatencyAdjustment == true) {
5895 urgent_latency = urgent_latency + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
5896 }
5897 }
5898 #ifdef __DML_VBA_DEBUG__
5899 if (qos_type == dml2_qos_param_type_dcn4x) {
5900 dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type);
5901 dml2_printf("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles);
5902 dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz);
5903 dml2_printf("DML::%s: umc_urgent_ramp_latency_margin = %f\n", __func__, umc_urgent_ramp_latency_margin);
5904 } else {
5905 dml2_printf("DML::%s: UrgentLatencyPixelDataOnly = %f\n", __func__, UrgentLatencyPixelDataOnly);
5906 dml2_printf("DML::%s: UrgentLatencyPixelMixedWithVMData = %f\n", __func__, UrgentLatencyPixelMixedWithVMData);
5907 dml2_printf("DML::%s: UrgentLatencyVMDataOnly = %f\n", __func__, UrgentLatencyVMDataOnly);
5908 dml2_printf("DML::%s: UrgentLatencyAdjustmentFabricClockComponent = %f\n", __func__, UrgentLatencyAdjustmentFabricClockComponent);
5909 dml2_printf("DML::%s: UrgentLatencyAdjustmentFabricClockReference = %f\n", __func__, UrgentLatencyAdjustmentFabricClockReference);
5910 }
5911 dml2_printf("DML::%s: FabricClock = %f\n", __func__, FabricClock);
5912 dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, urgent_latency);
5913 #endif
5914 return urgent_latency;
5915 }
5916
CalculateTripToMemory(double UrgLatency,double FabricClock,double uclk_freq_mhz,enum dml2_qos_param_type qos_type,unsigned int trip_to_memory_uclk_cycles,unsigned int max_round_trip_to_furthest_cs_fclk_cycles,unsigned int mall_overhead_fclk_cycles,double umc_max_latency_margin,double fabric_max_transport_latency_margin)5917 static double CalculateTripToMemory(
5918 double UrgLatency,
5919 double FabricClock,
5920 double uclk_freq_mhz,
5921 enum dml2_qos_param_type qos_type,
5922 unsigned int trip_to_memory_uclk_cycles,
5923 unsigned int max_round_trip_to_furthest_cs_fclk_cycles,
5924 unsigned int mall_overhead_fclk_cycles,
5925 double umc_max_latency_margin,
5926 double fabric_max_transport_latency_margin)
5927 {
5928 double trip_to_memory_us;
5929 if (qos_type == dml2_qos_param_type_dcn4x) {
5930 trip_to_memory_us = mall_overhead_fclk_cycles / FabricClock
5931 + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0)
5932 + trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0);
5933 } else {
5934 trip_to_memory_us = UrgLatency;
5935 }
5936
5937 #ifdef __DML_VBA_DEBUG__
5938 if (qos_type == dml2_qos_param_type_dcn4x) {
5939 dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type);
5940 dml2_printf("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles);
5941 dml2_printf("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles);
5942 dml2_printf("DML::%s: trip_to_memory_uclk_cycles = %d\n", __func__, trip_to_memory_uclk_cycles);
5943 dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz);
5944 dml2_printf("DML::%s: FabricClock = %f\n", __func__, FabricClock);
5945 dml2_printf("DML::%s: fabric_max_transport_latency_margin = %f\n", __func__, fabric_max_transport_latency_margin);
5946 dml2_printf("DML::%s: umc_max_latency_margin = %f\n", __func__, umc_max_latency_margin);
5947 } else {
5948 dml2_printf("DML::%s: UrgLatency = %f\n", __func__, UrgLatency);
5949 }
5950 dml2_printf("DML::%s: trip_to_memory_us = %f\n", __func__, trip_to_memory_us);
5951 #endif
5952
5953
5954 return trip_to_memory_us;
5955 }
5956
CalculateMetaTripToMemory(double UrgLatency,double FabricClock,double uclk_freq_mhz,enum dml2_qos_param_type qos_type,unsigned int meta_trip_to_memory_uclk_cycles,unsigned int meta_trip_to_memory_fclk_cycles,double umc_max_latency_margin,double fabric_max_transport_latency_margin)5957 static double CalculateMetaTripToMemory(
5958 double UrgLatency,
5959 double FabricClock,
5960 double uclk_freq_mhz,
5961 enum dml2_qos_param_type qos_type,
5962 unsigned int meta_trip_to_memory_uclk_cycles,
5963 unsigned int meta_trip_to_memory_fclk_cycles,
5964 double umc_max_latency_margin,
5965 double fabric_max_transport_latency_margin)
5966 {
5967 double meta_trip_to_memory_us;
5968 if (qos_type == dml2_qos_param_type_dcn4x) {
5969 meta_trip_to_memory_us = meta_trip_to_memory_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0)
5970 + meta_trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0);
5971 } else {
5972 meta_trip_to_memory_us = UrgLatency;
5973 }
5974
5975 #ifdef __DML_VBA_DEBUG__
5976 if (qos_type == dml2_qos_param_type_dcn4x) {
5977 dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type);
5978 dml2_printf("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles);
5979 dml2_printf("DML::%s: meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles);
5980 dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz);
5981 } else {
5982 dml2_printf("DML::%s: UrgLatency = %f\n", __func__, UrgLatency);
5983 }
5984 dml2_printf("DML::%s: meta_trip_to_memory_us = %f\n", __func__, meta_trip_to_memory_us);
5985 #endif
5986
5987
5988 return meta_trip_to_memory_us;
5989 }
5990
/*
 * Derives the cursor request attributes from the cursor width (pixels) and
 * the cursor bits per pixel.
 *
 * Outputs:
 *   cursor_bytes_per_line  - the line width in bytes passed through
 *                            math_ceil2() with the 64/128/256 byte request
 *                            size selected for that line
 *   cursor_lines_per_chunk - written for 2, 32 and 64 bpp only. For any other
 *                            cursor_bpp this output is left untouched, so the
 *                            value already stored by the caller is what feeds
 *                            cursor_bytes_per_chunk.
 *   cursor_bytes_per_chunk - cursor_bytes_per_line * cursor_lines_per_chunk
 *   cursor_bytes           - cursor_bytes_per_line * cursor_width (the cursor
 *                            is assumed to be square)
 *
 * An unsupported cursor_bpp together with a non-zero cursor_width is reported
 * through dml2_printf() and dml2_assert(0).
 */
static void calculate_cursor_req_attributes(
	unsigned int cursor_width,
	unsigned int cursor_bpp,

	// output
	unsigned int *cursor_lines_per_chunk,
	unsigned int *cursor_bytes_per_line,
	unsigned int *cursor_bytes_per_chunk,
	unsigned int *cursor_bytes)
{
	unsigned int pitch;
	unsigned int req_bytes;
	unsigned int line_width_bytes;

	// SW picks the cursor pitch to cover the largest cursor_width in use, with these restrictions:
	// - 2bpp: pitch is fixed at 256 pixels because the minimum cursor request is 64B
	// - 32 or 64 bpp: pitch is 64, 128 or 256 pixels depending on the cursor width
	// The pitch is only reported in the debug output below.
	pitch = (cursor_bpp == 2) ? 256 : (unsigned int)1 << (unsigned int)math_ceil2(math_log((float)cursor_width, 2), 1);

	// The request size is 64B, 128B or 256B depending on how many bytes one cursor line occupies.
	line_width_bytes = (unsigned int)math_ceil2((double)cursor_width * cursor_bpp / 8, 1);
	req_bytes = 256;
	if (line_width_bytes <= 64)
		req_bytes = 64;
	else if (line_width_bytes <= 128)
		req_bytes = 128;

	// Lines wider than 256B are fetched with several 256B requests.
	*cursor_bytes_per_line = (unsigned int)math_ceil2((double)line_width_bytes, req_bytes);

	// Nominally the cursor chunk is 1KB or 2KB, but it is restricted to a power of 2 number of lines, at most 16.
	switch (cursor_bpp) {
	case 2:
		*cursor_lines_per_chunk = 16;
		break;
	case 32:
	case 64: {
		// Start at 16 lines for the narrowest cursors and halve the count each time the
		// width limit doubles: 32bpp goes 16/8/4/2 at widths 32/64/128/larger,
		// 64bpp goes 16/8/4/2/1 at widths 16/32/64/128/larger.
		const unsigned int min_lines = (cursor_bpp == 32) ? 2 : 1;
		unsigned int width_limit = (cursor_bpp == 32) ? 32 : 16;
		unsigned int lines = 16;

		while (cursor_width > width_limit && lines > min_lines) {
			width_limit *= 2;
			lines /= 2;
		}
		*cursor_lines_per_chunk = lines;
		break;
	}
	default:
		// Unsupported bpp: *cursor_lines_per_chunk is intentionally not written here.
		if (cursor_width > 0) {
			dml2_printf("DML::%s: Invalid cursor_bpp = %d\n", __func__, cursor_bpp);
			dml2_assert(0);
		}
		break;
	}

	*cursor_bytes_per_chunk = *cursor_bytes_per_line * *cursor_lines_per_chunk;

	// All requested cursor data is stored in the return buffer, so cursor_bytes can be compared directly with the CursorBufferSize.
	// Only cursor_width is provided for worst case sizing, so the cursor is assumed to be square (height == width).
	*cursor_bytes = *cursor_bytes_per_line * cursor_width;
#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: cursor_bpp = %d\n", __func__, cursor_bpp);
	dml2_printf("DML::%s: cursor_width = %d\n", __func__, cursor_width);
	dml2_printf("DML::%s: cursor_width_bytes = %d\n", __func__, line_width_bytes);
	dml2_printf("DML::%s: cursor_bytes_per_req = %d\n", __func__, req_bytes);
	dml2_printf("DML::%s: cursor_lines_per_chunk = %d\n", __func__, *cursor_lines_per_chunk);
	dml2_printf("DML::%s: cursor_bytes_per_line = %d\n", __func__, *cursor_bytes_per_line);
	dml2_printf("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, *cursor_bytes_per_chunk);
	dml2_printf("DML::%s: cursor_bytes = %d\n", __func__, *cursor_bytes);
	dml2_printf("DML::%s: cursor_pitch = %d\n", __func__, pitch);
#endif
}
6075
/*
 * Computes the urgent burst factor for the cursor.
 *
 * The number of cursor lines held by the buffer is the number of whole chunks
 * that fit in CursorBufferSize * 1024 bytes, times cursor_lines_per_chunk.
 * That line count times LineTime is the time covered by the buffer.
 *
 * Outputs (written only when CursorWidth is non-zero):
 *   NotEnoughUrgentLatencyHiding - set when the buffered time does not exceed
 *                                  UrgentLatency
 *   UrgentBurstFactorCursor      - 0 in that case, otherwise
 *                                  buffered time / (buffered time - UrgentLatency)
 *
 * cursor_bytes_per_chunk is used as a divisor without being checked.
 */
static void calculate_cursor_urgent_burst_factor(
	unsigned int CursorBufferSize,
	unsigned int CursorWidth,
	unsigned int cursor_bytes_per_chunk,
	unsigned int cursor_lines_per_chunk,
	double LineTime,
	double UrgentLatency,

	double *UrgentBurstFactorCursor,
	bool *NotEnoughUrgentLatencyHiding)
{
	unsigned int chunks_in_buffer;
	unsigned int lines_in_buffer;
	double buffer_time;
	double headroom;

	// Without a cursor there is nothing to evaluate; both outputs keep the caller's values.
	if (CursorWidth == 0)
		return;

	chunks_in_buffer = (unsigned int)math_floor2(CursorBufferSize * 1024.0 / (double)cursor_bytes_per_chunk, 1);
	lines_in_buffer = chunks_in_buffer * cursor_lines_per_chunk;

	buffer_time = lines_in_buffer * LineTime;
	headroom = buffer_time - UrgentLatency;
	if (headroom <= 0) {
		*NotEnoughUrgentLatencyHiding = true;
		*UrgentBurstFactorCursor = 0;
	} else {
		*NotEnoughUrgentLatencyHiding = false;
		*UrgentBurstFactorCursor = buffer_time / headroom;
	}

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: LinesInCursorBuffer = %u\n", __func__, lines_in_buffer);
	dml2_printf("DML::%s: CursorBufferSizeInTime = %f\n", __func__, buffer_time);
	dml2_printf("DML::%s: CursorBufferSize = %u (kbytes)\n", __func__, CursorBufferSize);
	dml2_printf("DML::%s: cursor_bytes_per_chunk = %u\n", __func__, cursor_bytes_per_chunk);
	dml2_printf("DML::%s: cursor_lines_per_chunk = %u\n", __func__, cursor_lines_per_chunk);
	dml2_printf("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, *UrgentBurstFactorCursor);
	dml2_printf("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding);
#endif
}
6114
/*
 * Computes the luma and chroma urgent burst factors for one plane.
 *
 * For each component, the number of lines held in the DET is
 * buffer bytes / bytes per pixel / swath width. That is passed through
 * math_floor2() with the swath height and converted to time with
 * LineTime / vertical ratio. The byte count is DETBufferSizeY / DETBufferSizeC,
 * except that 1024 * 1024 is used instead when dml_is_phantom_pipe() reports
 * the plane as a phantom pipe.
 *
 * Outputs:
 *   UrgentBurstFactorLuma / UrgentBurstFactorChroma
 *       buffered time / (buffered time - UrgentLatency), or 0 when the
 *       buffered time does not exceed UrgentLatency. The chroma factor also
 *       stays 0 when BytePerPixelInDETC is not positive.
 *   NotEnoughUrgentLatencyHiding
 *       set when either evaluated component cannot cover UrgentLatency.
 *
 * VRatio is expected to be positive (checked with DML2_ASSERT only).
 */
static void CalculateUrgentBurstFactor(
	const struct dml2_plane_parameters *plane_cfg,
	unsigned int swath_width_luma_ub,
	unsigned int swath_width_chroma_ub,
	unsigned int SwathHeightY,
	unsigned int SwathHeightC,
	double LineTime,
	double UrgentLatency,
	double VRatio,
	double VRatioC,
	double BytePerPixelInDETY,
	double BytePerPixelInDETC,
	unsigned int DETBufferSizeY,
	unsigned int DETBufferSizeC,
	// Output
	double *UrgentBurstFactorLuma,
	double *UrgentBurstFactorChroma,
	bool *NotEnoughUrgentLatencyHiding)
{
	double luma_lines_in_det;
	double chroma_lines_in_det;
	double luma_buffer_time;
	double chroma_buffer_time;
	unsigned int det_bytes;

	// Start from "latency is hidden" with both factors cleared.
	*UrgentBurstFactorChroma = 0;
	*UrgentBurstFactorLuma = 0;
	*NotEnoughUrgentLatencyHiding = false;

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: VRatio = %f\n", __func__, VRatio);
	dml2_printf("DML::%s: VRatioC = %f\n", __func__, VRatioC);
	dml2_printf("DML::%s: DETBufferSizeY = %d\n", __func__, DETBufferSizeY);
	dml2_printf("DML::%s: DETBufferSizeC = %d\n", __func__, DETBufferSizeC);
	dml2_printf("DML::%s: BytePerPixelInDETY = %f\n", __func__, BytePerPixelInDETY);
	dml2_printf("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
	dml2_printf("DML::%s: LineTime = %f\n", __func__, LineTime);
#endif
	DML2_ASSERT(VRatio > 0);

	// Luma
	det_bytes = dml_is_phantom_pipe(plane_cfg) ? 1024 * 1024 : DETBufferSizeY;
	luma_lines_in_det = det_bytes / BytePerPixelInDETY / swath_width_luma_ub;

	luma_buffer_time = math_floor2(luma_lines_in_det, SwathHeightY) * LineTime / VRatio;
	if (luma_buffer_time - UrgentLatency <= 0) {
		*NotEnoughUrgentLatencyHiding = true;
		*UrgentBurstFactorLuma = 0;
	} else {
		*UrgentBurstFactorLuma = luma_buffer_time / (luma_buffer_time - UrgentLatency);
	}

	// Chroma, only for formats that carry a chroma plane in the DET
	if (BytePerPixelInDETC > 0) {
		det_bytes = dml_is_phantom_pipe(plane_cfg) ? 1024 * 1024 : DETBufferSizeC;
		chroma_lines_in_det = det_bytes / BytePerPixelInDETC / swath_width_chroma_ub;

		chroma_buffer_time = math_floor2(chroma_lines_in_det, SwathHeightC) * LineTime / VRatioC;
		if (chroma_buffer_time - UrgentLatency <= 0) {
			*NotEnoughUrgentLatencyHiding = true;
			*UrgentBurstFactorChroma = 0;
		} else {
			*UrgentBurstFactorChroma = chroma_buffer_time / (chroma_buffer_time - UrgentLatency);
		}
	}

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: LinesInDETLuma = %f\n", __func__, luma_lines_in_det);
	dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
	dml2_printf("DML::%s: DETBufferSizeInTimeLuma = %f\n", __func__, luma_buffer_time);
	dml2_printf("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, *UrgentBurstFactorLuma);
	dml2_printf("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, *UrgentBurstFactorChroma);
	dml2_printf("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding);
#endif

}
6186
CalculateDCFCLKDeepSleep(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],unsigned int SwathWidthY[],unsigned int SwathWidthC[],unsigned int DPPPerSurface[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double Dppclk[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int ReturnBusWidth,double * DCFClkDeepSleep)6187 static void CalculateDCFCLKDeepSleep(
6188 const struct dml2_display_cfg *display_cfg,
6189 unsigned int NumberOfActiveSurfaces,
6190 unsigned int BytePerPixelY[],
6191 unsigned int BytePerPixelC[],
6192 unsigned int SwathWidthY[],
6193 unsigned int SwathWidthC[],
6194 unsigned int DPPPerSurface[],
6195 double PSCL_THROUGHPUT[],
6196 double PSCL_THROUGHPUT_CHROMA[],
6197 double Dppclk[],
6198 double ReadBandwidthLuma[],
6199 double ReadBandwidthChroma[],
6200 unsigned int ReturnBusWidth,
6201
6202 // Output
6203 double *DCFClkDeepSleep)
6204 {
6205 double DisplayPipeLineDeliveryTimeLuma;
6206 double DisplayPipeLineDeliveryTimeChroma;
6207 double DCFClkDeepSleepPerSurface[DML2_MAX_PLANES];
6208
6209 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
6210 double pixel_rate_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
6211
6212 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) {
6213 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_rate_mhz;
6214 } else {
6215 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
6216 }
6217 if (BytePerPixelC[k] == 0) {
6218 DisplayPipeLineDeliveryTimeChroma = 0;
6219 } else {
6220 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio <= 1) {
6221 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerSurface[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_rate_mhz;
6222 } else {
6223 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
6224 }
6225 }
6226
6227 if (BytePerPixelC[k] > 0) {
6228 DCFClkDeepSleepPerSurface[k] = math_max2(__DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
6229 __DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
6230 } else {
6231 DCFClkDeepSleepPerSurface[k] = __DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
6232 }
6233 DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], pixel_rate_mhz / 16);
6234
6235 #ifdef __DML_VBA_DEBUG__
6236 dml2_printf("DML::%s: k=%u, PixelClock = %f\n", __func__, k, pixel_rate_mhz);
6237 dml2_printf("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
6238 #endif
6239 }
6240
6241 double ReadBandwidth = 0.0;
6242 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
6243 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
6244 }
6245
6246 *DCFClkDeepSleep = math_max2(8.0, __DML2_CALCS_DCFCLK_FACTOR__ * ReadBandwidth / (double)ReturnBusWidth);
6247
6248 #ifdef __DML_VBA_DEBUG__
6249 dml2_printf("DML::%s: __DML2_CALCS_DCFCLK_FACTOR__ = %f\n", __func__, __DML2_CALCS_DCFCLK_FACTOR__);
6250 dml2_printf("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
6251 dml2_printf("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth);
6252 dml2_printf("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
6253 #endif
6254
6255 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
6256 *DCFClkDeepSleep = math_max2(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
6257 }
6258 dml2_printf("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
6259 }
6260
/*
 * CalculateWriteBackDelay - compute the delay figure for one writeback
 * configuration.
 *
 * An index of the last output line is derived from the destination height,
 * the source height, the vertical ratio and the vertical tap count. When it
 * is negative the function returns 0. Otherwise the result is that index
 * times the effective line length, plus (HTotal - WritebackDestinationWidth)
 * plus a constant 80.
 *
 * The effective line length is the larger of the destination width and
 * math_ceil2(width / 6.0, 1.0) * WritebackVTaps.
 *
 * WritebackPixelFormat and WritebackHRatio are accepted but not used.
 */
static double CalculateWriteBackDelay(
	enum dml2_source_format_class WritebackPixelFormat,
	double WritebackHRatio,
	double WritebackVRatio,
	unsigned int WritebackVTaps,
	unsigned int WritebackDestinationWidth,
	unsigned int WritebackDestinationHeight,
	unsigned int WritebackSourceHeight,
	unsigned int HTotal)
{
	double v_init = (WritebackVRatio + WritebackVTaps + 1) / 2;
	double tap_line_length = math_ceil2((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps;
	double line_length = math_max2((double)WritebackDestinationWidth, tap_line_length);
	double source_lines = math_ceil2(((double)WritebackSourceHeight - v_init) / WritebackVRatio, 1.0);
	/* "Height - 1" is evaluated in unsigned arithmetic before the double subtraction. */
	double last_output_line = WritebackDestinationHeight - 1 - source_lines;

	if (last_output_line < 0)
		return 0;

	return last_output_line * line_length + (HTotal - WritebackDestinationWidth) + 80;
}
6286
CalculateMaxVStartup(bool ptoi_supported,unsigned int vblank_nom_default_us,const struct dml2_timing_cfg * timing,double write_back_delay_us)6287 static unsigned int CalculateMaxVStartup(
6288 bool ptoi_supported,
6289 unsigned int vblank_nom_default_us,
6290 const struct dml2_timing_cfg *timing,
6291 double write_back_delay_us)
6292 {
6293 unsigned int vblank_size = 0;
6294 unsigned int max_vstartup_lines = 0;
6295
6296 double line_time_us = (double)timing->h_total / ((double)timing->pixel_clock_khz / 1000);
6297 unsigned int vblank_actual = timing->v_total - timing->v_active;
6298 unsigned int vblank_nom_default_in_line = (unsigned int)math_floor2((double)vblank_nom_default_us / line_time_us, 1.0);
6299 unsigned int vblank_nom_input = (unsigned int)math_min2(timing->vblank_nom, vblank_nom_default_in_line);
6300 unsigned int vblank_avail = (vblank_nom_input == 0) ? vblank_nom_default_in_line : vblank_nom_input;
6301
6302 vblank_size = (unsigned int)math_min2(vblank_actual, vblank_avail);
6303
6304 if (timing->interlaced && !ptoi_supported)
6305 max_vstartup_lines = (unsigned int)(math_floor2(vblank_size / 2.0, 1.0));
6306 else
6307 max_vstartup_lines = vblank_size - (unsigned int)math_max2(1.0, math_ceil2(write_back_delay_us / line_time_us, 1.0));
6308 #ifdef __DML_VBA_DEBUG__
6309 dml2_printf("DML::%s: VBlankNom = %u\n", __func__, timing->vblank_nom);
6310 dml2_printf("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us);
6311 dml2_printf("DML::%s: line_time_us = %f\n", __func__, line_time_us);
6312 dml2_printf("DML::%s: vblank_actual = %u\n", __func__, vblank_actual);
6313 dml2_printf("DML::%s: vblank_avail = %u\n", __func__, vblank_avail);
6314 dml2_printf("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines);
6315 #endif
6316 return max_vstartup_lines;
6317 }
6318
CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params * p)6319 static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch *scratch,
6320 struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *p)
6321 {
6322 struct dml2_core_shared_CalculateSwathAndDETConfiguration_locals *l = &scratch->CalculateSwathAndDETConfiguration_locals;
6323 memset(l, 0, sizeof(struct dml2_core_shared_CalculateSwathAndDETConfiguration_locals));
6324
6325 #ifdef __DML_VBA_DEBUG__
6326 dml2_printf("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP);
6327 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
6328 dml2_printf("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]);
6329 }
6330 #endif
6331 CalculateSwathWidth(
6332 p->display_cfg,
6333 p->ForceSingleDPP,
6334 p->NumberOfActiveSurfaces,
6335 p->ODMMode,
6336 p->BytePerPixY,
6337 p->BytePerPixC,
6338 p->Read256BytesBlockHeightY,
6339 p->Read256BytesBlockHeightC,
6340 p->Read256BytesBlockWidthY,
6341 p->Read256BytesBlockWidthC,
6342 p->surf_linear128_l,
6343 p->surf_linear128_c,
6344 p->DPPPerSurface,
6345
6346 // Output
6347 p->req_per_swath_ub_l,
6348 p->req_per_swath_ub_c,
6349 l->SwathWidthSingleDPP,
6350 l->SwathWidthSingleDPPChroma,
6351 p->SwathWidth,
6352 p->SwathWidthChroma,
6353 l->MaximumSwathHeightY,
6354 l->MaximumSwathHeightC,
6355 p->swath_width_luma_ub,
6356 p->swath_width_chroma_ub);
6357
6358 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
6359 p->full_swath_bytes_l[k] = (unsigned int)(p->swath_width_luma_ub[k] * p->BytePerPixDETY[k] * l->MaximumSwathHeightY[k]);
6360 p->full_swath_bytes_c[k] = (unsigned int)(p->swath_width_chroma_ub[k] * p->BytePerPixDETC[k] * l->MaximumSwathHeightC[k]);
6361 #ifdef __DML_VBA_DEBUG__
6362 dml2_printf("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]);
6363 dml2_printf("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]);
6364 dml2_printf("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]);
6365 dml2_printf("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, l->MaximumSwathHeightY[k]);
6366 dml2_printf("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]);
6367 dml2_printf("DML::%s: k=%u swath_width_chroma_ub = %u\n", __func__, k, p->swath_width_chroma_ub[k]);
6368 dml2_printf("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, p->BytePerPixDETC[k]);
6369 dml2_printf("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, l->MaximumSwathHeightC[k]);
6370 dml2_printf("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]);
6371 #endif
6372 if (p->display_cfg->plane_descriptors[k].pixel_format == dml2_420_10) {
6373 p->full_swath_bytes_l[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_l[k], 256));
6374 p->full_swath_bytes_c[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_c[k], 256));
6375 }
6376 }
6377
6378 unsigned int TotalActiveDPP = 0;
6379 bool NoChromaOrLinear = true;
6380 unsigned int SurfaceDoingUnboundedRequest = 0;
6381
6382 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
6383 TotalActiveDPP = TotalActiveDPP + (p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]);
6384 if (p->DPPPerSurface[k] > 0)
6385 SurfaceDoingUnboundedRequest = k;
6386 if (dml2_core_shared_is_420(p->display_cfg->plane_descriptors[k].pixel_format) || p->display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha
6387 || p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) {
6388 NoChromaOrLinear = false;
6389 }
6390 l->SwathTimeValueUs[k] = (unsigned int) ((double)l->MaximumSwathHeightY[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total
6391 / p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz * 1000);
6392 }
6393
6394 *p->UnboundedRequestEnabled = UnboundedRequest(p->display_cfg->overrides.hw.force_unbounded_requesting.enable, p->display_cfg->overrides.hw.force_unbounded_requesting.value, TotalActiveDPP, NoChromaOrLinear);
6395
6396 scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.display_cfg = p->display_cfg;
6397 scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.ForceSingleDPP = p->ForceSingleDPP;
6398 scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.NumberOfActiveSurfaces = p->NumberOfActiveSurfaces;
6399 scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.UnboundedRequestEnabled = *p->UnboundedRequestEnabled;
6400 scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.nomDETInKByte = p->nomDETInKByte;
6401 scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.MaxTotalDETInKByte = p->MaxTotalDETInKByte;
6402 scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.ConfigReturnBufferSizeInKByte = p->ConfigReturnBufferSizeInKByte;
6403 scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.MinCompressedBufferSizeInKByte = p->MinCompressedBufferSizeInKByte;
6404 scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.ConfigReturnBufferSegmentSizeInkByte = p->ConfigReturnBufferSegmentSizeInkByte;
6405 scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.CompressedBufferSegmentSizeInkByte = p->CompressedBufferSegmentSizeInkByte;
6406 scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.ReadBandwidthLuma = p->ReadBandwidthLuma;
6407 scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.ReadBandwidthChroma = p->ReadBandwidthChroma;
6408 scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.full_swath_bytes_l = p->full_swath_bytes_l;
6409 scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.full_swath_bytes_c = p->full_swath_bytes_c;
6410 scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.DPPPerSurface = p->DPPPerSurface;
6411 scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.DETBufferSizeInKByte = p->DETBufferSizeInKByte;
6412 scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.CompressedBufferSizeInkByte = p->CompressedBufferSizeInkByte;
6413 scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.swath_time_value_us = l->SwathTimeValueUs;
6414 scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.bestEffortMinActiveLatencyHidingUs = p->display_cfg->overrides.best_effort_min_active_latency_hiding_us;
6415 if (p->funcs->calculate_det_buffer_size) {
6416 p->funcs->calculate_det_buffer_size(&scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params);
6417 } else {
6418 CalculateDETBufferSize(&scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params);
6419 }
6420
6421 #ifdef __DML_VBA_DEBUG__
6422 dml2_printf("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP);
6423 dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte);
6424 dml2_printf("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte);
6425 dml2_printf("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled);
6426 dml2_printf("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte);
6427 #endif
6428
6429 unsigned int DETBufferSizeInKByteForSwathCalculation;
6430 *p->ViewportSizeSupport = true;
6431 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
6432
6433 DETBufferSizeInKByteForSwathCalculation = (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 1024 : p->DETBufferSizeInKByte[k]);
6434 #ifdef __DML_VBA_DEBUG__
6435 dml2_printf("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation);
6436 #endif
6437
6438 if (p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
6439 p->SwathHeightY[k] = l->MaximumSwathHeightY[k];
6440 p->SwathHeightC[k] = l->MaximumSwathHeightC[k];
6441 l->RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k];
6442 l->RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k];
6443 p->request_size_bytes_luma[k] = 256;
6444 p->request_size_bytes_chroma[k] = 256;
6445
6446 } else if (p->full_swath_bytes_l[k] >= 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
6447 p->SwathHeightY[k] = l->MaximumSwathHeightY[k] / 2;
6448 p->SwathHeightC[k] = l->MaximumSwathHeightC[k];
6449 l->RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2;
6450 l->RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k];
6451 p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
6452 p->request_size_bytes_chroma[k] = 256;
6453
6454 } else if (p->full_swath_bytes_l[k] < 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] / 2 <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
6455 p->SwathHeightY[k] = l->MaximumSwathHeightY[k];
6456 p->SwathHeightC[k] = l->MaximumSwathHeightC[k] / 2;
6457 l->RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k];
6458 l->RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2;
6459 p->request_size_bytes_luma[k] = 256;
6460 p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
6461
6462 } else {
6463 p->SwathHeightY[k] = l->MaximumSwathHeightY[k] / 2;
6464 p->SwathHeightC[k] = l->MaximumSwathHeightC[k] / 2;
6465 l->RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2;
6466 l->RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2;
6467 p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
6468 p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
6469 }
6470
6471 if (p->SwathHeightC[k] == 0)
6472 p->request_size_bytes_chroma[k] = 0;
6473
6474 if ((p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] / 2 > DETBufferSizeInKByteForSwathCalculation * 1024 / 2) ||
6475 p->SwathWidth[k] > p->MaximumSwathWidthLuma[k] || (p->SwathHeightC[k] > 0 && p->SwathWidthChroma[k] > p->MaximumSwathWidthChroma[k])) {
6476 *p->ViewportSizeSupport = false;
6477 p->ViewportSizeSupportPerSurface[k] = false;
6478 } else {
6479 p->ViewportSizeSupportPerSurface[k] = true;
6480 }
6481
6482 if (p->SwathHeightC[k] == 0) {
6483 #ifdef __DML_VBA_DEBUG__
6484 dml2_printf("DML::%s: k=%u, All DET will be used for plane0\n", __func__, k);
6485 #endif
6486 p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024;
6487 p->DETBufferSizeC[k] = 0;
6488 } else if (l->RoundedUpSwathSizeBytesY[k] <= 1.5 * l->RoundedUpSwathSizeBytesC[k]) {
6489 #ifdef __DML_VBA_DEBUG__
6490 dml2_printf("DML::%s: k=%u, Half DET will be used for plane0, and half for plane1\n", __func__, k);
6491 #endif
6492 p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
6493 p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
6494 } else {
6495 #ifdef __DML_VBA_DEBUG__
6496 dml2_printf("DML::%s: k=%u, 2/3 DET will be used for plane0, and 1/3 for plane1\n", __func__, k);
6497 #endif
6498 p->DETBufferSizeY[k] = (unsigned int)(math_floor2(p->DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024));
6499 p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 - p->DETBufferSizeY[k];
6500 }
6501
6502 #ifdef __DML_VBA_DEBUG__
6503 dml2_printf("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
6504 dml2_printf("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]);
6505 dml2_printf("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]);
6506 dml2_printf("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]);
6507 dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, l->RoundedUpSwathSizeBytesY[k]);
6508 dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, l->RoundedUpSwathSizeBytesC[k]);
6509 dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]);
6510 dml2_printf("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
6511 dml2_printf("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]);
6512 dml2_printf("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]);
6513 #endif
6514
6515 }
6516
6517 const long TTUFIFODEPTH = 8;
6518 const long MAXIMUMCOMPRESSION = 4;
6519 *p->compbuf_reserved_space_64b = 2 * p->pixel_chunk_size_kbytes * 1024 / 64;
6520 if (*p->UnboundedRequestEnabled) {
6521 *p->compbuf_reserved_space_64b = (unsigned int)math_ceil2(math_max2(*p->compbuf_reserved_space_64b,
6522 (double)(p->rob_buffer_size_kbytes * 1024 / 64) - (double)(l->RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / 64)), 1.0);
6523 #ifdef __DML_VBA_DEBUG__
6524 dml2_printf("DML::%s: RoundedUpSwathSizeBytesY[%d] = %u\n", __func__, SurfaceDoingUnboundedRequest, l->RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest]);
6525 dml2_printf("DML::%s: rob_buffer_size_kbytes = %u\n", __func__, p->rob_buffer_size_kbytes);
6526 #endif
6527 }
6528 #ifdef __DML_VBA_DEBUG__
6529 dml2_printf("DML::%s: compbuf_reserved_space_64b = %u\n", __func__, *p->compbuf_reserved_space_64b);
6530 #endif
6531
6532 *p->hw_debug5 = false;
6533 if (!p->mrq_present) {
6534 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
6535 if (!(*p->UnboundedRequestEnabled)
6536 && p->display_cfg->plane_descriptors[k].surface.dcc.enable
6537 && ((p->rob_buffer_size_kbytes * 1024 + *p->CompressedBufferSizeInkByte * MAXIMUMCOMPRESSION * 1024) > TTUFIFODEPTH * (l->RoundedUpSwathSizeBytesY[k] + l->RoundedUpSwathSizeBytesC[k])))
6538 *p->hw_debug5 = true;
6539 #ifdef __DML_VBA_DEBUG__
6540 dml2_printf("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled);
6541 dml2_printf("DML::%s: k=%u MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION);
6542 dml2_printf("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH);
6543 dml2_printf("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte);
6544 dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, l->RoundedUpSwathSizeBytesC[k]);
6545 dml2_printf("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5);
6546 #endif
6547 }
6548 }
6549 }
6550
CalculateODMMode(unsigned int MaximumPixelsPerLinePerDSCUnit,unsigned int HActive,enum dml2_output_encoder_class Output,enum dml2_odm_mode ODMUse,double MaxDispclk,bool DSCEnable,unsigned int TotalNumberOfActiveDPP,unsigned int MaxNumDPP,double PixelClock,bool * TotalAvailablePipesSupport,unsigned int * NumberOfDPP,enum dml2_odm_mode * ODMMode,double * RequiredDISPCLKPerSurface)6551 static void CalculateODMMode(
6552 unsigned int MaximumPixelsPerLinePerDSCUnit,
6553 unsigned int HActive,
6554 enum dml2_output_encoder_class Output,
6555 enum dml2_odm_mode ODMUse,
6556 double MaxDispclk,
6557 bool DSCEnable,
6558 unsigned int TotalNumberOfActiveDPP,
6559 unsigned int MaxNumDPP,
6560 double PixelClock,
6561
6562 // Output
6563 bool *TotalAvailablePipesSupport,
6564 unsigned int *NumberOfDPP,
6565 enum dml2_odm_mode *ODMMode,
6566 double *RequiredDISPCLKPerSurface)
6567 {
6568 double SurfaceRequiredDISPCLKWithoutODMCombine;
6569 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
6570 double SurfaceRequiredDISPCLKWithODMCombineThreeToOne;
6571 double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
6572
6573 SurfaceRequiredDISPCLKWithoutODMCombine = CalculateRequiredDispclk(dml2_odm_mode_bypass, PixelClock);
6574 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_2to1, PixelClock);
6575 SurfaceRequiredDISPCLKWithODMCombineThreeToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_3to1, PixelClock);
6576 SurfaceRequiredDISPCLKWithODMCombineFourToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_4to1, PixelClock);
6577 *TotalAvailablePipesSupport = true;
6578
6579 if (ODMUse == dml2_odm_mode_bypass || ODMUse == dml2_odm_mode_auto)
6580 *ODMMode = dml2_odm_mode_bypass;
6581 else if (ODMUse == dml2_odm_mode_combine_2to1)
6582 *ODMMode = dml2_odm_mode_combine_2to1;
6583 else if (ODMUse == dml2_odm_mode_combine_3to1)
6584 *ODMMode = dml2_odm_mode_combine_3to1;
6585 else if (ODMUse == dml2_odm_mode_combine_4to1)
6586 *ODMMode = dml2_odm_mode_combine_4to1;
6587 else if (ODMUse == dml2_odm_mode_split_1to2)
6588 *ODMMode = dml2_odm_mode_split_1to2;
6589 else if (ODMUse == dml2_odm_mode_mso_1to2)
6590 *ODMMode = dml2_odm_mode_mso_1to2;
6591 else if (ODMUse == dml2_odm_mode_mso_1to4)
6592 *ODMMode = dml2_odm_mode_mso_1to4;
6593
6594 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
6595 *NumberOfDPP = 0;
6596
6597 #ifdef __DML_VBA_DEBUG__
6598 dml2_printf("DML::%s: ODMUse = %d\n", __func__, ODMUse);
6599 dml2_printf("DML::%s: Output = %d\n", __func__, Output);
6600 dml2_printf("DML::%s: DSCEnable = %d\n", __func__, DSCEnable);
6601 dml2_printf("DML::%s: MaxDispclk = %f\n", __func__, MaxDispclk);
6602 dml2_printf("DML::%s: MaximumPixelsPerLinePerDSCUnit = %d\n", __func__, MaximumPixelsPerLinePerDSCUnit);
6603 dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithoutODMCombine = %f\n", __func__, SurfaceRequiredDISPCLKWithoutODMCombine);
6604 dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithODMCombineTwoToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineTwoToOne);
6605 dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithODMCombineThreeToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineThreeToOne);
6606 dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithODMCombineFourToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineFourToOne);
6607 #endif
6608
6609 if (ODMUse == dml2_odm_mode_combine_4to1 || (ODMUse == dml2_odm_mode_auto &&
6610 (SurfaceRequiredDISPCLKWithODMCombineThreeToOne > MaxDispclk || (DSCEnable && (HActive > 3 * MaximumPixelsPerLinePerDSCUnit))))) {
6611 if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
6612 *ODMMode = dml2_odm_mode_combine_4to1;
6613 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
6614 *NumberOfDPP = 4;
6615 } else {
6616 *TotalAvailablePipesSupport = false;
6617 }
6618 } else if (ODMUse == dml2_odm_mode_combine_3to1 || (ODMUse == dml2_odm_mode_auto &&
6619 ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > MaxDispclk && SurfaceRequiredDISPCLKWithODMCombineThreeToOne <= MaxDispclk) ||
6620 (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit))))) {
6621 if (TotalNumberOfActiveDPP + 3 <= MaxNumDPP) {
6622 *ODMMode = dml2_odm_mode_combine_3to1;
6623 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineThreeToOne;
6624 *NumberOfDPP = 3;
6625 } else {
6626 *TotalAvailablePipesSupport = false;
6627 }
6628
6629 } else if (ODMUse == dml2_odm_mode_combine_2to1 || (ODMUse == dml2_odm_mode_auto &&
6630 ((SurfaceRequiredDISPCLKWithoutODMCombine > MaxDispclk && SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= MaxDispclk) ||
6631 (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit))))) {
6632 if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
6633 *ODMMode = dml2_odm_mode_combine_2to1;
6634 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
6635 *NumberOfDPP = 2;
6636 } else {
6637 *TotalAvailablePipesSupport = false;
6638 }
6639
6640 } else {
6641 if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP) {
6642 *NumberOfDPP = 1;
6643 } else {
6644 *TotalAvailablePipesSupport = false;
6645 }
6646 }
6647 #ifdef __DML_VBA_DEBUG__
6648 dml2_printf("DML::%s: ODMMode = %d\n", __func__, *ODMMode);
6649 dml2_printf("DML::%s: NumberOfDPP = %d\n", __func__, *NumberOfDPP);
6650 dml2_printf("DML::%s: TotalAvailablePipesSupport = %d\n", __func__, *TotalAvailablePipesSupport);
6651 dml2_printf("DML::%s: RequiredDISPCLKPerSurface = %f\n", __func__, *RequiredDISPCLKPerSurface);
6652 #endif
6653
6654 }
6655
CalculateOutputLink(struct dml2_core_internal_scratch * s,double PHYCLK,double PHYCLKD18,double PHYCLKD32,double Downspreading,bool IsMainSurfaceUsingTheIndicatedTiming,enum dml2_output_encoder_class Output,enum dml2_output_format_class OutputFormat,unsigned int HTotal,unsigned int HActive,double PixelClockBackEnd,double ForcedOutputLinkBPP,unsigned int DSCInputBitPerComponent,unsigned int NumberOfDSCSlices,double AudioSampleRate,unsigned int AudioSampleLayout,enum dml2_odm_mode ODMModeNoDSC,enum dml2_odm_mode ODMModeDSC,enum dml2_dsc_enable_option DSCEnable,unsigned int OutputLinkDPLanes,enum dml2_output_link_dp_rate OutputLinkDPRate,bool * RequiresDSC,bool * RequiresFEC,double * OutBpp,enum dml2_core_internal_output_type * OutputType,enum dml2_core_internal_output_type_rate * OutputRate,unsigned int * RequiredSlots)6656 static void CalculateOutputLink(
6657 struct dml2_core_internal_scratch *s,
6658 double PHYCLK,
6659 double PHYCLKD18,
6660 double PHYCLKD32,
6661 double Downspreading,
6662 bool IsMainSurfaceUsingTheIndicatedTiming,
6663 enum dml2_output_encoder_class Output,
6664 enum dml2_output_format_class OutputFormat,
6665 unsigned int HTotal,
6666 unsigned int HActive,
6667 double PixelClockBackEnd,
6668 double ForcedOutputLinkBPP,
6669 unsigned int DSCInputBitPerComponent,
6670 unsigned int NumberOfDSCSlices,
6671 double AudioSampleRate,
6672 unsigned int AudioSampleLayout,
6673 enum dml2_odm_mode ODMModeNoDSC,
6674 enum dml2_odm_mode ODMModeDSC,
6675 enum dml2_dsc_enable_option DSCEnable,
6676 unsigned int OutputLinkDPLanes,
6677 enum dml2_output_link_dp_rate OutputLinkDPRate,
6678
6679 // Output
6680 bool *RequiresDSC,
6681 bool *RequiresFEC,
6682 double *OutBpp,
6683 enum dml2_core_internal_output_type *OutputType,
6684 enum dml2_core_internal_output_type_rate *OutputRate,
6685 unsigned int *RequiredSlots)
6686 {
6687 bool LinkDSCEnable;
6688 unsigned int dummy;
6689 *RequiresDSC = false;
6690 *RequiresFEC = false;
6691 *OutBpp = 0;
6692
6693 *OutputType = dml2_core_internal_output_type_unknown;
6694 *OutputRate = dml2_core_internal_output_rate_unknown;
6695
6696 #ifdef __DML_VBA_DEBUG__
6697 dml2_printf("DML::%s: DSCEnable = %u (dis, en, en_if_necessary)\n", __func__, DSCEnable);
6698 dml2_printf("DML::%s: IsMainSurfaceUsingTheIndicatedTiming = %u\n", __func__, IsMainSurfaceUsingTheIndicatedTiming);
6699 dml2_printf("DML::%s: PHYCLK = %f\n", __func__, PHYCLK);
6700 dml2_printf("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd);
6701 dml2_printf("DML::%s: AudioSampleRate = %f\n", __func__, AudioSampleRate);
6702 dml2_printf("DML::%s: HActive = %u\n", __func__, HActive);
6703 dml2_printf("DML::%s: HTotal = %u\n", __func__, HTotal);
6704 dml2_printf("DML::%s: ODMModeNoDSC = %u\n", __func__, ODMModeNoDSC);
6705 dml2_printf("DML::%s: ODMModeDSC = %u\n", __func__, ODMModeDSC);
6706 dml2_printf("DML::%s: ForcedOutputLinkBPP = %f\n", __func__, ForcedOutputLinkBPP);
6707 dml2_printf("DML::%s: Output (encoder) = %u\n", __func__, Output);
6708 dml2_printf("DML::%s: OutputLinkDPRate = %u\n", __func__, OutputLinkDPRate);
6709 #endif
6710 if (IsMainSurfaceUsingTheIndicatedTiming) {
6711 if (Output == dml2_hdmi) {
6712 *RequiresDSC = false;
6713 *RequiresFEC = false;
6714 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, math_min2(600, PHYCLK) * 10, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, false, Output,
6715 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
6716 //OutputTypeAndRate = "HDMI";
6717 *OutputType = dml2_core_internal_output_type_hdmi;
6718 } else if (Output == dml2_dp || Output == dml2_dp2p0 || Output == dml2_edp) {
6719 if (DSCEnable == dml2_dsc_enable) {
6720 *RequiresDSC = true;
6721 LinkDSCEnable = true;
6722 if (Output == dml2_dp || Output == dml2_dp2p0) {
6723 *RequiresFEC = true;
6724 } else {
6725 *RequiresFEC = false;
6726 }
6727 } else {
6728 *RequiresDSC = false;
6729 LinkDSCEnable = false;
6730 if (Output == dml2_dp2p0) {
6731 *RequiresFEC = true;
6732 } else {
6733 *RequiresFEC = false;
6734 }
6735 }
6736 if (Output == dml2_dp2p0) {
6737 *OutBpp = 0;
6738 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr10) && PHYCLKD32 >= 10000.0 / 32) {
6739 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
6740 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
6741 if (*OutBpp == 0 && PHYCLKD32 < 13500.0 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
6742 *RequiresDSC = true;
6743 LinkDSCEnable = true;
6744 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
6745 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
6746 }
6747 //OutputTypeAndRate = Output & " UHBR10";
6748 *OutputType = dml2_core_internal_output_type_dp2p0;
6749 *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr10;
6750 }
6751 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr13p5) && *OutBpp == 0 && PHYCLKD32 >= 13500.0 / 32) {
6752 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
6753 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
6754
6755 if (*OutBpp == 0 && PHYCLKD32 < 20000 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
6756 *RequiresDSC = true;
6757 LinkDSCEnable = true;
6758 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
6759 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
6760 }
6761 //OutputTypeAndRate = Output & " UHBR13p5";
6762 *OutputType = dml2_core_internal_output_type_dp2p0;
6763 *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr13p5;
6764 }
6765 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr20) && *OutBpp == 0 && PHYCLKD32 >= 20000 / 32) {
6766 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
6767 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
6768 if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
6769 *RequiresDSC = true;
6770 LinkDSCEnable = true;
6771 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
6772 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
6773 }
6774 //OutputTypeAndRate = Output & " UHBR20";
6775 *OutputType = dml2_core_internal_output_type_dp2p0;
6776 *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr20;
6777 }
6778 } else { // output is dp or edp
6779 *OutBpp = 0;
6780 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr) && PHYCLK >= 270) {
6781 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
6782 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
6783 if (*OutBpp == 0 && PHYCLK < 540 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
6784 *RequiresDSC = true;
6785 LinkDSCEnable = true;
6786 if (Output == dml2_dp) {
6787 *RequiresFEC = true;
6788 }
6789 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
6790 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
6791 }
6792 //OutputTypeAndRate = Output & " HBR";
6793 *OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp;
6794 *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr;
6795 }
6796 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr2) && *OutBpp == 0 && PHYCLK >= 540) {
6797 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
6798 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
6799
6800 if (*OutBpp == 0 && PHYCLK < 810 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
6801 *RequiresDSC = true;
6802 LinkDSCEnable = true;
6803 if (Output == dml2_dp) {
6804 *RequiresFEC = true;
6805 }
6806 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
6807 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
6808 }
6809 //OutputTypeAndRate = Output & " HBR2";
6810 *OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp;
6811 *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr2;
6812 }
6813 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr3) && *OutBpp == 0 && PHYCLK >= 810) { // VBA_ERROR, vba code doesn't have hbr3 check
6814 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
6815 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
6816
6817 if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
6818 *RequiresDSC = true;
6819 LinkDSCEnable = true;
6820 if (Output == dml2_dp) {
6821 *RequiresFEC = true;
6822 }
6823 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
6824 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
6825 }
6826 //OutputTypeAndRate = Output & " HBR3";
6827 *OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp;
6828 *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr3;
6829 }
6830 }
6831 } else if (Output == dml2_hdmifrl) {
6832 if (DSCEnable == dml2_dsc_enable) {
6833 *RequiresDSC = true;
6834 LinkDSCEnable = true;
6835 *RequiresFEC = true;
6836 } else {
6837 *RequiresDSC = false;
6838 LinkDSCEnable = false;
6839 *RequiresFEC = false;
6840 }
6841 *OutBpp = 0;
6842 if (PHYCLKD18 >= 3000.0 / 18) {
6843 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 3000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
6844 //OutputTypeAndRate = Output & "3x3";
6845 *OutputType = dml2_core_internal_output_type_hdmifrl;
6846 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_3x3;
6847 }
6848 if (*OutBpp == 0 && PHYCLKD18 >= 6000.0 / 18) {
6849 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
6850 //OutputTypeAndRate = Output & "6x3";
6851 *OutputType = dml2_core_internal_output_type_hdmifrl;
6852 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x3;
6853 }
6854 if (*OutBpp == 0 && PHYCLKD18 >= 6000.0 / 18) {
6855 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
6856 //OutputTypeAndRate = Output & "6x4";
6857 *OutputType = dml2_core_internal_output_type_hdmifrl;
6858 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x4;
6859 }
6860 if (*OutBpp == 0 && PHYCLKD18 >= 8000.0 / 18) {
6861 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 8000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
6862 //OutputTypeAndRate = Output & "8x4";
6863 *OutputType = dml2_core_internal_output_type_hdmifrl;
6864 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_8x4;
6865 }
6866 if (*OutBpp == 0 && PHYCLKD18 >= 10000.0 / 18) {
6867 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
6868 if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0 && PHYCLKD18 < 12000.0 / 18) {
6869 *RequiresDSC = true;
6870 LinkDSCEnable = true;
6871 *RequiresFEC = true;
6872 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
6873 }
6874 //OutputTypeAndRate = Output & "10x4";
6875 *OutputType = dml2_core_internal_output_type_hdmifrl;
6876 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_10x4;
6877 }
6878 if (*OutBpp == 0 && PHYCLKD18 >= 12000.0 / 18) {
6879 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
6880 if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
6881 *RequiresDSC = true;
6882 LinkDSCEnable = true;
6883 *RequiresFEC = true;
6884 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
6885 }
6886 //OutputTypeAndRate = Output & "12x4";
6887 *OutputType = dml2_core_internal_output_type_hdmifrl;
6888 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_12x4;
6889 }
6890 }
6891 }
6892 #ifdef __DML_VBA_DEBUG__
6893 dml2_printf("DML::%s: RequiresDSC = %u\n", __func__, *RequiresDSC);
6894 dml2_printf("DML::%s: RequiresFEC = %u\n", __func__, *RequiresFEC);
6895 dml2_printf("DML::%s: OutBpp = %f\n", __func__, *OutBpp);
6896 #endif
6897 }
6898
/*
 * Return the DISPCLK needed for writeback: the maximum of three candidate
 * clocks, each derived from PixelClock.
 *
 *  - the "H" term depends on the horizontal tap count and horizontal ratio,
 *  - the "V" term depends on the vertical tap count, destination width and
 *    HTotal,
 *  - the "HB" term depends on the vertical tap count, destination width,
 *    line buffer size and source width.
 *
 * WritebackPixelFormat and WritebackVRatio are part of the signature but are
 * not read by this implementation.
 */
static double CalculateWriteBackDISPCLK(
		enum dml2_source_format_class WritebackPixelFormat,
		double PixelClock,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackHTaps,
		unsigned int WritebackVTaps,
		unsigned int WritebackSourceWidth,
		unsigned int WritebackDestinationWidth,
		unsigned int HTotal,
		unsigned int WritebackLineBufferSize)
{
	/* Horizontal taps are counted in started groups of 8. */
	const double htap_groups = math_ceil2((double)WritebackHTaps / 8.0, 1);
	const double clk_h = PixelClock * htap_groups / WritebackHRatio;

	/* Destination width is counted in started groups of 6, plus a fixed 8. */
	const double dest_groups = math_ceil2((double)WritebackDestinationWidth / 6.0, 1);
	const double clk_v = PixelClock * (WritebackVTaps * dest_groups + 8.0) / (double)HTotal;

	/* The width * taps product is formed in unsigned arithmetic, as before. */
	const double hb_load = WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0;
	const double clk_hb = PixelClock * WritebackVTaps * hb_load / 6.0 / (double)WritebackSourceWidth;

	return math_max3(clk_h, clk_v, clk_hb);
}
6918
RequiredDTBCLK(bool DSCEnable,double PixelClock,enum dml2_output_format_class OutputFormat,double OutputBpp,unsigned int DSCSlices,unsigned int HTotal,unsigned int HActive,unsigned int AudioRate,unsigned int AudioLayout)6919 static double RequiredDTBCLK(
6920 bool DSCEnable,
6921 double PixelClock,
6922 enum dml2_output_format_class OutputFormat,
6923 double OutputBpp,
6924 unsigned int DSCSlices,
6925 unsigned int HTotal,
6926 unsigned int HActive,
6927 unsigned int AudioRate,
6928 unsigned int AudioLayout)
6929 {
6930 if (DSCEnable != true) {
6931 return math_max2(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
6932 } else {
6933 double PixelWordRate = PixelClock / (OutputFormat == dml2_444 ? 1 : 2);
6934 double HCActive = math_ceil2(DSCSlices * math_ceil2(OutputBpp * math_ceil2(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
6935 double HCBlank = 64 + 32 * math_ceil2(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
6936 double AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
6937 double HActiveTribyteRate = PixelWordRate * HCActive / HActive;
6938 return math_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
6939 }
6940 }
6941
DSCDelayRequirement(bool DSCEnabled,enum dml2_odm_mode ODMMode,unsigned int DSCInputBitPerComponent,double OutputBpp,unsigned int HActive,unsigned int HTotal,unsigned int NumberOfDSCSlices,enum dml2_output_format_class OutputFormat,enum dml2_output_encoder_class Output,double PixelClock,double PixelClockBackEnd)6942 static unsigned int DSCDelayRequirement(
6943 bool DSCEnabled,
6944 enum dml2_odm_mode ODMMode,
6945 unsigned int DSCInputBitPerComponent,
6946 double OutputBpp,
6947 unsigned int HActive,
6948 unsigned int HTotal,
6949 unsigned int NumberOfDSCSlices,
6950 enum dml2_output_format_class OutputFormat,
6951 enum dml2_output_encoder_class Output,
6952 double PixelClock,
6953 double PixelClockBackEnd)
6954 {
6955 unsigned int DSCDelayRequirement_val = 0;
6956 unsigned int NumberOfDSCSlicesFactor = 1;
6957
6958 if (DSCEnabled == true && OutputBpp != 0) {
6959
6960 if (ODMMode == dml2_odm_mode_combine_4to1)
6961 NumberOfDSCSlicesFactor = 4;
6962 else if (ODMMode == dml2_odm_mode_combine_3to1)
6963 NumberOfDSCSlicesFactor = 3;
6964 else if (ODMMode == dml2_odm_mode_combine_2to1)
6965 NumberOfDSCSlicesFactor = 2;
6966
6967 DSCDelayRequirement_val = NumberOfDSCSlicesFactor * (dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, (unsigned int)(math_ceil2((double)HActive / (double)NumberOfDSCSlices, 1.0)),
6968 (NumberOfDSCSlices / NumberOfDSCSlicesFactor), OutputFormat, Output) + dscComputeDelay(OutputFormat, Output));
6969
6970 DSCDelayRequirement_val = (unsigned int)(DSCDelayRequirement_val + (HTotal - HActive) * math_ceil2((double)DSCDelayRequirement_val / (double)HActive, 1.0));
6971 DSCDelayRequirement_val = (unsigned int)(DSCDelayRequirement_val * PixelClock / PixelClockBackEnd);
6972
6973 } else {
6974 DSCDelayRequirement_val = 0;
6975 }
6976 #ifdef __DML_VBA_DEBUG__
6977 dml2_printf("DML::%s: DSCEnabled= %u\n", __func__, DSCEnabled);
6978 dml2_printf("DML::%s: ODMMode = %u\n", __func__, ODMMode);
6979 dml2_printf("DML::%s: OutputBpp = %f\n", __func__, OutputBpp);
6980 dml2_printf("DML::%s: HActive = %u\n", __func__, HActive);
6981 dml2_printf("DML::%s: HTotal = %u\n", __func__, HTotal);
6982 dml2_printf("DML::%s: PixelClock = %f\n", __func__, PixelClock);
6983 dml2_printf("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd);
6984 dml2_printf("DML::%s: OutputFormat = %u\n", __func__, OutputFormat);
6985 dml2_printf("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent);
6986 dml2_printf("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices);
6987 dml2_printf("DML::%s: DSCDelayRequirement_val = %u\n", __func__, DSCDelayRequirement_val);
6988 #endif
6989
6990 return DSCDelayRequirement_val;
6991 }
6992
CalculateSurfaceSizeInMall(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int MALLAllocatedForDCN,unsigned int BytesPerPixelY[],unsigned int BytesPerPixelC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int ReadBlockWidthY[],unsigned int ReadBlockWidthC[],unsigned int ReadBlockHeightY[],unsigned int ReadBlockHeightC[],unsigned int SurfaceSizeInMALL[],bool * ExceededMALLSize)6993 static void CalculateSurfaceSizeInMall(
6994 const struct dml2_display_cfg *display_cfg,
6995 unsigned int NumberOfActiveSurfaces,
6996 unsigned int MALLAllocatedForDCN,
6997 unsigned int BytesPerPixelY[],
6998 unsigned int BytesPerPixelC[],
6999 unsigned int Read256BytesBlockWidthY[],
7000 unsigned int Read256BytesBlockWidthC[],
7001 unsigned int Read256BytesBlockHeightY[],
7002 unsigned int Read256BytesBlockHeightC[],
7003 unsigned int ReadBlockWidthY[],
7004 unsigned int ReadBlockWidthC[],
7005 unsigned int ReadBlockHeightY[],
7006 unsigned int ReadBlockHeightC[],
7007
7008 // Output
7009 unsigned int SurfaceSizeInMALL[],
7010 bool *ExceededMALLSize)
7011 {
7012 unsigned int TotalSurfaceSizeInMALLForSS = 0;
7013 unsigned int TotalSurfaceSizeInMALLForSubVP = 0;
7014 unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024;
7015
7016 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
7017 const struct dml2_composition_cfg *composition = &display_cfg->plane_descriptors[k].composition;
7018 const struct dml2_surface_cfg *surface = &display_cfg->plane_descriptors[k].surface;
7019
7020 if (composition->viewport.stationary) {
7021 SurfaceSizeInMALL[k] = (unsigned int)(math_min2(math_ceil2((double)surface->plane0.width, ReadBlockWidthY[k]),
7022 math_floor2(composition->viewport.plane0.x_start + composition->viewport.plane0.width + ReadBlockWidthY[k] - 1, ReadBlockWidthY[k]) -
7023 math_floor2((double)composition->viewport.plane0.x_start, ReadBlockWidthY[k])) *
7024 math_min2(math_ceil2((double)surface->plane0.height, ReadBlockHeightY[k]),
7025 math_floor2((double)composition->viewport.plane0.y_start + composition->viewport.plane0.height + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
7026 math_floor2((double)composition->viewport.plane0.y_start, ReadBlockHeightY[k])) * BytesPerPixelY[k]);
7027
7028 if (ReadBlockWidthC[k] > 0) {
7029 SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] +
7030 math_min2(math_ceil2((double)surface->plane1.width, ReadBlockWidthC[k]),
7031 math_floor2((double)composition->viewport.plane1.y_start + composition->viewport.plane1.width + ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
7032 math_floor2((double)composition->viewport.plane1.y_start, ReadBlockWidthC[k])) *
7033 math_min2(math_ceil2((double)surface->plane1.height, ReadBlockHeightC[k]),
7034 math_floor2((double)composition->viewport.plane1.y_start + composition->viewport.plane1.height + ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
7035 math_floor2(composition->viewport.plane1.y_start, ReadBlockHeightC[k])) * BytesPerPixelC[k]);
7036 }
7037 if (surface->dcc.enable) {
7038 SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] +
7039 math_min2(math_ceil2(surface->plane0.width, 8 * Read256BytesBlockWidthY[k]),
7040 math_floor2(composition->viewport.plane0.x_start + composition->viewport.plane0.width + 8 * Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k]) -
7041 math_floor2(composition->viewport.plane0.x_start, 8 * Read256BytesBlockWidthY[k])) *
7042 math_min2(math_ceil2(surface->plane0.height, 8 * Read256BytesBlockHeightY[k]),
7043 math_floor2(composition->viewport.plane0.y_start + composition->viewport.plane0.height + 8 * Read256BytesBlockHeightY[k] - 1, 8 * Read256BytesBlockHeightY[k]) -
7044 math_floor2(composition->viewport.plane0.y_start, 8 * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256) + (64 * 1024);
7045 if (Read256BytesBlockWidthC[k] > 0) {
7046 SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] +
7047 math_min2(math_ceil2(surface->plane1.width, 8 * Read256BytesBlockWidthC[k]),
7048 math_floor2(composition->viewport.plane1.y_start + composition->viewport.plane1.width + 8 * Read256BytesBlockWidthC[k] - 1, 8 * Read256BytesBlockWidthC[k]) -
7049 math_floor2(composition->viewport.plane1.y_start, 8 * Read256BytesBlockWidthC[k])) *
7050 math_min2(math_ceil2(surface->plane1.height, 8 * Read256BytesBlockHeightC[k]),
7051 math_floor2(composition->viewport.plane1.y_start + composition->viewport.plane1.height + 8 * Read256BytesBlockHeightC[k] - 1, 8 * Read256BytesBlockHeightC[k]) -
7052 math_floor2(composition->viewport.plane1.y_start, 8 * Read256BytesBlockHeightC[k])) * BytesPerPixelC[k] / 256);
7053 }
7054 }
7055 } else {
7056 SurfaceSizeInMALL[k] = (unsigned int)(math_ceil2(math_min2(surface->plane0.width, composition->viewport.plane0.width + ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
7057 math_ceil2(math_min2(surface->plane0.height, composition->viewport.plane0.height + ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * BytesPerPixelY[k]);
7058 if (ReadBlockWidthC[k] > 0) {
7059 SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] +
7060 math_ceil2(math_min2(surface->plane1.width, composition->viewport.plane1.width + ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
7061 math_ceil2(math_min2(surface->plane1.height, composition->viewport.plane1.height + ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * BytesPerPixelC[k]);
7062 }
7063 if (surface->dcc.enable) {
7064 SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] +
7065 math_ceil2(math_min2(surface->plane0.width, composition->viewport.plane0.width + 8 * Read256BytesBlockWidthY[k] - 1), 8 * Read256BytesBlockWidthY[k]) *
7066 math_ceil2(math_min2(surface->plane0.height, composition->viewport.plane0.height + 8 * Read256BytesBlockHeightY[k] - 1), 8 * Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256) + (64 * 1024);
7067
7068 if (Read256BytesBlockWidthC[k] > 0) {
7069 SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] +
7070 math_ceil2(math_min2(surface->plane1.width, composition->viewport.plane1.width + 8 * Read256BytesBlockWidthC[k] - 1), 8 * Read256BytesBlockWidthC[k]) *
7071 math_ceil2(math_min2(surface->plane1.height, composition->viewport.plane1.height + 8 * Read256BytesBlockHeightC[k] - 1), 8 * Read256BytesBlockHeightC[k]) * BytesPerPixelC[k] / 256);
7072 }
7073 }
7074 }
7075 }
7076
7077 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
7078 /* SS and Subvp counted separate as they are never used at the same time */
7079 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))
7080 TotalSurfaceSizeInMALLForSubVP += SurfaceSizeInMALL[k];
7081 else if (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable)
7082 TotalSurfaceSizeInMALLForSS += SurfaceSizeInMALL[k];
7083 }
7084
7085 *ExceededMALLSize = (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) ||
7086 (TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes);
7087
7088 #ifdef __DML_VBA_DEBUG__
7089 dml2_printf("DML::%s: MALLAllocatedForDCN = %u\n", __func__, MALLAllocatedForDCN * 1024 * 1024);
7090 dml2_printf("DML::%s: TotalSurfaceSizeInMALLForSubVP = %u\n", __func__, TotalSurfaceSizeInMALLForSubVP);
7091 dml2_printf("DML::%s: TotalSurfaceSizeInMALLForSS = %u\n", __func__, TotalSurfaceSizeInMALLForSS);
7092 dml2_printf("DML::%s: ExceededMALLSize = %u\n", __func__, *ExceededMALLSize);
7093 #endif
7094 }
7095
calculate_tdlut_setting(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_calculate_tdlut_setting_params * p)7096 static void calculate_tdlut_setting(
7097 struct dml2_core_internal_scratch *scratch,
7098 struct dml2_core_calcs_calculate_tdlut_setting_params *p)
7099 {
7100 if (!p->setup_for_tdlut) {
7101 *p->tdlut_groups_per_2row_ub = 0;
7102 *p->tdlut_opt_time = 0;
7103 *p->tdlut_drain_time = 0;
7104 *p->tdlut_bytes_per_group = 0;
7105 *p->tdlut_pte_bytes_per_frame = 0;
7106 *p->tdlut_bytes_per_frame = 0;
7107 return;
7108 }
7109
7110 // locals
7111 unsigned int tdlut_bpe = 8;
7112 unsigned int tdlut_width;
7113 unsigned int tdlut_pitch_bytes;
7114 unsigned int tdlut_footprint_bytes;
7115 unsigned int vmpg_bytes;
7116 unsigned int tdlut_vmpg_per_frame;
7117 unsigned int tdlut_pte_req_per_frame;
7118 unsigned int tdlut_bytes_per_line;
7119 unsigned int tdlut_delivery_cycles;
7120 double tdlut_drain_rate;
7121 unsigned int tdlut_mpc_width;
7122 unsigned int tdlut_bytes_per_group_simple;
7123
7124 if (p->tdlut_mpc_width_flag) {
7125 tdlut_mpc_width = 33;
7126 tdlut_bytes_per_group_simple = 39 * 256;
7127 } else {
7128 tdlut_mpc_width = 17;
7129 tdlut_bytes_per_group_simple = 10 * 256;
7130 }
7131
7132 vmpg_bytes = p->gpuvm_page_size_kbytes * 1024;
7133
7134 if (p->tdlut_addressing_mode == dml2_tdlut_simple_linear) {
7135 if (p->tdlut_width_mode == dml2_tdlut_width_17_cube)
7136 tdlut_width = 4916;
7137 else
7138 tdlut_width = 35940;
7139 } else {
7140 if (p->tdlut_width_mode == dml2_tdlut_width_17_cube)
7141 tdlut_width = 17;
7142 else // dml2_tdlut_width_33_cube
7143 tdlut_width = 33;
7144 }
7145
7146 if (p->is_gfx11)
7147 tdlut_pitch_bytes = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); //256B alignment
7148 else
7149 tdlut_pitch_bytes = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 128); //128B alignment
7150
7151 if (p->tdlut_addressing_mode == dml2_tdlut_sw_linear)
7152 tdlut_footprint_bytes = tdlut_pitch_bytes * tdlut_width * tdlut_width;
7153 else
7154 tdlut_footprint_bytes = tdlut_pitch_bytes;
7155
7156 if (!p->gpuvm_enable) {
7157 tdlut_vmpg_per_frame = 0;
7158 tdlut_pte_req_per_frame = 0;
7159 } else {
7160 tdlut_vmpg_per_frame = (unsigned int)math_ceil2(tdlut_footprint_bytes - 1, vmpg_bytes) / vmpg_bytes + 1;
7161 tdlut_pte_req_per_frame = (unsigned int)math_ceil2(tdlut_vmpg_per_frame - 1, 8) / 8 + 1;
7162 }
7163 tdlut_bytes_per_line = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 64); //64b request
7164 *p->tdlut_pte_bytes_per_frame = tdlut_pte_req_per_frame * 64;
7165
7166 if (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) {
7167 //the tdlut_width is either 17 or 33 but the 33x33x33 is subsampled every other line/slice
7168 *p->tdlut_bytes_per_frame = tdlut_bytes_per_line * tdlut_mpc_width * tdlut_mpc_width;
7169 *p->tdlut_bytes_per_group = tdlut_bytes_per_line * tdlut_mpc_width;
7170 //the delivery cycles is DispClk cycles per line * number of lines * number of slices
7171 tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_mpc_width / 2.0, 1) * tdlut_mpc_width * tdlut_mpc_width;
7172 tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / math_ceil2(tdlut_mpc_width/2.0, 1);
7173 } else {
7174 //tdlut_addressing_mode = tdlut_simple_linear, 3dlut width should be 4*1229=4916 elements
7175 *p->tdlut_bytes_per_frame = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256);
7176 *p->tdlut_bytes_per_group = tdlut_bytes_per_group_simple;
7177 tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_width / 2.0, 1);
7178 tdlut_drain_rate = 2 * tdlut_bpe * p->dispclk_mhz;
7179 }
7180
7181 //the tdlut is fetched during the 2 row times of prefetch.
7182 if (p->setup_for_tdlut) {
7183 *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2(*p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1);
7184 *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate;
7185 *p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate;
7186 }
7187
7188 #ifdef __DML_VBA_DEBUG__
7189 dml2_printf("DML::%s: gpuvm_enable = %d\n", __func__, p->gpuvm_enable);
7190 dml2_printf("DML::%s: vmpg_bytes = %d\n", __func__, vmpg_bytes);
7191 dml2_printf("DML::%s: tdlut_vmpg_per_frame = %d\n", __func__, tdlut_vmpg_per_frame);
7192 dml2_printf("DML::%s: tdlut_pte_req_per_frame = %d\n", __func__, tdlut_pte_req_per_frame);
7193 dml2_printf("DML::%s: dispclk_mhz = %f\n", __func__, p->dispclk_mhz);
7194 dml2_printf("DML::%s: tdlut_width = %u\n", __func__, tdlut_width);
7195 dml2_printf("DML::%s: tdlut_addressing_mode = %u\n", __func__, p->tdlut_addressing_mode);
7196 dml2_printf("DML::%s: tdlut_pitch_bytes = %u\n", __func__, tdlut_pitch_bytes);
7197 dml2_printf("DML::%s: tdlut_footprint_bytes = %u\n", __func__, tdlut_footprint_bytes);
7198 dml2_printf("DML::%s: tdlut_bytes_per_frame = %u\n", __func__, *p->tdlut_bytes_per_frame);
7199 dml2_printf("DML::%s: tdlut_bytes_per_line = %u\n", __func__, tdlut_bytes_per_line);
7200 dml2_printf("DML::%s: tdlut_bytes_per_group = %u\n", __func__, *p->tdlut_bytes_per_group);
7201 dml2_printf("DML::%s: tdlut_drain_rate = %f\n", __func__, tdlut_drain_rate);
7202 dml2_printf("DML::%s: tdlut_delivery_cycles = %u\n", __func__, tdlut_delivery_cycles);
7203 dml2_printf("DML::%s: tdlut_opt_time = %f\n", __func__, *p->tdlut_opt_time);
7204 dml2_printf("DML::%s: tdlut_drain_time = %f\n", __func__, *p->tdlut_drain_time);
7205 dml2_printf("DML::%s: tdlut_groups_per_2row_ub = %d\n", __func__, *p->tdlut_groups_per_2row_ub);
7206 #endif
7207 }
7208
CalculateTarb(const struct dml2_display_cfg * display_cfg,unsigned int PixelChunkSizeInKByte,unsigned int NumberOfActiveSurfaces,unsigned int NumberOfDPP[],unsigned int dpte_group_bytes[],unsigned int tdlut_bytes_per_group[],double HostVMInefficiencyFactor,double HostVMInefficiencyFactorPrefetch,unsigned int HostVMMinPageSize,double ReturnBW,unsigned int MetaChunkSize,double * Tarb,double * Tarb_prefetch)7209 static void CalculateTarb(
7210 const struct dml2_display_cfg *display_cfg,
7211 unsigned int PixelChunkSizeInKByte,
7212 unsigned int NumberOfActiveSurfaces,
7213 unsigned int NumberOfDPP[],
7214 unsigned int dpte_group_bytes[],
7215 unsigned int tdlut_bytes_per_group[],
7216 double HostVMInefficiencyFactor,
7217 double HostVMInefficiencyFactorPrefetch,
7218 unsigned int HostVMMinPageSize,
7219 double ReturnBW,
7220 unsigned int MetaChunkSize,
7221
7222 // output
7223 double *Tarb,
7224 double *Tarb_prefetch)
7225 {
7226 double extra_bytes = 0;
7227 double extra_bytes_prefetch = 0;
7228 double HostVMDynamicLevels = CalculateHostVMDynamicLevels(display_cfg->gpuvm_enable, display_cfg->hostvm_enable, HostVMMinPageSize, display_cfg->hostvm_max_non_cached_page_table_levels);
7229
7230 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
7231 extra_bytes = extra_bytes + (NumberOfDPP[k] * PixelChunkSizeInKByte * 1024);
7232
7233 if (display_cfg->plane_descriptors[k].surface.dcc.enable)
7234 extra_bytes = extra_bytes + (MetaChunkSize * 1024);
7235
7236 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut)
7237 extra_bytes = extra_bytes + tdlut_bytes_per_group[k];
7238 }
7239
7240 extra_bytes_prefetch = extra_bytes;
7241
7242 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
7243 if (display_cfg->gpuvm_enable == true) {
7244 extra_bytes = extra_bytes + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
7245 extra_bytes_prefetch = extra_bytes_prefetch + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactorPrefetch;
7246 }
7247 }
7248 *Tarb = extra_bytes / ReturnBW;
7249 *Tarb_prefetch = extra_bytes_prefetch / ReturnBW;
7250 #ifdef __DML_VBA_DEBUG__
7251 dml2_printf("DML::%s: PixelChunkSizeInKByte = %d\n", __func__, PixelChunkSizeInKByte);
7252 dml2_printf("DML::%s: MetaChunkSize = %d\n", __func__, MetaChunkSize);
7253 dml2_printf("DML::%s: extra_bytes = %f\n", __func__, extra_bytes);
7254 dml2_printf("DML::%s: extra_bytes_prefetch = %f\n", __func__, extra_bytes_prefetch);
7255 #endif
7256 }
7257
/*
 * CalculateTWait - combine the reserved vblank time with the urgent/trip
 * latency.
 *
 * @reserved_vblank_time_ns: reserved time; divided by 1000.0 before use
 *                           (ns -> presumably us, matching the latencies).
 * @UrgentLatency:           urgent latency.
 * @Ttrip:                   trip latency.
 *
 * Returns reserved_vblank_time_ns / 1000.0 + max(UrgentLatency, Ttrip).
 */
static double CalculateTWait(
	long reserved_vblank_time_ns,
	double UrgentLatency,
	double Ttrip)
{
	double TWait;
	double t_urg_trip = math_max2(UrgentLatency, Ttrip);

	TWait = reserved_vblank_time_ns / 1000.0 + t_urg_trip;

#ifdef __DML_VBA_DEBUG__
	/* reserved_vblank_time_ns is a long, so it must be printed with %ld (was %d). */
	dml2_printf("DML::%s: reserved_vblank_time_ns = %ld\n", __func__, reserved_vblank_time_ns);
	dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
	dml2_printf("DML::%s: Ttrip = %f\n", __func__, Ttrip);
	dml2_printf("DML::%s: TWait = %f\n", __func__, TWait);
#endif
	return TWait;
}
7275
7276
/*
 * CalculateVUpdateAndDynamicMetadataParameters - derive the VUpdate/VReady
 * pixel offsets, the setup time and the dynamic metadata timing terms.
 *
 * Outputs:
 *   *VUpdateWidthPix  = ceil((14/DCFClkDeepSleep + 12/Dppclk + repeater delay) * PixelClock)
 *   *VReadyOffsetPix  = ceil(max(150/Dppclk,
 *                                repeater delay + 20/DCFClkDeepSleep + 10/Dppclk) * PixelClock)
 *   *VUpdateOffsetPix = ceil(HTotal / 4)
 *   *TSetup           = (sum of the three pixel counts above) / PixelClock
 *   *Tdmbf            = DynamicMetadataTransmittedBytes / 4 / Dispclk
 *   *Tdmec            = HTotal / PixelClock
 *   *Tdmsks           = half of VBlank * HTotal / PixelClock when no explicit
 *                       line count is required, otherwise
 *                       DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
 *                       halved again for interlace without the
 *                       progressive-to-interlace unit in OPP.
 *
 * where "repeater delay" is MaxInterDCNTileRepeaters * (2/Dppclk + 3/Dispclk).
 * The clocks are used as divisors and are not checked for zero here.
 */
static void CalculateVUpdateAndDynamicMetadataParameters(
	unsigned int MaxInterDCNTileRepeaters,
	double Dppclk,
	double Dispclk,
	double DCFClkDeepSleep,
	double PixelClock,
	unsigned int HTotal,
	unsigned int VBlank,
	unsigned int DynamicMetadataTransmittedBytes,
	unsigned int DynamicMetadataLinesBeforeActiveRequired,
	unsigned int InterlaceEnable,
	bool ProgressiveToInterlaceUnitInOPP,

	// Output
	double *TSetup,
	double *Tdmbf,
	double *Tdmec,
	double *Tdmsks,
	unsigned int *VUpdateOffsetPix,
	unsigned int *VUpdateWidthPix,
	unsigned int *VReadyOffsetPix)
{
	const double TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	const double vupdate_width_time = 14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime;
	const double vready_offset_time = math_max2(150.0 / Dppclk, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk);
	double metadata_lead_time;

	/* Pixel-domain quantities, each rounded up to a whole pixel. */
	*VUpdateWidthPix = (unsigned int)(math_ceil2(vupdate_width_time * PixelClock, 1.0));
	*VReadyOffsetPix = (unsigned int)(math_ceil2(vready_offset_time * PixelClock, 1.0));
	*VUpdateOffsetPix = (unsigned int)(math_ceil2(HTotal / 4.0, 1.0));

	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	/* An explicit line requirement wins; otherwise use half of the vblank. */
	if (DynamicMetadataLinesBeforeActiveRequired != 0)
		metadata_lead_time = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	else
		metadata_lead_time = VBlank * HTotal / PixelClock / 2.0;

	if (InterlaceEnable == 1 && !ProgressiveToInterlaceUnitInOPP)
		metadata_lead_time = metadata_lead_time / 2;

	*Tdmsks = metadata_lead_time;
#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired);
	dml2_printf("DML::%s: VBlank = %u\n", __func__, VBlank);
	dml2_printf("DML::%s: HTotal = %u\n", __func__, HTotal);
	dml2_printf("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml2_printf("DML::%s: Dppclk = %f\n", __func__, Dppclk);
	dml2_printf("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep);
	dml2_printf("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters);
	dml2_printf("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, TotalRepeaterDelayTime);

	dml2_printf("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix);
	dml2_printf("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix);
	dml2_printf("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix);

	dml2_printf("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
7333
get_urgent_bandwidth_required(struct dml2_core_shared_get_urgent_bandwidth_required_locals * l,const struct dml2_display_cfg * display_cfg,enum dml2_core_internal_soc_state_type state_type,enum dml2_core_internal_bw_type bw_type,bool inc_flip_bw,unsigned int NumberOfActiveSurfaces,unsigned int NumberOfDPP[],double dcc_dram_bw_nom_overhead_factor_p0[],double dcc_dram_bw_nom_overhead_factor_p1[],double dcc_dram_bw_pref_overhead_factor_p0[],double dcc_dram_bw_pref_overhead_factor_p1[],double mall_prefetch_sdp_overhead_factor[],double mall_prefetch_dram_overhead_factor[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double PrefetchBandwidthLuma[],double PrefetchBandwidthChroma[],double cursor_bw[],double dpte_row_bw[],double meta_row_bw[],double prefetch_cursor_bw[],double prefetch_vmrow_bw[],double flip_bw[],double UrgentBurstFactorLuma[],double UrgentBurstFactorChroma[],double UrgentBurstFactorCursor[],double UrgentBurstFactorLumaPre[],double UrgentBurstFactorChromaPre[],double UrgentBurstFactorCursorPre[])7334 static double get_urgent_bandwidth_required(
7335 struct dml2_core_shared_get_urgent_bandwidth_required_locals *l,
7336 const struct dml2_display_cfg *display_cfg,
7337 enum dml2_core_internal_soc_state_type state_type,
7338 enum dml2_core_internal_bw_type bw_type,
7339 bool inc_flip_bw, // including flip bw
7340 unsigned int NumberOfActiveSurfaces,
7341 unsigned int NumberOfDPP[],
7342 double dcc_dram_bw_nom_overhead_factor_p0[],
7343 double dcc_dram_bw_nom_overhead_factor_p1[],
7344 double dcc_dram_bw_pref_overhead_factor_p0[],
7345 double dcc_dram_bw_pref_overhead_factor_p1[],
7346 double mall_prefetch_sdp_overhead_factor[],
7347 double mall_prefetch_dram_overhead_factor[],
7348 double ReadBandwidthLuma[],
7349 double ReadBandwidthChroma[],
7350 double PrefetchBandwidthLuma[],
7351 double PrefetchBandwidthChroma[],
7352 double cursor_bw[],
7353 double dpte_row_bw[],
7354 double meta_row_bw[],
7355 double prefetch_cursor_bw[],
7356 double prefetch_vmrow_bw[],
7357 double flip_bw[],
7358 double UrgentBurstFactorLuma[],
7359 double UrgentBurstFactorChroma[],
7360 double UrgentBurstFactorCursor[],
7361 double UrgentBurstFactorLumaPre[],
7362 double UrgentBurstFactorChromaPre[],
7363 double UrgentBurstFactorCursorPre[])
7364 {
7365 memset(l, 0, sizeof(struct dml2_core_shared_get_urgent_bandwidth_required_locals));
7366
7367 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
7368 l->mall_svp_prefetch_factor = (state_type == dml2_core_internal_soc_state_svp_prefetch) ? (bw_type == dml2_core_internal_bw_dram ? mall_prefetch_dram_overhead_factor[k] : mall_prefetch_sdp_overhead_factor[k]) : 1.0;
7369 l->tmp_nom_adj_factor_p0 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor;
7370 l->tmp_nom_adj_factor_p1 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor;
7371 l->tmp_pref_adj_factor_p0 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_pref_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor;
7372 l->tmp_pref_adj_factor_p1 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_pref_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor;
7373
7374 l->adj_factor_p0 = UrgentBurstFactorLuma[k] * l->tmp_nom_adj_factor_p0;
7375 l->adj_factor_p1 = UrgentBurstFactorChroma[k] * l->tmp_nom_adj_factor_p1;
7376 l->adj_factor_cur = UrgentBurstFactorCursor[k];
7377 l->adj_factor_p0_pre = UrgentBurstFactorLumaPre[k] * l->tmp_pref_adj_factor_p0;
7378 l->adj_factor_p1_pre = UrgentBurstFactorChromaPre[k] * l->tmp_pref_adj_factor_p1;
7379 l->adj_factor_cur_pre = UrgentBurstFactorCursorPre[k];
7380
7381 // both dchub_urgent_bw_at_sdp_noflip and dchub_urgent_bw_at_dram_noflip don't include the phantom_pipe because iflips dont occur while phantom_pipe is active
7382 bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]);
7383 bool exclude_this_plane = 0;
7384
7385 // Exclude phantom pipe in bw calculation for non svp prefetch state
7386 if (state_type != dml2_core_internal_soc_state_svp_prefetch && is_phantom)
7387 exclude_this_plane = 1;
7388
7389 if (display_cfg->plane_descriptors[k].immediate_flip == false || !inc_flip_bw)
7390 l->per_plane_flip_bw[k] = NumberOfDPP[k] * (dpte_row_bw[k] + meta_row_bw[k]);
7391 else
7392 l->per_plane_flip_bw[k] = NumberOfDPP[k] * flip_bw[k];
7393
7394
7395 if (!exclude_this_plane) {
7396 l->required_bandwidth_mbps_this_surface = math_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
7397 l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur,
7398 l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre);
7399
7400 l->required_bandwidth_mbps = l->required_bandwidth_mbps + l->required_bandwidth_mbps_this_surface;
7401 }
7402
7403 #ifdef __DML_VBA_DEBUG__
7404 dml2_printf("DML::%s: k=%d, NumberOfDPP=%d\n", __func__, k, NumberOfDPP[k]);
7405 dml2_printf("DML::%s: k=%d, mall_svp_prefetch_factor=%f\n", __func__, k, l->mall_svp_prefetch_factor);
7406 dml2_printf("DML::%s: k=%d, adj_factor_p0=%f\n", __func__, k, l->adj_factor_p0);
7407 dml2_printf("DML::%s: k=%d, adj_factor_p1=%f\n", __func__, k, l->adj_factor_p1);
7408 dml2_printf("DML::%s: k=%d, adj_factor_cur=%f\n", __func__, k, l->adj_factor_cur);
7409
7410 dml2_printf("DML::%s: k=%d, adj_factor_p0_pre=%f\n", __func__, k, l->adj_factor_p0_pre);
7411 dml2_printf("DML::%s: k=%d, adj_factor_p1_pre=%f\n", __func__, k, l->adj_factor_p1_pre);
7412 dml2_printf("DML::%s: k=%d, adj_factor_cur_pre=%f\n", __func__, k, l->adj_factor_cur_pre);
7413
7414 dml2_printf("DML::%s: k=%d, per_plane_flip_bw=%f\n", __func__, k, l->per_plane_flip_bw[k]);
7415 dml2_printf("DML::%s: k=%d, prefetch_vmrow_bw=%f\n", __func__, k, prefetch_vmrow_bw[k]);
7416 dml2_printf("DML::%s: k=%d, ReadBandwidthLuma=%f\n", __func__, k, ReadBandwidthLuma[k]);
7417 dml2_printf("DML::%s: k=%d, ReadBandwidthChroma=%f\n", __func__, k, ReadBandwidthChroma[k]);
7418 dml2_printf("DML::%s: k=%d, cursor_bw=%f\n", __func__, k, cursor_bw[k]);
7419
7420 dml2_printf("DML::%s: k=%d, meta_row_bw=%f\n", __func__, k, meta_row_bw[k]);
7421 dml2_printf("DML::%s: k=%d, dpte_row_bw=%f\n", __func__, k, dpte_row_bw[k]);
7422 dml2_printf("DML::%s: k=%d, PrefetchBandwidthLuma=%f\n", __func__, k, PrefetchBandwidthLuma[k]);
7423 dml2_printf("DML::%s: k=%d, PrefetchBandwidthChroma=%f\n", __func__, k, PrefetchBandwidthChroma[k]);
7424 dml2_printf("DML::%s: k=%d, prefetch_cursor_bw=%f\n", __func__, k, prefetch_cursor_bw[k]);
7425 dml2_printf("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane);
7426 #endif
7427 }
7428
7429 return l->required_bandwidth_mbps;
7430 }
7431
CalculateExtraLatency(const struct dml2_display_cfg * display_cfg,unsigned int ROBBufferSizeInKByte,unsigned int RoundTripPingLatencyCycles,unsigned int ReorderingBytes,double DCFCLK,double FabricClock,unsigned int PixelChunkSizeInKByte,double ReturnBW,unsigned int NumberOfActiveSurfaces,unsigned int NumberOfDPP[],unsigned int dpte_group_bytes[],unsigned int tdlut_bytes_per_group[],double HostVMInefficiencyFactor,double HostVMInefficiencyFactorPrefetch,unsigned int HostVMMinPageSize,enum dml2_qos_param_type qos_type,bool max_oustanding_when_urgent_expected,unsigned int max_outstanding_requests,unsigned int request_size_bytes_luma[],unsigned int request_size_bytes_chroma[],unsigned int MetaChunkSize,unsigned int dchub_arb_to_ret_delay,double Ttrip,unsigned int hostvm_mode,double * ExtraLatency,double * ExtraLatency_sr,double * ExtraLatencyPrefetch)7432 static void CalculateExtraLatency(
7433 const struct dml2_display_cfg *display_cfg,
7434 unsigned int ROBBufferSizeInKByte,
7435 unsigned int RoundTripPingLatencyCycles,
7436 unsigned int ReorderingBytes,
7437 double DCFCLK,
7438 double FabricClock,
7439 unsigned int PixelChunkSizeInKByte,
7440 double ReturnBW,
7441 unsigned int NumberOfActiveSurfaces,
7442 unsigned int NumberOfDPP[],
7443 unsigned int dpte_group_bytes[],
7444 unsigned int tdlut_bytes_per_group[],
7445 double HostVMInefficiencyFactor,
7446 double HostVMInefficiencyFactorPrefetch,
7447 unsigned int HostVMMinPageSize,
7448 enum dml2_qos_param_type qos_type,
7449 bool max_oustanding_when_urgent_expected,
7450 unsigned int max_outstanding_requests,
7451 unsigned int request_size_bytes_luma[],
7452 unsigned int request_size_bytes_chroma[],
7453 unsigned int MetaChunkSize,
7454 unsigned int dchub_arb_to_ret_delay,
7455 double Ttrip,
7456 unsigned int hostvm_mode,
7457
7458 // output
7459 double *ExtraLatency,
7460 double *ExtraLatency_sr,
7461 double *ExtraLatencyPrefetch)
7462 {
7463 double Tarb;
7464 double Tarb_prefetch;
7465
7466 CalculateTarb(
7467 display_cfg,
7468 PixelChunkSizeInKByte,
7469 NumberOfActiveSurfaces,
7470 NumberOfDPP,
7471 dpte_group_bytes,
7472 tdlut_bytes_per_group,
7473 HostVMInefficiencyFactor,
7474 HostVMInefficiencyFactorPrefetch,
7475 HostVMMinPageSize,
7476 ReturnBW,
7477 MetaChunkSize,
7478 // output
7479 &Tarb,
7480 &Tarb_prefetch);
7481
7482 unsigned int max_request_size_bytes = 0;
7483 double Tex_trips = (display_cfg->hostvm_enable && hostvm_mode == 1) ? (2.0 * Ttrip) : 0.0;
7484
7485 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
7486 if (request_size_bytes_luma[k] > max_request_size_bytes)
7487 max_request_size_bytes = request_size_bytes_luma[k];
7488 if (request_size_bytes_chroma[k] > max_request_size_bytes)
7489 max_request_size_bytes = request_size_bytes_chroma[k];
7490 }
7491
7492 if (qos_type == dml2_qos_param_type_dcn4x) {
7493 *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK;
7494 *ExtraLatency = *ExtraLatency_sr;
7495 if (max_oustanding_when_urgent_expected)
7496 *ExtraLatency = *ExtraLatency + (ROBBufferSizeInKByte * 1024 - max_outstanding_requests * max_request_size_bytes) / ReturnBW;
7497 } else {
7498 *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK + RoundTripPingLatencyCycles / FabricClock + ReorderingBytes / ReturnBW;
7499 *ExtraLatency = *ExtraLatency_sr;
7500 }
7501 *ExtraLatency = *ExtraLatency + Tex_trips;
7502 *ExtraLatencyPrefetch = *ExtraLatency + Tarb_prefetch;
7503 *ExtraLatency = *ExtraLatency + Tarb;
7504 *ExtraLatency_sr = *ExtraLatency_sr + Tarb;
7505
7506 #ifdef __DML_VBA_DEBUG__
7507 dml2_printf("DML::%s: qos_type=%u\n", __func__, qos_type);
7508 dml2_printf("DML::%s: hostvm_mode=%u\n", __func__, hostvm_mode);
7509 dml2_printf("DML::%s: Tex_trips=%u\n", __func__, Tex_trips);
7510 dml2_printf("DML::%s: max_oustanding_when_urgent_expected=%u\n", __func__, max_oustanding_when_urgent_expected);
7511 dml2_printf("DML::%s: FabricClock=%f\n", __func__, FabricClock);
7512 dml2_printf("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
7513 dml2_printf("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
7514 dml2_printf("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles);
7515 dml2_printf("DML::%s: ReorderingBytes=%u\n", __func__, ReorderingBytes);
7516 dml2_printf("DML::%s: Tarb=%f\n", __func__, Tarb);
7517 dml2_printf("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency);
7518 dml2_printf("DML::%s: ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr);
7519 dml2_printf("DML::%s: ExtraLatencyPrefetch=%f\n", __func__, *ExtraLatencyPrefetch);
7520 #endif
7521 }
7522
CalculatePrefetchSchedule(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_CalculatePrefetchSchedule_params * p)7523 static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_CalculatePrefetchSchedule_params *p)
7524 {
7525 struct dml2_core_calcs_CalculatePrefetchSchedule_locals *s = &scratch->CalculatePrefetchSchedule_locals;
7526
7527 s->NoTimeToPrefetch = false;
7528 s->DPPCycles = 0;
7529 s->DISPCLKCycles = 0;
7530 s->DSTTotalPixelsAfterScaler = 0.0;
7531 s->LineTime = 0.0;
7532 s->dst_y_prefetch_equ = 0.0;
7533 s->prefetch_bw_oto = 0.0;
7534 s->Tvm_oto = 0.0;
7535 s->Tr0_oto = 0.0;
7536 s->Tvm_oto_lines = 0.0;
7537 s->Tr0_oto_lines = 0.0;
7538 s->dst_y_prefetch_oto = 0.0;
7539 s->TimeForFetchingVM = 0.0;
7540 s->TimeForFetchingRowInVBlank = 0.0;
7541 s->LinesToRequestPrefetchPixelData = 0.0;
7542 s->HostVMDynamicLevelsTrips = 0;
7543 s->trip_to_mem = 0.0;
7544 *p->Tvm_trips = 0.0;
7545 *p->Tr0_trips = 0.0;
7546 s->Tvm_trips_rounded = 0.0;
7547 s->Tr0_trips_rounded = 0.0;
7548 s->max_Tsw = 0.0;
7549 s->Lsw_oto = 0.0;
7550 s->Tpre_rounded = 0.0;
7551 s->prefetch_bw_equ = 0.0;
7552 s->Tvm_equ = 0.0;
7553 s->Tr0_equ = 0.0;
7554 s->Tdmbf = 0.0;
7555 s->Tdmec = 0.0;
7556 s->Tdmsks = 0.0;
7557 s->prefetch_sw_bytes = 0.0;
7558 s->prefetch_bw_pr = 0.0;
7559 s->bytes_pp = 0.0;
7560 s->dep_bytes = 0.0;
7561 s->min_Lsw_oto = 0.0;
7562 s->Tsw_est1 = 0.0;
7563 s->Tsw_est3 = 0.0;
7564 s->cursor_prefetch_bytes = 0;
7565 *p->prefetch_cursor_bw = 0;
7566 bool dcc_mrq_enable = (p->dcc_enable && p->mrq_present);
7567
7568 s->TWait_p = p->TWait - p->Ttrip; // TWait includes max(Turg, Ttrip)
7569
7570 if (p->display_cfg->gpuvm_enable == true && p->display_cfg->hostvm_enable == true) {
7571 s->HostVMDynamicLevelsTrips = p->display_cfg->hostvm_max_non_cached_page_table_levels;
7572 } else {
7573 s->HostVMDynamicLevelsTrips = 0;
7574 }
7575 #ifdef __DML_VBA_DEBUG__
7576 dml2_printf("DML::%s: dcc_enable = %u\n", __func__, p->dcc_enable);
7577 dml2_printf("DML::%s: mrq_present = %u\n", __func__, p->mrq_present);
7578 dml2_printf("DML::%s: dcc_mrq_enable = %u\n", __func__, dcc_mrq_enable);
7579 dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, p->display_cfg->gpuvm_enable);
7580 dml2_printf("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels);
7581 dml2_printf("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable);
7582 dml2_printf("DML::%s: VStartup = %u\n", __func__, p->VStartup);
7583 dml2_printf("DML::%s: MaxVStartup = %u\n", __func__, p->MaxVStartup);
7584 dml2_printf("DML::%s: HostVMEnable = %u\n", __func__, p->display_cfg->hostvm_enable);
7585 dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
7586 dml2_printf("DML::%s: TWait = %f\n", __func__, p->TWait);
7587 dml2_printf("DML::%s: TWait_p = %f\n", __func__, s->TWait_p);
7588 dml2_printf("DML::%s: Ttrip = %f\n", __func__, p->Ttrip);
7589 dml2_printf("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk);
7590 dml2_printf("DML::%s: myPipe->Dispclk = %f\n", __func__, p->myPipe->Dispclk);
7591 #endif
7592 CalculateVUpdateAndDynamicMetadataParameters(
7593 p->MaxInterDCNTileRepeaters,
7594 p->myPipe->Dppclk,
7595 p->myPipe->Dispclk,
7596 p->myPipe->DCFClkDeepSleep,
7597 p->myPipe->PixelClock,
7598 p->myPipe->HTotal,
7599 p->myPipe->VBlank,
7600 p->DynamicMetadataTransmittedBytes,
7601 p->DynamicMetadataLinesBeforeActiveRequired,
7602 p->myPipe->InterlaceEnable,
7603 p->myPipe->ProgressiveToInterlaceUnitInOPP,
7604 p->TSetup,
7605
7606 // Output
7607 &s->Tdmbf,
7608 &s->Tdmec,
7609 &s->Tdmsks,
7610 p->VUpdateOffsetPix,
7611 p->VUpdateWidthPix,
7612 p->VReadyOffsetPix);
7613
7614 s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock;
7615 s->trip_to_mem = p->Ttrip;
7616 *p->Tvm_trips = p->ExtraLatencyPrefetch + s->trip_to_mem * (p->display_cfg->gpuvm_max_page_table_levels * (s->HostVMDynamicLevelsTrips + 1));
7617 if (dcc_mrq_enable)
7618 *p->Tvm_trips_flip = *p->Tvm_trips;
7619 else
7620 *p->Tvm_trips_flip = *p->Tvm_trips - s->trip_to_mem;
7621 *p->Tr0_trips_flip = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1);
7622 *p->Tr0_trips = math_max2(*p->Tr0_trips_flip, p->tdlut_opt_time / 2);
7623
7624 if (p->DynamicMetadataVMEnabled == true) {
7625 *p->Tdmdl_vm = s->TWait_p + *p->Tvm_trips;
7626 *p->Tdmdl = *p->Tdmdl_vm + p->Ttrip;
7627 } else {
7628 *p->Tdmdl_vm = 0;
7629 *p->Tdmdl = p->TWait + p->ExtraLatencyPrefetch; // Tex
7630 }
7631
7632 if (p->DynamicMetadataEnable == true) {
7633 if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) {
7634 *p->NotEnoughTimeForDynamicMetadata = true;
7635 dml2_printf("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
7636 dml2_printf("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
7637 dml2_printf("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
7638 dml2_printf("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
7639 dml2_printf("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
7640 } else {
7641 *p->NotEnoughTimeForDynamicMetadata = false;
7642 }
7643 } else {
7644 *p->NotEnoughTimeForDynamicMetadata = false;
7645 }
7646
7647 if (p->myPipe->ScalerEnabled)
7648 s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCL);
7649 else
7650 s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCLLBOnly);
7651
7652 s->DPPCycles = (unsigned int)(s->DPPCycles + p->myPipe->NumberOfCursors * p->DPPCLKDelayCNVCCursor);
7653
7654 s->DISPCLKCycles = (unsigned int)p->DISPCLKDelaySubtotal;
7655
7656 if (p->myPipe->Dppclk == 0.0 || p->myPipe->Dispclk == 0.0)
7657 return true;
7658
7659 *p->DSTXAfterScaler = (unsigned int)math_round(s->DPPCycles * p->myPipe->PixelClock / p->myPipe->Dppclk + s->DISPCLKCycles * p->myPipe->PixelClock / p->myPipe->Dispclk + p->DSCDelay);
7660 *p->DSTXAfterScaler = (unsigned int)math_round(*p->DSTXAfterScaler + (p->myPipe->ODMMode != dml2_odm_mode_bypass ? 18 : 0) + (p->myPipe->DPPPerSurface - 1) * p->DPP_RECOUT_WIDTH +
7661 ((p->myPipe->ODMMode == dml2_odm_mode_split_1to2 || p->myPipe->ODMMode == dml2_odm_mode_mso_1to2) ? (double)p->myPipe->HActive / 2.0 : 0) +
7662 ((p->myPipe->ODMMode == dml2_odm_mode_mso_1to4) ? (double)p->myPipe->HActive * 3.0 / 4.0 : 0));
7663
7664 #ifdef __DML_VBA_DEBUG__
7665 dml2_printf("DML::%s: DynamicMetadataVMEnabled = %u\n", __func__, p->DynamicMetadataVMEnabled);
7666 dml2_printf("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles);
7667 dml2_printf("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock);
7668 dml2_printf("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk);
7669 dml2_printf("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles);
7670 dml2_printf("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk);
7671 dml2_printf("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay);
7672 dml2_printf("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode);
7673 dml2_printf("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH);
7674 dml2_printf("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler);
7675
7676 dml2_printf("DML::%s: setup_for_tdlut = %u\n", __func__, p->setup_for_tdlut);
7677 dml2_printf("DML::%s: tdlut_opt_time = %f\n", __func__, p->tdlut_opt_time);
7678 dml2_printf("DML::%s: tdlut_pte_bytes_per_frame = %u\n", __func__, p->tdlut_pte_bytes_per_frame);
7679 #endif
7680
7681 if (p->OutputFormat == dml2_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP))
7682 *p->DSTYAfterScaler = 1;
7683 else
7684 *p->DSTYAfterScaler = 0;
7685
7686 s->DSTTotalPixelsAfterScaler = *p->DSTYAfterScaler * p->myPipe->HTotal + *p->DSTXAfterScaler;
7687 *p->DSTYAfterScaler = (unsigned int)(math_floor2(s->DSTTotalPixelsAfterScaler / p->myPipe->HTotal, 1));
7688 *p->DSTXAfterScaler = (unsigned int)(s->DSTTotalPixelsAfterScaler - ((double)(*p->DSTYAfterScaler * p->myPipe->HTotal)));
7689 #ifdef __DML_VBA_DEBUG__
7690 dml2_printf("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler);
7691 dml2_printf("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler);
7692 #endif
7693
7694 s->NoTimeToPrefetch = false;
7695 #ifdef __DML_VBA_DEBUG__
7696 dml2_printf("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips);
7697 dml2_printf("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips);
7698 dml2_printf("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem);
7699 dml2_printf("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch);
7700 dml2_printf("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels);
7701 dml2_printf("DML::%s: HostVMDynamicLevelsTrips = %u\n", __func__, s->HostVMDynamicLevelsTrips);
7702 #endif
7703 if (p->display_cfg->gpuvm_enable) {
7704 s->Tvm_trips_rounded = math_ceil2(4.0 * *p->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
7705 *p->Tvm_trips_flip_rounded = math_ceil2(4.0 * *p->Tvm_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime;
7706 } else {
7707 s->Tvm_trips_rounded = s->LineTime / 4.0;
7708 *p->Tvm_trips_flip_rounded = s->LineTime / 4.0;
7709 }
7710 s->Tvm_trips_rounded = math_max2(s->Tvm_trips_rounded, s->LineTime / 4.0);
7711 *p->Tvm_trips_flip_rounded = math_max2(*p->Tvm_trips_flip_rounded, s->LineTime / 4.0);
7712
7713 if (p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable) {
7714 s->Tr0_trips_rounded = math_ceil2(4.0 * *p->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
7715 *p->Tr0_trips_flip_rounded = math_ceil2(4.0 * *p->Tr0_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime;
7716 } else {
7717 s->Tr0_trips_rounded = s->LineTime / 4.0;
7718 *p->Tr0_trips_flip_rounded = s->LineTime / 4.0;
7719 }
7720 s->Tr0_trips_rounded = math_max2(s->Tr0_trips_rounded, s->LineTime / 4.0);
7721 *p->Tr0_trips_flip_rounded = math_max2(*p->Tr0_trips_flip_rounded, s->LineTime / 4.0);
7722
7723 *p->Tno_bw_flip = 0;
7724 if (p->display_cfg->gpuvm_enable == true) {
7725 if (p->display_cfg->gpuvm_max_page_table_levels >= 3) {
7726 *p->Tno_bw = p->ExtraLatencyPrefetch + s->trip_to_mem * (double)((p->display_cfg->gpuvm_max_page_table_levels - 2) * (s->HostVMDynamicLevelsTrips + 1));
7727 } else if (p->display_cfg->gpuvm_max_page_table_levels == 1 && !dcc_mrq_enable && !p->setup_for_tdlut) {
7728 *p->Tno_bw = p->ExtraLatencyPrefetch;
7729 } else {
7730 *p->Tno_bw = 0;
7731 }
7732 *p->Tno_bw_flip = *p->Tno_bw;
7733 } else {
7734 *p->Tno_bw = 0;
7735 }
7736
7737 if (dml2_core_shared_is_420(p->myPipe->SourcePixelFormat)) {
7738 s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4.0;
7739 } else {
7740 s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC;
7741 }
7742
7743 s->prefetch_bw_pr = s->bytes_pp * p->myPipe->PixelClock / (double)p->myPipe->DPPPerSurface;
7744 if (p->myPipe->VRatio < 1.0)
7745 s->prefetch_bw_pr = p->myPipe->VRatio * s->prefetch_bw_pr;
7746 s->max_Tsw = (math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) * s->LineTime);
7747
7748 s->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC;
7749 s->prefetch_bw_pr = s->prefetch_bw_pr * p->mall_prefetch_sdp_overhead_factor;
7750 s->prefetch_sw_bytes = s->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor;
7751 s->prefetch_bw_oto = math_max2(s->prefetch_bw_pr, s->prefetch_sw_bytes / s->max_Tsw);
7752
7753 s->min_Lsw_oto = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_OTO__;
7754 s->min_Lsw_oto = math_max2(s->min_Lsw_oto, 2.0);
7755 s->min_Lsw_oto = math_max2(s->min_Lsw_oto, p->tdlut_drain_time / s->LineTime);
7756
7757 unsigned int vm_bytes = p->vm_bytes; // vm_bytes is dpde0_bytes_per_frame_ub_l + dpde0_bytes_per_frame_ub_c + 2*extra_dpde_bytes;
7758 unsigned int extra_tdpe_bytes = (unsigned int)math_max2(0, (p->display_cfg->gpuvm_max_page_table_levels - 1) * 128);
7759
7760 if (p->setup_for_tdlut)
7761 vm_bytes = vm_bytes + p->tdlut_pte_bytes_per_frame + (p->display_cfg->gpuvm_enable ? extra_tdpe_bytes : 0);
7762
7763 unsigned long tdlut_row_bytes = (unsigned long) math_ceil2(p->tdlut_bytes_per_frame/2.0, 1.0);
7764 s->prefetch_bw_oto = math_max3(s->prefetch_bw_oto,
7765 p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw,
7766 (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime));
7767 s->Lsw_oto = math_ceil2(4.0 * math_max2(s->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, s->min_Lsw_oto), 1.0) / 4.0;
7768
7769 if (p->display_cfg->gpuvm_enable == true) {
7770 s->Tvm_oto = math_max3(
7771 *p->Tvm_trips,
7772 *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto,
7773 s->LineTime / 4.0);
7774
7775 #ifdef __DML_VBA_DEBUG__
7776 dml2_printf("DML::%s: Tvm_oto max0 = %f\n", __func__, *p->Tvm_trips);
7777 dml2_printf("DML::%s: Tvm_oto max1 = %f\n", __func__, *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto);
7778 dml2_printf("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4);
7779 #endif
7780
7781 } else
7782 s->Tvm_oto = s->LineTime / 4.0;
7783
7784 if ((p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable)) {
7785 s->Tr0_oto = math_max3(
7786 *p->Tr0_trips,
7787 (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto,
7788 s->LineTime / 4.0);
7789 #ifdef __DML_VBA_DEBUG__
7790 dml2_printf("DML::%s: Tr0_oto max0 = %f\n", __func__, *p->Tr0_trips);
7791 dml2_printf("DML::%s: Tr0_oto max1 = %f\n", __func__, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto);
7792 dml2_printf("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime / 4);
7793 #endif
7794 } else
7795 s->Tr0_oto = (s->LineTime - s->Tvm_oto) / 4.0;
7796
7797 s->Tvm_oto_lines = math_ceil2(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0;
7798 s->Tr0_oto_lines = math_ceil2(4.0 * s->Tr0_oto / s->LineTime, 1) / 4.0;
7799 s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto;
7800
7801 //To (time for delay after scaler) in line time
7802 unsigned int Lo = (unsigned int)(*p->DSTYAfterScaler + (double)*p->DSTXAfterScaler / (double)p->myPipe->HTotal);
7803
7804 //Tpre_equ in line time
7805 s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(s->TWait_p + p->TCalc, *p->Tdmdl - p->Ttrip)) / s->LineTime - Lo;
7806 s->dst_y_prefetch_equ = math_min2(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
7807
7808 #ifdef __DML_VBA_DEBUG__
7809 dml2_printf("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal);
7810 dml2_printf("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto);
7811 dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw);
7812 dml2_printf("DML::%s: Tno_bw_flip = %f\n", __func__, *p->Tno_bw_flip);
7813 dml2_printf("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch);
7814 dml2_printf("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem);
7815 dml2_printf("DML::%s: mall_prefetch_sdp_overhead_factor = %f\n", __func__, p->mall_prefetch_sdp_overhead_factor);
7816 dml2_printf("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
7817 dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
7818 dml2_printf("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
7819 dml2_printf("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC);
7820 dml2_printf("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
7821 dml2_printf("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub);
7822 dml2_printf("DML::%s: prefetch_sw_bytes = %f\n", __func__, s->prefetch_sw_bytes);
7823 dml2_printf("DML::%s: max_Tsw = %f\n", __func__, s->max_Tsw);
7824 dml2_printf("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp);
7825 dml2_printf("DML::%s: vm_bytes = %u\n", __func__, vm_bytes);
7826 dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
7827 dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
7828 dml2_printf("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips);
7829 dml2_printf("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips);
7830 dml2_printf("DML::%s: Tvm_trips_flip = %f\n", __func__, *p->Tvm_trips_flip);
7831 dml2_printf("DML::%s: Tr0_trips_flip = %f\n", __func__, *p->Tr0_trips_flip);
7832 dml2_printf("DML::%s: prefetch_bw_pr = %f\n", __func__, s->prefetch_bw_pr);
7833 dml2_printf("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto);
7834 dml2_printf("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto);
7835 dml2_printf("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto);
7836 dml2_printf("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines);
7837 dml2_printf("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines);
7838 dml2_printf("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto);
7839 dml2_printf("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto);
7840 dml2_printf("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ);
7841 dml2_printf("DML::%s: tdlut_row_bytes = %d\n", __func__, tdlut_row_bytes);
7842 dml2_printf("DML::%s: meta_row_bytes = %d\n", __func__, p->meta_row_bytes);
7843 #endif
7844
7845 s->dst_y_prefetch_equ = math_floor2(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0;
7846 s->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime;
7847
7848 dml2_printf("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ);
7849 dml2_printf("DML::%s: LineTime: %f\n", __func__, s->LineTime);
7850 dml2_printf("DML::%s: VStartup: %u\n", __func__, p->VStartup);
7851 dml2_printf("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime);
7852 dml2_printf("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup);
7853 dml2_printf("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc);
7854 dml2_printf("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait);
7855 dml2_printf("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
7856 dml2_printf("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
7857 dml2_printf("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
7858 dml2_printf("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm);
7859 dml2_printf("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
7860 dml2_printf("DML::%s: TWait_p: %fus\n", __func__, s->TWait_p);
7861 dml2_printf("DML::%s: Ttrip: %fus\n", __func__, p->Ttrip);
7862 dml2_printf("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler);
7863 dml2_printf("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler);
7864
7865 s->dep_bytes = math_max2(vm_bytes * p->HostVMInefficiencyFactor, p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes);
7866
7867 dml2_printf("DML::%s: dep_bytes: %f\n", __func__, s->dep_bytes);
7868 dml2_printf("DML::%s: prefetch_sw_bytes: %f\n", __func__, s->prefetch_sw_bytes);
7869 dml2_printf("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes * p->HostVMInefficiencyFactor);
7870 dml2_printf("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes);
7871
7872 if (s->prefetch_sw_bytes < s->dep_bytes) {
7873 s->prefetch_sw_bytes = 2 * s->dep_bytes;
7874 dml2_printf("DML::%s: bump prefetch_sw_bytes to %f\n", __func__, s->prefetch_sw_bytes);
7875 }
7876
7877 *p->dst_y_per_vm_vblank = 0;
7878 *p->dst_y_per_row_vblank = 0;
7879 *p->VRatioPrefetchY = 0;
7880 *p->VRatioPrefetchC = 0;
7881 *p->RequiredPrefetchPixelDataBWLuma = 0;
7882
7883 if (s->dst_y_prefetch_equ > 1) {
7884 s->prefetch_bw1 = 0.;
7885 s->prefetch_bw2 = 0.;
7886 s->prefetch_bw3 = 0.;
7887 s->prefetch_bw4 = 0.;
7888
7889 if (s->Tpre_rounded - *p->Tno_bw > 0) {
7890 s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor
7891 + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)
7892 + s->prefetch_sw_bytes)
7893 / (s->Tpre_rounded - *p->Tno_bw);
7894 s->Tsw_est1 = s->prefetch_sw_bytes / s->prefetch_bw1;
7895 } else
7896 s->prefetch_bw1 = 0;
7897
7898 dml2_printf("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1);
7899 if (p->VStartup == p->MaxVStartup && (s->Tsw_est1 / s->LineTime < s->min_Lsw_oto) && s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0) {
7900 s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) /
7901 (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw);
7902 dml2_printf("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1);
7903 }
7904
7905 if (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded > 0)
7906 s->prefetch_bw2 = (vm_bytes * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) /
7907 (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded);
7908 else
7909 s->prefetch_bw2 = 0;
7910
7911 if (s->Tpre_rounded - s->Tvm_trips_rounded > 0) {
7912 s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + s->prefetch_sw_bytes) /
7913 (s->Tpre_rounded - s->Tvm_trips_rounded);
7914 s->Tsw_est3 = s->prefetch_sw_bytes / s->prefetch_bw3;
7915 } else
7916 s->prefetch_bw3 = 0;
7917
7918
7919 dml2_printf("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3);
7920 if (p->VStartup == p->MaxVStartup && (s->Tsw_est3 / s->LineTime < s->min_Lsw_oto) && s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded > 0) {
7921 s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded);
7922 dml2_printf("DML::%s: prefetch_bw3: %f (updated)\n", __func__, s->prefetch_bw3);
7923 }
7924
7925 if (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0)
7926 s->prefetch_bw4 = s->prefetch_sw_bytes / (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded);
7927 else
7928 s->prefetch_bw4 = 0;
7929
7930 #ifdef __DML_VBA_DEBUG__
7931 dml2_printf("DML::%s: Tpre_rounded: %f\n", __func__, s->Tpre_rounded);
7932 dml2_printf("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
7933 dml2_printf("DML::%s: Tvm_trips_rounded: %f\n", __func__, s->Tvm_trips_rounded);
7934 dml2_printf("DML::%s: Tr0_trips_rounded: %f\n", __func__, 2 * s->Tr0_trips_rounded);
7935 dml2_printf("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1);
7936 dml2_printf("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3);
7937 dml2_printf("DML::%s: prefetch_bw1: %f (final)\n", __func__, s->prefetch_bw1);
7938 dml2_printf("DML::%s: prefetch_bw2: %f (final)\n", __func__, s->prefetch_bw2);
7939 dml2_printf("DML::%s: prefetch_bw3: %f (final)\n", __func__, s->prefetch_bw3);
7940 dml2_printf("DML::%s: prefetch_bw4: %f (final)\n", __func__, s->prefetch_bw4);
7941 #endif
7942
7943 {
7944 bool Case1OK = false;
7945 bool Case2OK = false;
7946 bool Case3OK = false;
7947
7948 if (s->prefetch_bw1 > 0) {
7949 if (*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw1 >= s->Tvm_trips_rounded &&
7950 (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw1 >= s->Tr0_trips_rounded) {
7951 Case1OK = true;
7952 }
7953 }
7954
7955 if (s->prefetch_bw2 > 0) {
7956 if (*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw2 >= s->Tvm_trips_rounded &&
7957 (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw2 < s->Tr0_trips_rounded) {
7958 Case2OK = true;
7959 }
7960 }
7961
7962 if (s->prefetch_bw3 > 0) {
7963 if (*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw3 < s->Tvm_trips_rounded &&
7964 (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw3 >= s->Tr0_trips_rounded) {
7965 Case3OK = true;
7966 }
7967 }
7968
7969 if (Case1OK) {
7970 s->prefetch_bw_equ = s->prefetch_bw1;
7971 } else if (Case2OK) {
7972 s->prefetch_bw_equ = s->prefetch_bw2;
7973 } else if (Case3OK) {
7974 s->prefetch_bw_equ = s->prefetch_bw3;
7975 } else {
7976 s->prefetch_bw_equ = s->prefetch_bw4;
7977 }
7978
7979 #ifdef __DML_VBA_DEBUG__
7980 dml2_printf("DML::%s: Case1OK: %u\n", __func__, Case1OK);
7981 dml2_printf("DML::%s: Case2OK: %u\n", __func__, Case2OK);
7982 dml2_printf("DML::%s: Case3OK: %u\n", __func__, Case3OK);
7983 dml2_printf("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ);
7984 #endif
7985 s->prefetch_bw_equ = math_max3(s->prefetch_bw_equ,
7986 p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw,
7987 (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime));
7988
7989 if (s->prefetch_bw_equ > 0) {
7990 if (p->display_cfg->gpuvm_enable == true) {
7991 s->Tvm_equ = math_max3(*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_equ, *p->Tvm_trips, s->LineTime / 4);
7992 } else {
7993 s->Tvm_equ = s->LineTime / 4;
7994 }
7995
7996 if (p->display_cfg->gpuvm_enable == true || dcc_mrq_enable || p->setup_for_tdlut) {
7997 s->Tr0_equ = math_max3((p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_equ, // PixelPTEBytesPerRow is dpte_row_bytes
7998 *p->Tr0_trips,
7999 s->LineTime / 4);
8000 } else {
8001 s->Tr0_equ = s->LineTime / 4;
8002 }
8003 } else {
8004 s->Tvm_equ = 0;
8005 s->Tr0_equ = 0;
8006 dml2_printf("DML::%s: prefetch_bw_equ equals 0!\n", __func__);
8007 }
8008 }
8009 #ifdef __DML_VBA_DEBUG__
8010 dml2_printf("DML::%s: Tvm_equ = %f\n", __func__, s->Tvm_equ);
8011 dml2_printf("DML::%s: Tr0_equ = %f\n", __func__, s->Tr0_equ);
8012 #endif
8013
8014 if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) {
8015 *p->dst_y_prefetch = s->dst_y_prefetch_oto;
8016 s->TimeForFetchingVM = s->Tvm_oto;
8017 s->TimeForFetchingRowInVBlank = s->Tr0_oto;
8018
8019 *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0;
8020 *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
8021 #ifdef __DML_VBA_DEBUG__
8022 dml2_printf("DML::%s: Using oto bw scheduling for prefetch\n", __func__);
8023 #endif
8024
8025 } else {
8026 *p->dst_y_prefetch = s->dst_y_prefetch_equ;
8027 s->TimeForFetchingVM = s->Tvm_equ;
8028 s->TimeForFetchingRowInVBlank = s->Tr0_equ;
8029
8030 if (p->VStartup == p->MaxVStartup) {
8031 *p->dst_y_per_vm_vblank = math_floor2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0;
8032 *p->dst_y_per_row_vblank = math_floor2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
8033 } else {
8034 *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0;
8035 *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
8036 }
8037 #ifdef __DML_VBA_DEBUG__
8038 dml2_printf("DML::%s: Using equ bw scheduling for prefetch\n", __func__);
8039 #endif
8040 }
8041 dml2_assert(*p->dst_y_prefetch < 64);
8042
8043 // Lsw = dst_y_prefetch - (dst_y_per_vm_vblank + 2*dst_y_per_row_vblank)
8044 s->LinesToRequestPrefetchPixelData = *p->dst_y_prefetch - *p->dst_y_per_vm_vblank - 2 * *p->dst_y_per_row_vblank; // Lsw
8045
8046 s->cursor_prefetch_bytes = (unsigned int)math_max2(p->cursor_bytes_per_chunk, 4 * p->cursor_bytes_per_line);
8047 *p->prefetch_cursor_bw = p->num_cursors * s->cursor_prefetch_bytes / (s->LinesToRequestPrefetchPixelData * s->LineTime);
8048
8049 #ifdef __DML_VBA_DEBUG__
8050 dml2_printf("DML::%s: TimeForFetchingVM = %f\n", __func__, s->TimeForFetchingVM);
8051 dml2_printf("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank);
8052 dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime);
8053 dml2_printf("DML::%s: dst_y_prefetch = %f\n", __func__, *p->dst_y_prefetch);
8054 dml2_printf("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank);
8055 dml2_printf("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank);
8056 dml2_printf("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData);
8057 dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
8058
8059 dml2_printf("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, p->cursor_bytes_per_chunk);
8060 dml2_printf("DML::%s: cursor_bytes_per_line = %d\n", __func__, p->cursor_bytes_per_line);
8061 dml2_printf("DML::%s: cursor_prefetch_bytes = %d\n", __func__, s->cursor_prefetch_bytes);
8062 dml2_printf("DML::%s: prefetch_cursor_bw = %f\n", __func__, *p->prefetch_cursor_bw);
8063 #endif
8064 unsigned int min_lsw_required = (unsigned int)math_max2(2, p->tdlut_drain_time / s->LineTime);
8065
8066 if (s->LinesToRequestPrefetchPixelData >= min_lsw_required && s->prefetch_bw_equ > 0) {
8067 *p->VRatioPrefetchY = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData;
8068 *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY, 1.0);
8069 #ifdef __DML_VBA_DEBUG__
8070 dml2_printf("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
8071 dml2_printf("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY);
8072 dml2_printf("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY);
8073 #endif
8074 if ((p->SwathHeightY > 4) && (p->VInitPreFillY > 3)) {
8075 if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillY - 3.0) / 2.0) {
8076 *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY,
8077 (double)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0));
8078 } else {
8079 s->NoTimeToPrefetch = true;
8080 dml2_printf("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY);
8081 *p->VRatioPrefetchY = 0;
8082 }
8083 #ifdef __DML_VBA_DEBUG__
8084 dml2_printf("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
8085 dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
8086 dml2_printf("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY);
8087 #endif
8088 }
8089
8090 *p->VRatioPrefetchC = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData;
8091 *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, 1.0);
8092
8093 #ifdef __DML_VBA_DEBUG__
8094 dml2_printf("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
8095 dml2_printf("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC);
8096 dml2_printf("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC);
8097 #endif
8098 if ((p->SwathHeightC > 4) && (p->VInitPreFillC > 3)) {
8099 if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillC - 3.0) / 2.0) {
8100 *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, (double)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0));
8101 } else {
8102 s->NoTimeToPrefetch = true;
8103 dml2_printf("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC);
8104 *p->VRatioPrefetchC = 0;
8105 }
8106 #ifdef __DML_VBA_DEBUG__
8107 dml2_printf("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
8108 dml2_printf("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
8109 dml2_printf("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC);
8110 #endif
8111 }
8112
8113 *p->RequiredPrefetchPixelDataBWLuma = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelY * p->swath_width_luma_ub / s->LineTime;
8114
8115 #ifdef __DML_VBA_DEBUG__
8116 dml2_printf("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
8117 dml2_printf("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
8118 dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime);
8119 dml2_printf("DML::%s: RequiredPrefetchPixelDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWLuma);
8120 #endif
8121 *p->RequiredPrefetchPixelDataBWChroma = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelC * p->swath_width_chroma_ub / s->LineTime;
8122 } else {
8123 s->NoTimeToPrefetch = true;
8124 dml2_printf("DML::%s: MyErr set, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required);
8125 dml2_printf("DML::%s: MyErr set, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ);
8126 *p->VRatioPrefetchY = 0;
8127 *p->VRatioPrefetchC = 0;
8128 *p->RequiredPrefetchPixelDataBWLuma = 0;
8129 *p->RequiredPrefetchPixelDataBWChroma = 0;
8130 }
8131
8132 dml2_printf("DML: Tpre: %fus - sum of time to request 2 x data pte, swaths\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingVM);
8133 dml2_printf("DML: Tvm: %fus - time to fetch vm\n", s->TimeForFetchingVM);
8134 dml2_printf("DML: Tr0: %fus - time to fetch first row of data pagetables\n", s->TimeForFetchingRowInVBlank);
8135 dml2_printf("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime);
8136 dml2_printf("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime);
8137 dml2_printf("DML: Tvstartup - TSetup - Tcalc - TWait - Tpre - To > 0\n");
8138 dml2_printf("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingVM - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup);
8139 dml2_printf("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow);
8140
8141 } else {
8142 dml2_printf("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ);
8143 s->NoTimeToPrefetch = true;
8144 s->TimeForFetchingVM = 0;
8145 s->TimeForFetchingRowInVBlank = 0;
8146 *p->dst_y_per_vm_vblank = 0;
8147 *p->dst_y_per_row_vblank = 0;
8148 s->LinesToRequestPrefetchPixelData = 0;
8149 *p->VRatioPrefetchY = 0;
8150 *p->VRatioPrefetchC = 0;
8151 *p->RequiredPrefetchPixelDataBWLuma = 0;
8152 *p->RequiredPrefetchPixelDataBWChroma = 0;
8153 }
8154
8155 {
8156 double prefetch_vm_bw;
8157 double prefetch_row_bw;
8158
8159 if (vm_bytes == 0) {
8160 prefetch_vm_bw = 0;
8161 } else if (*p->dst_y_per_vm_vblank > 0) {
8162 #ifdef __DML_VBA_DEBUG__
8163 dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
8164 dml2_printf("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank);
8165 dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime);
8166 #endif
8167 prefetch_vm_bw = vm_bytes * p->HostVMInefficiencyFactor / (*p->dst_y_per_vm_vblank * s->LineTime);
8168 #ifdef __DML_VBA_DEBUG__
8169 dml2_printf("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
8170 #endif
8171 } else {
8172 prefetch_vm_bw = 0;
8173 s->NoTimeToPrefetch = true;
8174 dml2_printf("DML::%s: MyErr set. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank);
8175 }
8176
8177 if (p->PixelPTEBytesPerRow == 0 && tdlut_row_bytes == 0) {
8178 prefetch_row_bw = 0;
8179 } else if (*p->dst_y_per_row_vblank > 0) {
8180 prefetch_row_bw = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + tdlut_row_bytes) / (*p->dst_y_per_row_vblank * s->LineTime);
8181
8182 #ifdef __DML_VBA_DEBUG__
8183 dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
8184 dml2_printf("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank);
8185 dml2_printf("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
8186 #endif
8187 } else {
8188 prefetch_row_bw = 0;
8189 s->NoTimeToPrefetch = true;
8190 dml2_printf("DML::%s: MyErr set. dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank);
8191 }
8192
8193 *p->prefetch_vmrow_bw = math_max2(prefetch_vm_bw, prefetch_row_bw);
8194 }
8195
8196 if (s->NoTimeToPrefetch) {
8197 s->TimeForFetchingVM = 0;
8198 s->TimeForFetchingRowInVBlank = 0;
8199 *p->dst_y_per_vm_vblank = 0;
8200 *p->dst_y_per_row_vblank = 0;
8201 *p->dst_y_prefetch = 0;
8202 s->LinesToRequestPrefetchPixelData = 0;
8203 *p->VRatioPrefetchY = 0;
8204 *p->VRatioPrefetchC = 0;
8205 *p->RequiredPrefetchPixelDataBWLuma = 0;
8206 *p->RequiredPrefetchPixelDataBWChroma = 0;
8207 }
8208
8209 dml2_printf("DML::%s: dst_y_per_vm_vblank = %f (final)\n", __func__, *p->dst_y_per_vm_vblank);
8210 dml2_printf("DML::%s: dst_y_per_row_vblank = %f (final)\n", __func__, *p->dst_y_per_row_vblank);
8211 dml2_printf("DML::%s: NoTimeToPrefetch=%d\n", __func__, s->NoTimeToPrefetch);
8212 return s->NoTimeToPrefetch;
8213 }
8214
calculate_peak_bandwidth_required(struct dml2_core_internal_scratch * s,double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],const struct dml2_display_cfg * display_cfg,unsigned int inc_flip_bw,unsigned int NumberOfActiveSurfaces,unsigned int NumberOfDPP[],double dcc_dram_bw_nom_overhead_factor_p0[],double dcc_dram_bw_nom_overhead_factor_p1[],double dcc_dram_bw_pref_overhead_factor_p0[],double dcc_dram_bw_pref_overhead_factor_p1[],double mall_prefetch_sdp_overhead_factor[],double mall_prefetch_dram_overhead_factor[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double PrefetchBandwidthLuma[],double PrefetchBandwidthChroma[],double cursor_bw[],double dpte_row_bw[],double meta_row_bw[],double prefetch_cursor_bw[],double prefetch_vmrow_bw[],double flip_bw[],double UrgentBurstFactorLuma[],double UrgentBurstFactorChroma[],double UrgentBurstFactorCursor[],double UrgentBurstFactorLumaPre[],double UrgentBurstFactorChromaPre[],double UrgentBurstFactorCursorPre[])8215 static void calculate_peak_bandwidth_required(
8216 struct dml2_core_internal_scratch *s,
8217
8218 // output
8219 double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
8220 double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
8221 double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
8222
8223 // input
8224 const struct dml2_display_cfg *display_cfg,
8225 unsigned int inc_flip_bw,
8226 unsigned int NumberOfActiveSurfaces,
8227 unsigned int NumberOfDPP[],
8228 double dcc_dram_bw_nom_overhead_factor_p0[],
8229 double dcc_dram_bw_nom_overhead_factor_p1[],
8230 double dcc_dram_bw_pref_overhead_factor_p0[],
8231 double dcc_dram_bw_pref_overhead_factor_p1[],
8232 double mall_prefetch_sdp_overhead_factor[],
8233 double mall_prefetch_dram_overhead_factor[],
8234 double ReadBandwidthLuma[],
8235 double ReadBandwidthChroma[],
8236 double PrefetchBandwidthLuma[],
8237 double PrefetchBandwidthChroma[],
8238 double cursor_bw[],
8239 double dpte_row_bw[],
8240 double meta_row_bw[],
8241 double prefetch_cursor_bw[],
8242 double prefetch_vmrow_bw[],
8243 double flip_bw[],
8244 double UrgentBurstFactorLuma[],
8245 double UrgentBurstFactorChroma[],
8246 double UrgentBurstFactorCursor[],
8247 double UrgentBurstFactorLumaPre[],
8248 double UrgentBurstFactorChromaPre[],
8249 double UrgentBurstFactorCursorPre[])
8250 {
8251 unsigned int n;
8252 unsigned int m;
8253
8254 struct dml2_core_shared_calculate_peak_bandwidth_required_locals *l = &s->calculate_peak_bandwidth_required_locals;
8255
8256 memset(l, 0, sizeof(struct dml2_core_shared_calculate_peak_bandwidth_required_locals));
8257
8258 #ifdef __DML_VBA_DEBUG__
8259 dml2_printf("DML::%s: inc_flip_bw = %d\n", __func__, inc_flip_bw);
8260 dml2_printf("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
8261 #endif
8262
8263 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
8264 l->unity_array[k] = 1.0;
8265 l->zero_array[k] = 0.0;
8266 }
8267
8268 for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
8269 for (n = 0; n < dml2_core_internal_bw_max; n++) {
8270 urg_vactive_bandwidth_required[m][n] = get_urgent_bandwidth_required(
8271 &s->get_urgent_bandwidth_required_locals,
8272 display_cfg,
8273 m,
8274 n,
8275 0, //inc_flip_bw,
8276 NumberOfActiveSurfaces,
8277 NumberOfDPP,
8278 dcc_dram_bw_nom_overhead_factor_p0,
8279 dcc_dram_bw_nom_overhead_factor_p1,
8280 dcc_dram_bw_pref_overhead_factor_p0,
8281 dcc_dram_bw_pref_overhead_factor_p1,
8282 mall_prefetch_sdp_overhead_factor,
8283 mall_prefetch_dram_overhead_factor,
8284 ReadBandwidthLuma,
8285 ReadBandwidthChroma,
8286 l->zero_array, //PrefetchBandwidthLuma,
8287 l->zero_array, //PrefetchBandwidthChroma,
8288 cursor_bw,
8289 dpte_row_bw,
8290 meta_row_bw,
8291 l->zero_array, //prefetch_cursor_bw,
8292 l->zero_array, //prefetch_vmrow_bw,
8293 l->zero_array, //flip_bw,
8294 UrgentBurstFactorLuma,
8295 UrgentBurstFactorChroma,
8296 UrgentBurstFactorCursor,
8297 UrgentBurstFactorLumaPre,
8298 UrgentBurstFactorChromaPre,
8299 UrgentBurstFactorCursorPre);
8300
8301
8302 urg_bandwidth_required[m][n] = get_urgent_bandwidth_required(
8303 &s->get_urgent_bandwidth_required_locals,
8304 display_cfg,
8305 m,
8306 n,
8307 inc_flip_bw,
8308 NumberOfActiveSurfaces,
8309 NumberOfDPP,
8310 dcc_dram_bw_nom_overhead_factor_p0,
8311 dcc_dram_bw_nom_overhead_factor_p1,
8312 dcc_dram_bw_pref_overhead_factor_p0,
8313 dcc_dram_bw_pref_overhead_factor_p1,
8314 mall_prefetch_sdp_overhead_factor,
8315 mall_prefetch_dram_overhead_factor,
8316 ReadBandwidthLuma,
8317 ReadBandwidthChroma,
8318 PrefetchBandwidthLuma,
8319 PrefetchBandwidthChroma,
8320 cursor_bw,
8321 dpte_row_bw,
8322 meta_row_bw,
8323 prefetch_cursor_bw,
8324 prefetch_vmrow_bw,
8325 flip_bw,
8326 UrgentBurstFactorLuma,
8327 UrgentBurstFactorChroma,
8328 UrgentBurstFactorCursor,
8329 UrgentBurstFactorLumaPre,
8330 UrgentBurstFactorChromaPre,
8331 UrgentBurstFactorCursorPre);
8332
8333 non_urg_bandwidth_required[m][n] = get_urgent_bandwidth_required(
8334 &s->get_urgent_bandwidth_required_locals,
8335 display_cfg,
8336 m,
8337 n,
8338 inc_flip_bw,
8339 NumberOfActiveSurfaces,
8340 NumberOfDPP,
8341 dcc_dram_bw_nom_overhead_factor_p0,
8342 dcc_dram_bw_nom_overhead_factor_p1,
8343 dcc_dram_bw_pref_overhead_factor_p0,
8344 dcc_dram_bw_pref_overhead_factor_p1,
8345 mall_prefetch_sdp_overhead_factor,
8346 mall_prefetch_dram_overhead_factor,
8347 ReadBandwidthLuma,
8348 ReadBandwidthChroma,
8349 PrefetchBandwidthLuma,
8350 PrefetchBandwidthChroma,
8351 cursor_bw,
8352 dpte_row_bw,
8353 meta_row_bw,
8354 prefetch_cursor_bw,
8355 prefetch_vmrow_bw,
8356 flip_bw,
8357 l->unity_array,
8358 l->unity_array,
8359 l->unity_array,
8360 l->unity_array,
8361 l->unity_array,
8362 l->unity_array);
8363
8364 #ifdef __DML_VBA_DEBUG__
8365 dml2_printf("DML::%s: urg_vactive_bandwidth_required%s[%s][%s]=%f\n", __func__, (inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_vactive_bandwidth_required[m][n]);
8366 dml2_printf("DML::%s: urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_required[m][n]);
8367 dml2_printf("DML::%s: non_urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), non_urg_bandwidth_required[m][n]);
8368 #endif
8369 dml2_assert(urg_bandwidth_required[m][n] >= non_urg_bandwidth_required[m][n]);
8370 }
8371 }
8372 }
8373
check_urgent_bandwidth_support(double * frac_urg_bandwidth_nom,double * frac_urg_bandwidth_mall,bool * vactive_bandwidth_support_ok,bool * bandwidth_support_ok,unsigned int mall_allocated_for_dcn_mbytes,double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])8374 static void check_urgent_bandwidth_support(
8375 double *frac_urg_bandwidth_nom,
8376 double *frac_urg_bandwidth_mall,
8377 bool *vactive_bandwidth_support_ok, // vactive ok
8378 bool *bandwidth_support_ok, // max of vm, prefetch, vactive all ok
8379
8380 unsigned int mall_allocated_for_dcn_mbytes,
8381 double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
8382 double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
8383 double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
8384 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])
8385 {
8386 *bandwidth_support_ok = 1;
8387 *vactive_bandwidth_support_ok = 1;
8388
8389 double frac_urg_bandwidth_nom_sdp = non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] / urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
8390 double frac_urg_bandwidth_nom_dram = non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] / urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
8391 double frac_urg_bandwidth_mall_sdp = non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] / urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
8392 double frac_urg_bandwidth_mall_dram = non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] / urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
8393
8394 // Check urgent bandwidth required at sdp vs urgent bandwidth avail at sdp -> FractionOfUrgentBandwidth
8395 // Check urgent bandwidth required at dram vs urgent bandwidth avail at dram
8396 // Check urgent bandwidth required at sdp vs urgent bandwidth avail at sdp, svp_prefetch -> FractionOfUrgentBandwidthMALL
8397 // Check urgent bandwidth required at dram vs urgent bandwidth avail at dram, svp_prefetch
8398
8399 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
8400 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
8401
8402 if (mall_allocated_for_dcn_mbytes > 0) {
8403 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
8404 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
8405 }
8406
8407 *frac_urg_bandwidth_nom = math_max2(frac_urg_bandwidth_nom_sdp, frac_urg_bandwidth_nom_dram);
8408 *frac_urg_bandwidth_mall = math_max2(frac_urg_bandwidth_mall_sdp, frac_urg_bandwidth_mall_dram);
8409
8410 *bandwidth_support_ok &= (*frac_urg_bandwidth_nom <= 1.0);
8411
8412 if (mall_allocated_for_dcn_mbytes > 0)
8413 *bandwidth_support_ok &= (*frac_urg_bandwidth_mall <= 1.0);
8414
8415 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
8416 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
8417 if (mall_allocated_for_dcn_mbytes > 0) {
8418 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
8419 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
8420 }
8421
8422 #ifdef __DML_VBA_DEBUG__
8423 dml2_printf("DML::%s: frac_urg_bandwidth_nom_sdp = %f\n", __func__, frac_urg_bandwidth_nom_sdp);
8424 dml2_printf("DML::%s: frac_urg_bandwidth_nom_dram = %f\n", __func__, frac_urg_bandwidth_nom_dram);
8425 dml2_printf("DML::%s: frac_urg_bandwidth_nom = %f\n", __func__, *frac_urg_bandwidth_nom);
8426
8427 dml2_printf("DML::%s: frac_urg_bandwidth_mall_sdp = %f\n", __func__, frac_urg_bandwidth_mall_sdp);
8428 dml2_printf("DML::%s: frac_urg_bandwidth_mall_dram = %f\n", __func__, frac_urg_bandwidth_mall_dram);
8429 dml2_printf("DML::%s: frac_urg_bandwidth_mall = %f\n", __func__, *frac_urg_bandwidth_mall);
8430 dml2_printf("DML::%s: bandwidth_support_ok = %d\n", __func__, *bandwidth_support_ok);
8431 #endif
8432
8433 }
8434
get_bandwidth_available_for_immediate_flip(enum dml2_core_internal_soc_state_type eval_state,double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])8435 static double get_bandwidth_available_for_immediate_flip(enum dml2_core_internal_soc_state_type eval_state,
8436 double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], // no flip
8437 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])
8438 {
8439 double flip_bw_available_mbps;
8440 double flip_bw_available_sdp_mbps;
8441 double flip_bw_available_dram_mbps;
8442
8443 flip_bw_available_sdp_mbps = urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp] - urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp];
8444 flip_bw_available_dram_mbps = urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram] - urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram];
8445 flip_bw_available_mbps = flip_bw_available_sdp_mbps < flip_bw_available_dram_mbps ? flip_bw_available_sdp_mbps : flip_bw_available_dram_mbps;
8446
8447 #ifdef __DML_VBA_DEBUG__
8448 dml2_printf("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state));
8449 dml2_printf("DML::%s: urg_bandwidth_available_sdp_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp]);
8450 dml2_printf("DML::%s: urg_bandwidth_available_dram_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram]);
8451 dml2_printf("DML::%s: urg_bandwidth_required_sdp_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp]);
8452 dml2_printf("DML::%s: urg_bandwidth_required_dram_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram]);
8453 dml2_printf("DML::%s: flip_bw_available_sdp_mbps = %f\n", __func__, flip_bw_available_sdp_mbps);
8454 dml2_printf("DML::%s: flip_bw_available_dram_mbps = %f\n", __func__, flip_bw_available_dram_mbps);
8455 dml2_printf("DML::%s: flip_bw_available_mbps = %f\n", __func__, flip_bw_available_mbps);
8456 #endif
8457
8458 return flip_bw_available_mbps;
8459 }
8460
calculate_immediate_flip_bandwidth_support(double * frac_urg_bandwidth_flip,bool * flip_bandwidth_support_ok,enum dml2_core_internal_soc_state_type eval_state,double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])8461 static void calculate_immediate_flip_bandwidth_support(
8462 // Output
8463 double *frac_urg_bandwidth_flip,
8464 bool *flip_bandwidth_support_ok,
8465
8466 // Input
8467 enum dml2_core_internal_soc_state_type eval_state,
8468 double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
8469 double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
8470 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])
8471 {
8472 double frac_urg_bw_flip_sdp = non_urg_bandwidth_required_flip[eval_state][dml2_core_internal_bw_sdp] / urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp];
8473 double frac_urg_bw_flip_dram = non_urg_bandwidth_required_flip[eval_state][dml2_core_internal_bw_dram] / urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram];
8474
8475 *flip_bandwidth_support_ok = true;
8476 for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { // check sdp and dram
8477 *flip_bandwidth_support_ok &= urg_bandwidth_available[eval_state][n] >= urg_bandwidth_required_flip[eval_state][n];
8478
8479 #ifdef __DML_VBA_DEBUG__
8480 dml2_printf("DML::%s: n = %s\n", __func__, dml2_core_internal_bw_type_str((enum dml2_core_internal_bw_type) eval_state));
8481 dml2_printf("DML::%s: urg_bandwidth_available = %f\n", __func__, urg_bandwidth_available[eval_state][n]);
8482 dml2_printf("DML::%s: non_urg_bandwidth_required_flip = %f\n", __func__, non_urg_bandwidth_required_flip[eval_state][n]);
8483 dml2_printf("DML::%s: urg_bandwidth_required_flip = %f\n", __func__, urg_bandwidth_required_flip[eval_state][n]);
8484 dml2_printf("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok);
8485 #endif
8486 dml2_assert(urg_bandwidth_required_flip[eval_state][n] > non_urg_bandwidth_required_flip[eval_state][n]);
8487 }
8488
8489 *frac_urg_bandwidth_flip = (frac_urg_bw_flip_sdp > frac_urg_bw_flip_dram) ? frac_urg_bw_flip_sdp : frac_urg_bw_flip_dram;
8490 *flip_bandwidth_support_ok &= (*frac_urg_bandwidth_flip <= 1);
8491
8492 #ifdef __DML_VBA_DEBUG__
8493 dml2_printf("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state));
8494 dml2_printf("DML::%s: frac_urg_bw_flip_sdp = %f\n", __func__, frac_urg_bw_flip_sdp);
8495 dml2_printf("DML::%s: frac_urg_bw_flip_dram = %f\n", __func__, frac_urg_bw_flip_dram);
8496 dml2_printf("DML::%s: frac_urg_bandwidth_flip = %f\n", __func__, *frac_urg_bandwidth_flip);
8497 dml2_printf("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok);
8498
8499 for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) {
8500 for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) {
8501 dml2_printf("DML::%s: state:%s bw_type:%s, urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n",
8502 __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n),
8503 urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required_flip[m][n]) ? "<" : ">=", urg_bandwidth_required_flip[m][n]);
8504 }
8505 }
8506 #endif
8507 }
8508
CalculateFlipSchedule(struct dml2_core_internal_scratch * s,bool iflip_enable,bool use_lb_flip_bw,double HostVMInefficiencyFactor,double Tvm_trips_flip,double Tr0_trips_flip,double Tvm_trips_flip_rounded,double Tr0_trips_flip_rounded,bool GPUVMEnable,double vm_bytes,double DPTEBytesPerRow,double BandwidthAvailableForImmediateFlip,unsigned int TotImmediateFlipBytes,enum dml2_source_format_class SourcePixelFormat,double LineTime,double VRatio,double VRatioChroma,double Tno_bw_flip,unsigned int dpte_row_height,unsigned int dpte_row_height_chroma,bool use_one_row_for_frame_flip,unsigned int max_flip_time_us,unsigned int per_pipe_flip_bytes,unsigned int meta_row_bytes,unsigned int meta_row_height,unsigned int meta_row_height_chroma,bool dcc_mrq_enable,double * dst_y_per_vm_flip,double * dst_y_per_row_flip,double * final_flip_bw,bool * ImmediateFlipSupportedForPipe)8509 static void CalculateFlipSchedule(
8510 struct dml2_core_internal_scratch *s,
8511 bool iflip_enable,
8512 bool use_lb_flip_bw,
8513 double HostVMInefficiencyFactor,
8514 double Tvm_trips_flip,
8515 double Tr0_trips_flip,
8516 double Tvm_trips_flip_rounded,
8517 double Tr0_trips_flip_rounded,
8518 bool GPUVMEnable,
8519 double vm_bytes, // vm_bytes
8520 double DPTEBytesPerRow, // dpte_row_bytes
8521 double BandwidthAvailableForImmediateFlip,
8522 unsigned int TotImmediateFlipBytes,
8523 enum dml2_source_format_class SourcePixelFormat,
8524 double LineTime,
8525 double VRatio,
8526 double VRatioChroma,
8527 double Tno_bw_flip,
8528 unsigned int dpte_row_height,
8529 unsigned int dpte_row_height_chroma,
8530 bool use_one_row_for_frame_flip,
8531 unsigned int max_flip_time_us,
8532 unsigned int per_pipe_flip_bytes,
8533 unsigned int meta_row_bytes,
8534 unsigned int meta_row_height,
8535 unsigned int meta_row_height_chroma,
8536 bool dcc_mrq_enable,
8537
8538 // Output
8539 double *dst_y_per_vm_flip,
8540 double *dst_y_per_row_flip,
8541 double *final_flip_bw,
8542 bool *ImmediateFlipSupportedForPipe)
8543 {
8544 struct dml2_core_shared_CalculateFlipSchedule_locals *l = &s->CalculateFlipSchedule_locals;
8545
8546 l->dual_plane = dml2_core_shared_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha;
8547 l->dpte_row_bytes = DPTEBytesPerRow;
8548
8549 #ifdef __DML_VBA_DEBUG__
8550 dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
8551 dml2_printf("DML::%s: ip.max_flip_time_us = %d\n", __func__, max_flip_time_us);
8552 dml2_printf("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
8553 dml2_printf("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes);
8554 dml2_printf("DML::%s: use_lb_flip_bw = %u\n", __func__, use_lb_flip_bw);
8555 dml2_printf("DML::%s: iflip_enable = %u\n", __func__, iflip_enable);
8556 dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
8557 dml2_printf("DML::%s: LineTime = %f\n", __func__, LineTime);
8558 dml2_printf("DML::%s: Tno_bw_flip = %f\n", __func__, Tno_bw_flip);
8559 dml2_printf("DML::%s: Tvm_trips_flip = %f\n", __func__, Tvm_trips_flip);
8560 dml2_printf("DML::%s: Tr0_trips_flip = %f\n", __func__, Tr0_trips_flip);
8561 dml2_printf("DML::%s: Tvm_trips_flip_rounded = %f\n", __func__, Tvm_trips_flip_rounded);
8562 dml2_printf("DML::%s: Tr0_trips_flip_rounded = %f\n", __func__, Tr0_trips_flip_rounded);
8563 dml2_printf("DML::%s: vm_bytes = %f\n", __func__, vm_bytes);
8564 dml2_printf("DML::%s: DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow);
8565 dml2_printf("DML::%s: meta_row_bytes = %d\n", __func__, meta_row_bytes);
8566 dml2_printf("DML::%s: dpte_row_bytes = %f\n", __func__, l->dpte_row_bytes);
8567 dml2_printf("DML::%s: dpte_row_height = %d\n", __func__, dpte_row_height);
8568 dml2_printf("DML::%s: meta_row_height = %d\n", __func__, meta_row_height);
8569 dml2_printf("DML::%s: VRatio = %f\n", __func__, VRatio);
8570 #endif
8571
8572 if (TotImmediateFlipBytes > 0 && (GPUVMEnable || dcc_mrq_enable)) {
8573 if (l->dual_plane) {
8574 if (dcc_mrq_enable & GPUVMEnable) {
8575 l->min_row_height = math_min2(dpte_row_height, meta_row_height);
8576 l->min_row_height_chroma = math_min2(dpte_row_height_chroma, meta_row_height_chroma);
8577 } else if (GPUVMEnable) {
8578 l->min_row_height = dpte_row_height;
8579 l->min_row_height_chroma = dpte_row_height_chroma;
8580 } else {
8581 l->min_row_height = meta_row_height;
8582 l->min_row_height_chroma = meta_row_height_chroma;
8583 }
8584 l->min_row_time = math_min2(l->min_row_height * LineTime / VRatio, l->min_row_height_chroma * LineTime / VRatioChroma);
8585 } else {
8586 if (dcc_mrq_enable & GPUVMEnable)
8587 l->min_row_height = math_min2(dpte_row_height, meta_row_height);
8588 else if (GPUVMEnable)
8589 l->min_row_height = dpte_row_height;
8590 else
8591 l->min_row_height = meta_row_height;
8592
8593 l->min_row_time = l->min_row_height * LineTime / VRatio;
8594 }
8595 #ifdef __DML_VBA_DEBUG__
8596 dml2_printf("DML::%s: min_row_time = %f\n", __func__, l->min_row_time);
8597 #endif
8598 dml2_assert(l->min_row_time > 0);
8599
8600 if (use_lb_flip_bw) {
8601 // For mode check, calculation the flip bw requirement with worst case flip time
8602 l->max_flip_time = math_min2(l->min_row_time, math_max2(Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded, (double)max_flip_time_us));
8603
8604 //The lower bound on flip bandwidth
8605 // Note: The get_urgent_bandwidth_required already consider dpte_row_bw and meta_row_bw in bandwidth calculation, so leave final_flip_bw = 0 if iflip not required
8606 l->lb_flip_bw = 0;
8607
8608 if (iflip_enable) {
8609 l->hvm_scaled_vm_bytes = vm_bytes * HostVMInefficiencyFactor;
8610 l->num_rows = 2;
8611 l->hvm_scaled_row_bytes = (l->num_rows * l->dpte_row_bytes * HostVMInefficiencyFactor + l->num_rows * meta_row_bytes);
8612 l->hvm_scaled_vm_row_bytes = l->hvm_scaled_vm_bytes + l->hvm_scaled_row_bytes;
8613 l->lb_flip_bw = math_max3(
8614 l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip),
8615 l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded),
8616 l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded));
8617 #ifdef __DML_VBA_DEBUG__
8618 dml2_printf("DML::%s: max_flip_time = %f\n", __func__, l->max_flip_time);
8619 dml2_printf("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_bytes);
8620 dml2_printf("DML::%s: total row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_row_bytes);
8621 dml2_printf("DML::%s: total vm+row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_row_bytes);
8622 dml2_printf("DML::%s: lb_flip_bw for vm and row = %f\n", __func__, l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip));
8623 dml2_printf("DML::%s: lb_flip_bw for vm = %f\n", __func__, l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded));
8624 dml2_printf("DML::%s: lb_flip_bw for row = %f\n", __func__, l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded));
8625
8626 if (l->lb_flip_bw > 0) {
8627 dml2_printf("DML::%s: mode_support est Tvm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw);
8628 dml2_printf("DML::%s: mode_support est Tr0_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / l->num_rows);
8629 dml2_printf("DML::%s: mode_support est dst_y_per_vm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw / LineTime);
8630 dml2_printf("DML::%s: mode_support est dst_y_per_row_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / LineTime / l->num_rows);
8631 }
8632 #endif
8633 l->lb_flip_bw = math_max3(l->lb_flip_bw,
8634 l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip,
8635 (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime));
8636 #ifdef __DML_VBA_DEBUG__
8637 dml2_printf("DML::%s: lb_flip_bw for vm reg limit = %f\n", __func__, l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip);
8638 dml2_printf("DML::%s: lb_flip_bw for row reg limit = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime));
8639 #endif
8640 }
8641
8642 *final_flip_bw = l->lb_flip_bw;
8643
8644 *dst_y_per_vm_flip = 1; // not used
8645 *dst_y_per_row_flip = 1; // not used
8646 *ImmediateFlipSupportedForPipe = true;
8647 } else {
8648 if (iflip_enable) {
8649 l->ImmediateFlipBW = (double)per_pipe_flip_bytes * BandwidthAvailableForImmediateFlip / (double)TotImmediateFlipBytes; // flip_bw(i)
8650
8651 #ifdef __DML_VBA_DEBUG__
8652 dml2_printf("DML::%s: per_pipe_flip_bytes = %d\n", __func__, per_pipe_flip_bytes);
8653 dml2_printf("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
8654 dml2_printf("DML::%s: ImmediateFlipBW = %f\n", __func__, l->ImmediateFlipBW);
8655 #endif
8656 if (l->ImmediateFlipBW == 0) {
8657 l->Tvm_flip = 0;
8658 l->Tr0_flip = 0;
8659 } else {
8660 l->Tvm_flip = math_max3(Tvm_trips_flip,
8661 Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW,
8662 LineTime / 4.0);
8663
8664 l->Tr0_flip = math_max3(Tr0_trips_flip,
8665 (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW,
8666 LineTime / 4.0);
8667 }
8668 #ifdef __DML_VBA_DEBUG__
8669 dml2_printf("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, vm_bytes * HostVMInefficiencyFactor);
8670 dml2_printf("DML::%s: total row bytes (hvm ineff scaled, one row) = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes));
8671
8672 dml2_printf("DML::%s: Tvm_flip = %f (bw-based), Tvm_trips_flip = %f (latency-based)\n", __func__, Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW, Tvm_trips_flip);
8673 dml2_printf("DML::%s: Tr0_flip = %f (bw-based), Tr0_trips_flip = %f (latency-based)\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW, Tr0_trips_flip);
8674 #endif
8675 *dst_y_per_vm_flip = math_ceil2(4.0 * (l->Tvm_flip / LineTime), 1.0) / 4.0;
8676 *dst_y_per_row_flip = math_ceil2(4.0 * (l->Tr0_flip / LineTime), 1.0) / 4.0;
8677
8678 *final_flip_bw = math_max2(vm_bytes * HostVMInefficiencyFactor / (*dst_y_per_vm_flip * LineTime),
8679 (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (*dst_y_per_row_flip * LineTime));
8680
8681 if (*dst_y_per_vm_flip >= 32 || *dst_y_per_row_flip >= 16 || l->Tvm_flip + 2 * l->Tr0_flip > l->min_row_time) {
8682 *ImmediateFlipSupportedForPipe = false;
8683 } else {
8684 *ImmediateFlipSupportedForPipe = iflip_enable;
8685 }
8686 } else {
8687 l->Tvm_flip = 0;
8688 l->Tr0_flip = 0;
8689 *dst_y_per_vm_flip = 0;
8690 *dst_y_per_row_flip = 0;
8691 *final_flip_bw = 0;
8692 *ImmediateFlipSupportedForPipe = iflip_enable;
8693 }
8694 }
8695 } else {
8696 l->Tvm_flip = 0;
8697 l->Tr0_flip = 0;
8698 *dst_y_per_vm_flip = 0;
8699 *dst_y_per_row_flip = 0;
8700 *final_flip_bw = 0;
8701 *ImmediateFlipSupportedForPipe = iflip_enable;
8702 }
8703
8704 #ifdef __DML_VBA_DEBUG__
8705 if (!use_lb_flip_bw) {
8706 dml2_printf("DML::%s: dst_y_per_vm_flip = %f (should be < 32)\n", __func__, *dst_y_per_vm_flip);
8707 dml2_printf("DML::%s: dst_y_per_row_flip = %f (should be < 16)\n", __func__, *dst_y_per_row_flip);
8708 dml2_printf("DML::%s: Tvm_flip = %f (final)\n", __func__, l->Tvm_flip);
8709 dml2_printf("DML::%s: Tr0_flip = %f (final)\n", __func__, l->Tr0_flip);
8710 }
8711 dml2_printf("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw);
8712 dml2_printf("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe);
8713 #endif
8714 }
8715
CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params * p)8716 static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
8717 struct dml2_core_internal_scratch *scratch,
8718 struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *p)
8719 {
8720 struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals *s = &scratch->CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals;
8721
8722 s->TotalActiveWriteback = 0;
8723 p->Watermark->UrgentWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency;
8724
8725 #ifdef __DML_VBA_DEBUG__
8726 dml2_printf("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark);
8727 #endif
8728
8729 p->Watermark->USRRetrainingWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency + p->mmSOCParameters.USRRetrainingLatency + p->mmSOCParameters.SMNLatency;
8730 p->Watermark->DRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->Watermark->UrgentWatermark;
8731 p->Watermark->FCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->Watermark->UrgentWatermark;
8732 p->Watermark->StutterExitWatermark = p->mmSOCParameters.SRExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
8733 p->Watermark->StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
8734 p->Watermark->Z8StutterExitWatermark = p->mmSOCParameters.SRExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
8735 p->Watermark->Z8StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
8736
8737 #ifdef __DML_VBA_DEBUG__
8738 dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency);
8739 dml2_printf("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency);
8740 dml2_printf("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency);
8741 dml2_printf("DML::%s: SREnterPlusExitZ8Time = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitZ8Time);
8742 dml2_printf("DML::%s: SREnterPlusExitTime = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitTime);
8743 dml2_printf("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark);
8744 dml2_printf("DML::%s: USRRetrainingWatermark = %f\n", __func__, p->Watermark->USRRetrainingWatermark);
8745 dml2_printf("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark);
8746 dml2_printf("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark);
8747 dml2_printf("DML::%s: StutterExitWatermark = %f\n", __func__, p->Watermark->StutterExitWatermark);
8748 dml2_printf("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark);
8749 dml2_printf("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark);
8750 dml2_printf("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark);
8751 #endif
8752
8753 s->TotalActiveWriteback = 0;
8754 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
8755 if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) {
8756 s->TotalActiveWriteback = s->TotalActiveWriteback + 1;
8757 }
8758 }
8759
8760 if (s->TotalActiveWriteback <= 1) {
8761 p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency;
8762 } else {
8763 p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
8764 }
8765 if (p->USRRetrainingRequired)
8766 p->Watermark->WritebackUrgentWatermark = p->Watermark->WritebackUrgentWatermark + p->mmSOCParameters.USRRetrainingLatency;
8767
8768 if (s->TotalActiveWriteback <= 1) {
8769 p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency;
8770 p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency;
8771 } else {
8772 p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
8773 p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024 / 32 / p->SOCCLK;
8774 }
8775
8776 if (p->USRRetrainingRequired)
8777 p->Watermark->WritebackDRAMClockChangeWatermark = p->Watermark->WritebackDRAMClockChangeWatermark + p->mmSOCParameters.USRRetrainingLatency;
8778
8779 if (p->USRRetrainingRequired)
8780 p->Watermark->WritebackFCLKChangeWatermark = p->Watermark->WritebackFCLKChangeWatermark + p->mmSOCParameters.USRRetrainingLatency;
8781
8782 #ifdef __DML_VBA_DEBUG__
8783 dml2_printf("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark);
8784 dml2_printf("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark);
8785 dml2_printf("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark);
8786 dml2_printf("DML::%s: USRRetrainingRequired = %u\n", __func__, p->USRRetrainingRequired);
8787 dml2_printf("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency);
8788 #endif
8789
8790 s->TotalPixelBW = 0.0;
8791 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
8792 double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total;
8793 double pixel_clock_mhz = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0;
8794 double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
8795 double v_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
8796
8797 s->TotalPixelBW = s->TotalPixelBW + p->DPPPerSurface[k]
8798 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio + p->SwathWidthC[k] * p->BytePerPixelDETC[k] * v_ratio_c) / (h_total / pixel_clock_mhz);
8799 }
8800
8801 *p->global_fclk_change_supported = true;
8802 *p->global_dram_clock_change_supported = true;
8803
8804 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
8805 double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total;
8806 double pixel_clock_mhz = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0;
8807 double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
8808 double v_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
8809 double v_taps = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
8810 double v_taps_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
8811 double h_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio;
8812 double h_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio;
8813 double LBBitPerPixel = 57;
8814
8815 s->LBLatencyHidingSourceLinesY[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthY[k] / math_max2(h_ratio, 1.0)), 1)) - (v_taps - 1));
8816 s->LBLatencyHidingSourceLinesC[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthC[k] / math_max2(h_ratio_c, 1.0)), 1)) - (v_taps_c - 1));
8817
8818 #ifdef __DML_VBA_DEBUG__
8819 dml2_printf("DML::%s: k=%u, MaxLineBufferLines= %u\n", __func__, k, p->MaxLineBufferLines);
8820 dml2_printf("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize);
8821 dml2_printf("DML::%s: k=%u, LBBitPerPixel = %u\n", __func__, k, LBBitPerPixel);
8822 dml2_printf("DML::%s: k=%u, HRatio = %f\n", __func__, k, h_ratio);
8823 dml2_printf("DML::%s: k=%u, VTaps = %f\n", __func__, k, v_taps);
8824 #endif
8825
8826 s->EffectiveLBLatencyHidingY = s->LBLatencyHidingSourceLinesY[k] / v_ratio * (h_total / pixel_clock_mhz);
8827 s->EffectiveLBLatencyHidingC = s->LBLatencyHidingSourceLinesC[k] / v_ratio_c * (h_total / pixel_clock_mhz);
8828
8829 s->EffectiveDETBufferSizeY = p->DETBufferSizeY[k];
8830 if (p->UnboundedRequestEnabled) {
8831 s->EffectiveDETBufferSizeY = s->EffectiveDETBufferSizeY + p->CompressedBufferSizeInkByte * 1024 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio) / (h_total / pixel_clock_mhz) / s->TotalPixelBW;
8832 }
8833
8834 s->LinesInDETY[k] = (double)s->EffectiveDETBufferSizeY / p->BytePerPixelDETY[k] / p->SwathWidthY[k];
8835 s->LinesInDETYRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETY[k], p->SwathHeightY[k]));
8836 s->FullDETBufferingTimeY = s->LinesInDETYRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio;
8837
8838 s->ActiveClockChangeLatencyHidingY = s->EffectiveLBLatencyHidingY + s->FullDETBufferingTimeY - ((double)p->DSTXAfterScaler[k] / h_total + (double)p->DSTYAfterScaler[k]) * h_total / pixel_clock_mhz;
8839
8840 if (p->NumberOfActiveSurfaces > 1) {
8841 s->ActiveClockChangeLatencyHidingY = s->ActiveClockChangeLatencyHidingY - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightY[k] * (double)h_total / pixel_clock_mhz / v_ratio;
8842 }
8843
8844 if (p->BytePerPixelDETC[k] > 0) {
8845 s->LinesInDETC[k] = p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k];
8846 s->LinesInDETCRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETC[k], p->SwathHeightC[k]));
8847 s->FullDETBufferingTimeC = s->LinesInDETCRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio_c;
8848 s->ActiveClockChangeLatencyHidingC = s->EffectiveLBLatencyHidingC + s->FullDETBufferingTimeC - ((double)p->DSTXAfterScaler[k] / (double)h_total + (double)p->DSTYAfterScaler[k]) * (double)h_total / pixel_clock_mhz;
8849 if (p->NumberOfActiveSurfaces > 1) {
8850 s->ActiveClockChangeLatencyHidingC = s->ActiveClockChangeLatencyHidingC - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightC[k] * (double)h_total / pixel_clock_mhz / v_ratio_c;
8851 }
8852 s->ActiveClockChangeLatencyHiding = math_min2(s->ActiveClockChangeLatencyHidingY, s->ActiveClockChangeLatencyHidingC);
8853 } else {
8854 s->ActiveClockChangeLatencyHiding = s->ActiveClockChangeLatencyHidingY;
8855 }
8856
8857 s->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->DRAMClockChangeWatermark;
8858 s->ActiveFCLKChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->FCLKChangeWatermark;
8859 s->USRRetrainingLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->USRRetrainingWatermark;
8860
8861 if (p->VActiveLatencyHidingMargin)
8862 p->VActiveLatencyHidingMargin[k] = s->ActiveDRAMClockChangeLatencyMargin[k];
8863
8864 p->VActiveLatencyHidingUs[k] = s->ActiveClockChangeLatencyHiding;
8865
8866 if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.enable) {
8867 s->WritebackLatencyHiding = (double)p->WritebackInterfaceBufferSize * 1024.0 / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height * (double)h_total / pixel_clock_mhz) * 4.0);
8868 if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format == dml2_444_64) {
8869 s->WritebackLatencyHiding = s->WritebackLatencyHiding / 2;
8870 }
8871 s->WritebackDRAMClockChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackDRAMClockChangeWatermark;
8872
8873 s->WritebackFCLKChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackFCLKChangeWatermark;
8874
8875 s->ActiveDRAMClockChangeLatencyMargin[k] = math_min2(s->ActiveDRAMClockChangeLatencyMargin[k], s->WritebackDRAMClockChangeLatencyMargin);
8876 s->ActiveFCLKChangeLatencyMargin[k] = math_min2(s->ActiveFCLKChangeLatencyMargin[k], s->WritebackFCLKChangeLatencyMargin);
8877 }
8878 p->MaxActiveDRAMClockChangeLatencySupported[k] = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 0 : (s->ActiveDRAMClockChangeLatencyMargin[k] + p->mmSOCParameters.DRAMClockChangeLatency);
8879
8880 enum dml2_uclk_pstate_change_strategy uclk_pstate_change_strategy = p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy;
8881 double reserved_vblank_time_us = (double)p->display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns / 1000;
8882
8883 p->FCLKChangeSupport[k] = dml2_fclock_change_unsupported;
8884 if (s->ActiveFCLKChangeLatencyMargin[k] > 0)
8885 p->FCLKChangeSupport[k] = dml2_fclock_change_vactive;
8886 else if (reserved_vblank_time_us >= p->mmSOCParameters.FCLKChangeLatency)
8887 p->FCLKChangeSupport[k] = dml2_fclock_change_vblank;
8888
8889 if (p->FCLKChangeSupport[k] == dml2_fclock_change_unsupported)
8890 *p->global_fclk_change_supported = false;
8891
8892 p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_unsupported;
8893 if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_auto) {
8894 if (s->ActiveDRAMClockChangeLatencyMargin[k] > 0 && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)
8895 p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vblank_and_vactive;
8896 else if (s->ActiveDRAMClockChangeLatencyMargin[k] > 0)
8897 p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vactive;
8898 else if (reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)
8899 p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vblank;
8900 } else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vactive && s->ActiveDRAMClockChangeLatencyMargin[k] > 0)
8901 p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vactive;
8902 else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vblank && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)
8903 p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vblank;
8904 else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_drr)
8905 p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_drr;
8906 else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_svp)
8907 p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_mall_svp;
8908 else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame)
8909 p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_mall_full_frame;
8910
8911 if (p->DRAMClockChangeSupport[k] == dml2_dram_clock_change_unsupported)
8912 *p->global_dram_clock_change_supported = false;
8913
8914 s->dst_y_pstate = (unsigned int)(math_ceil2((p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.UrgentLatency) / (h_total / pixel_clock_mhz), 1));
8915 s->src_y_pstate_l = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio, p->SwathHeightY[k]));
8916 s->src_y_ahead_l = (unsigned int)(math_floor2(p->DETBufferSizeY[k] / p->BytePerPixelDETY[k] / p->SwathWidthY[k], p->SwathHeightY[k]) + s->LBLatencyHidingSourceLinesY[k]);
8917 s->sub_vp_lines_l = s->src_y_pstate_l + s->src_y_ahead_l + p->meta_row_height_l[k];
8918
8919 #ifdef __DML_VBA_DEBUG__
8920 dml2_printf("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
8921 dml2_printf("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
8922 dml2_printf("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
8923 dml2_printf("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
8924 dml2_printf("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]);
8925 dml2_printf("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate);
8926 dml2_printf("DML::%s: k=%u, src_y_pstate_l = %u\n", __func__, k, s->src_y_pstate_l);
8927 dml2_printf("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l);
8928 dml2_printf("DML::%s: k=%u, meta_row_height_l = %u\n", __func__, p->meta_row_height_l[k]);
8929 dml2_printf("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l);
8930 #endif
8931 p->SubViewportLinesNeededInMALL[k] = s->sub_vp_lines_l;
8932
8933 if (p->BytePerPixelDETC[k] > 0) {
8934 s->src_y_pstate_c = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio_c, p->SwathHeightC[k]));
8935 s->src_y_ahead_c = (unsigned int)(math_floor2(p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k], p->SwathHeightC[k]) + s->LBLatencyHidingSourceLinesC[k]);
8936 s->sub_vp_lines_c = s->src_y_pstate_c + s->src_y_ahead_c + p->meta_row_height_c[k];
8937
8938 if (dml2_core_shared_is_420(p->display_cfg->plane_descriptors[k].pixel_format))
8939 p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, 2 * s->sub_vp_lines_c));
8940 else
8941 p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, s->sub_vp_lines_c));
8942
8943 #ifdef __DML_VBA_DEBUG__
8944 dml2_printf("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, p->meta_row_height_c[k]);
8945 dml2_printf("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c);
8946 dml2_printf("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c);
8947 dml2_printf("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c);
8948 #endif
8949 }
8950 }
8951
8952 bool FoundCriticalSurface = false;
8953 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
8954 if ((!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) && ((!FoundCriticalSurface)
8955 || ((s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency) < *p->MaxActiveFCLKChangeLatencySupported))) {
8956 FoundCriticalSurface = true;
8957 *p->MaxActiveFCLKChangeLatencySupported = s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency;
8958 }
8959 }
8960
8961 #ifdef __DML_VBA_DEBUG__
8962 dml2_printf("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->global_dram_clock_change_supported);
8963 dml2_printf("DML::%s: FCLKChangeSupport = %u\n", __func__, *p->global_fclk_change_supported);
8964 dml2_printf("DML::%s: MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported);
8965 dml2_printf("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport);
8966 #endif
8967 }
8968
uclk_khz_to_dram_bw_mbps(unsigned long uclk_khz,const struct dml2_dram_params * dram_config)8969 static double uclk_khz_to_dram_bw_mbps(unsigned long uclk_khz, const struct dml2_dram_params *dram_config)
8970 {
8971 double bw_mbps = 0;
8972 bw_mbps = ((double)uclk_khz * dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0;
8973
8974 return bw_mbps;
8975 }
8976
dram_bw_kbps_to_uclk_mhz(unsigned long long bw_kbps,const struct dml2_dram_params * dram_config)8977 static double dram_bw_kbps_to_uclk_mhz(unsigned long long bw_kbps, const struct dml2_dram_params *dram_config)
8978 {
8979 double uclk_mhz = 0;
8980
8981 uclk_mhz = (double)bw_kbps / (dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0;
8982
8983 return uclk_mhz;
8984 }
8985
get_qos_param_index(unsigned long uclk_freq_khz,const struct dml2_dcn4_uclk_dpm_dependent_qos_params * per_uclk_dpm_params)8986 static unsigned int get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params)
8987 {
8988 unsigned int i;
8989 unsigned int index = 0;
8990
8991 for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) {
8992 dml2_printf("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %d\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz);
8993
8994 if (i == 0)
8995 index = 0;
8996 else
8997 index = i - 1;
8998
8999 if (uclk_freq_khz < per_uclk_dpm_params[i].minimum_uclk_khz ||
9000 per_uclk_dpm_params[i].minimum_uclk_khz == 0) {
9001 break;
9002 }
9003 }
9004 #if defined(__DML_VBA_DEBUG__)
9005 dml2_printf("DML::%s: uclk_freq_khz = %d\n", __func__, uclk_freq_khz);
9006 dml2_printf("DML::%s: index = %d\n", __func__, index);
9007 #endif
9008 return index;
9009 }
9010
get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz,const struct dml2_soc_state_table * clk_table)9011 static unsigned int get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table)
9012 {
9013 unsigned int i;
9014 bool clk_entry_found = 0;
9015
9016 for (i = 0; i < clk_table->uclk.num_clk_values; i++) {
9017 dml2_printf("DML::%s: clk_table.uclk.clk_values_khz[%d] = %d\n", __func__, i, clk_table->uclk.clk_values_khz[i]);
9018
9019 if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) {
9020 clk_entry_found = 1;
9021 break;
9022 }
9023 }
9024
9025 dml2_assert(clk_entry_found);
9026 #if defined(__DML_VBA_DEBUG__)
9027 dml2_printf("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz);
9028 dml2_printf("DML::%s: index = %d\n", __func__, i);
9029 #endif
9030 return i;
9031 }
9032
/*
 * Sum the bytes a pipe fetches for a flip.
 *
 * Two terms are computed and truncated to unsigned int independently before
 * being added:
 *   - vm_bytes scaled by @hostvm_inefficiency_factor, plus two meta rows
 *   - two dpte rows scaled by @hostvm_inefficiency_factor
 *
 * Returns the sum of the two truncated terms.
 */
static unsigned int get_pipe_flip_bytes(
	double hostvm_inefficiency_factor,
	unsigned int vm_bytes,
	unsigned int dpte_row_bytes,
	unsigned int meta_row_bytes)
{
	/* Each term drops its own fractional part; do not merge the two casts. */
	const unsigned int vm_and_meta_bytes = (unsigned int)((vm_bytes * hostvm_inefficiency_factor) + 2 * meta_row_bytes);
	const unsigned int dpte_bytes = (unsigned int)(2 * dpte_row_bytes * hostvm_inefficiency_factor);

	return vm_and_meta_bytes + dpte_bytes;
}
9046
/*
 * Compute the HostVM inefficiency factors.
 *
 * Both outputs are 1 unless @gpuvm_enable and @hostvm_enable are both set.
 * When both are set:
 *   - *HostVMInefficiencyFactor is
 *     urg_bandwidth_avail_active_pixel_and_vm / urg_bandwidth_avail_active_vm_only
 *   - *HostVMInefficiencyFactorPrefetch starts at that same ratio and is
 *     raised to 4 when the ratio is below 4 and
 *     remote_iommu_outstanding_translations < max_outstanding_reqs.
 */
static void calculate_hostvm_inefficiency_factor(
	double *HostVMInefficiencyFactor,
	double *HostVMInefficiencyFactorPrefetch,

	bool gpuvm_enable,
	bool hostvm_enable,
	unsigned int remote_iommu_outstanding_translations,
	unsigned int max_outstanding_reqs,
	double urg_bandwidth_avail_active_pixel_and_vm,
	double urg_bandwidth_avail_active_vm_only)
{
	double factor;
	double prefetch_factor;

	/* Values reported when HostVM translation is not in use. */
	*HostVMInefficiencyFactor = 1;
	*HostVMInefficiencyFactorPrefetch = 1;

	if (!(gpuvm_enable && hostvm_enable))
		return;

	factor = urg_bandwidth_avail_active_pixel_and_vm / urg_bandwidth_avail_active_vm_only;

	prefetch_factor = factor;
	if ((remote_iommu_outstanding_translations < max_outstanding_reqs) && (prefetch_factor < 4))
		prefetch_factor = 4;

	*HostVMInefficiencyFactor = factor;
	*HostVMInefficiencyFactorPrefetch = prefetch_factor;
#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: urg_bandwidth_avail_active_pixel_and_vm = %f\n", __func__, urg_bandwidth_avail_active_pixel_and_vm);
	dml2_printf("DML::%s: urg_bandwidth_avail_active_vm_only = %f\n", __func__, urg_bandwidth_avail_active_vm_only);
	dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, *HostVMInefficiencyFactor);
	dml2_printf("DML::%s: HostVMInefficiencyFactorPrefetch = %f\n", __func__, *HostVMInefficiencyFactorPrefetch);
#endif
}
9075
CalculatePixelDeliveryTimes(const struct dml2_display_cfg * display_cfg,const struct core_display_cfg_support_info * cfg_support_info,unsigned int NumberOfActiveSurfaces,double VRatioPrefetchY[],double VRatioPrefetchC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double Dppclk[],unsigned int BytePerPixelC[],unsigned int req_per_swath_ub_l[],unsigned int req_per_swath_ub_c[],double DisplayPipeLineDeliveryTimeLuma[],double DisplayPipeLineDeliveryTimeChroma[],double DisplayPipeLineDeliveryTimeLumaPrefetch[],double DisplayPipeLineDeliveryTimeChromaPrefetch[],double DisplayPipeRequestDeliveryTimeLuma[],double DisplayPipeRequestDeliveryTimeChroma[],double DisplayPipeRequestDeliveryTimeLumaPrefetch[],double DisplayPipeRequestDeliveryTimeChromaPrefetch[])9076 static void CalculatePixelDeliveryTimes(
9077 const struct dml2_display_cfg *display_cfg,
9078 const struct core_display_cfg_support_info *cfg_support_info,
9079 unsigned int NumberOfActiveSurfaces,
9080 double VRatioPrefetchY[],
9081 double VRatioPrefetchC[],
9082 unsigned int swath_width_luma_ub[],
9083 unsigned int swath_width_chroma_ub[],
9084 double PSCL_THROUGHPUT[],
9085 double PSCL_THROUGHPUT_CHROMA[],
9086 double Dppclk[],
9087 unsigned int BytePerPixelC[],
9088 unsigned int req_per_swath_ub_l[],
9089 unsigned int req_per_swath_ub_c[],
9090
9091 // Output
9092 double DisplayPipeLineDeliveryTimeLuma[],
9093 double DisplayPipeLineDeliveryTimeChroma[],
9094 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
9095 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
9096 double DisplayPipeRequestDeliveryTimeLuma[],
9097 double DisplayPipeRequestDeliveryTimeChroma[],
9098 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
9099 double DisplayPipeRequestDeliveryTimeChromaPrefetch[])
9100 {
9101 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
9102 double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
9103
9104 #ifdef __DML_VBA_DEBUG__
9105 dml2_printf("DML::%s: k=%u : HRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
9106 dml2_printf("DML::%s: k=%u : VRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
9107 dml2_printf("DML::%s: k=%u : HRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio);
9108 dml2_printf("DML::%s: k=%u : VRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio);
9109 dml2_printf("DML::%s: k=%u : VRatioPrefetchY = %f\n", __func__, k, VRatioPrefetchY[k]);
9110 dml2_printf("DML::%s: k=%u : VRatioPrefetchC = %f\n", __func__, k, VRatioPrefetchC[k]);
9111 dml2_printf("DML::%s: k=%u : swath_width_luma_ub = %u\n", __func__, k, swath_width_luma_ub[k]);
9112 dml2_printf("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, k, swath_width_chroma_ub[k]);
9113 dml2_printf("DML::%s: k=%u : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
9114 dml2_printf("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
9115 dml2_printf("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, k, cfg_support_info->plane_support_info[k].dpps_used);
9116 dml2_printf("DML::%s: k=%u : pixel_clock_mhz = %f\n", __func__, k, pixel_clock_mhz);
9117 dml2_printf("DML::%s: k=%u : Dppclk = %f\n", __func__, k, Dppclk[k]);
9118 #endif
9119 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) {
9120 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz;
9121 } else {
9122 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
9123 }
9124
9125 if (BytePerPixelC[k] == 0) {
9126 DisplayPipeLineDeliveryTimeChroma[k] = 0;
9127 } else {
9128 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio <= 1) {
9129 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz;
9130 } else {
9131 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
9132 }
9133 }
9134
9135 if (VRatioPrefetchY[k] <= 1) {
9136 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz;
9137 } else {
9138 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
9139 }
9140
9141 if (BytePerPixelC[k] == 0) {
9142 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
9143 } else {
9144 if (VRatioPrefetchC[k] <= 1) {
9145 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz;
9146 } else {
9147 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
9148 }
9149 }
9150 #ifdef __DML_VBA_DEBUG__
9151 dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
9152 dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
9153 dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
9154 dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
9155 #endif
9156 }
9157
9158 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
9159
9160 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub_l[k];
9161 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub_l[k];
9162 if (BytePerPixelC[k] == 0) {
9163 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
9164 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
9165 } else {
9166 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub_c[k];
9167 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub_c[k];
9168 }
9169 #ifdef __DML_VBA_DEBUG__
9170 dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
9171 dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
9172 dml2_printf("DML::%s: k=%u : req_per_swath_ub_l = %d\n", __func__, k, req_per_swath_ub_l[k]);
9173 dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
9174 dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
9175 dml2_printf("DML::%s: k=%u : req_per_swath_ub_c = %d\n", __func__, k, req_per_swath_ub_c[k]);
9176 #endif
9177 }
9178 }
9179
CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTETimes_params * p)9180 static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTETimes_params *p)
9181 {
9182 unsigned int meta_chunk_width;
9183 unsigned int min_meta_chunk_width;
9184 unsigned int meta_chunk_per_row_int;
9185 unsigned int meta_row_remainder;
9186 unsigned int meta_chunk_threshold;
9187 unsigned int meta_chunks_per_row_ub;
9188 unsigned int meta_chunk_width_chroma;
9189 unsigned int min_meta_chunk_width_chroma;
9190 unsigned int meta_chunk_per_row_int_chroma;
9191 unsigned int meta_row_remainder_chroma;
9192 unsigned int meta_chunk_threshold_chroma;
9193 unsigned int meta_chunks_per_row_ub_chroma;
9194 unsigned int dpte_group_width_luma;
9195 unsigned int dpte_groups_per_row_luma_ub;
9196 unsigned int dpte_group_width_chroma;
9197 unsigned int dpte_groups_per_row_chroma_ub;
9198 double pixel_clock_mhz;
9199
9200 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9201 p->DST_Y_PER_PTE_ROW_NOM_L[k] = p->dpte_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
9202 if (p->BytePerPixelC[k] == 0) {
9203 p->DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
9204 } else {
9205 p->DST_Y_PER_PTE_ROW_NOM_C[k] = p->dpte_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
9206 }
9207 p->DST_Y_PER_META_ROW_NOM_L[k] = p->meta_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
9208 if (p->BytePerPixelC[k] == 0) {
9209 p->DST_Y_PER_META_ROW_NOM_C[k] = 0;
9210 } else {
9211 p->DST_Y_PER_META_ROW_NOM_C[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
9212 }
9213 }
9214
9215 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9216 if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true && p->mrq_present) {
9217 meta_chunk_width = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelY[k] / p->meta_row_height[k];
9218 min_meta_chunk_width = p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelY[k] / p->meta_row_height[k];
9219 meta_chunk_per_row_int = p->meta_row_width[k] / meta_chunk_width;
9220 meta_row_remainder = p->meta_row_width[k] % meta_chunk_width;
9221 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
9222 meta_chunk_threshold = 2 * min_meta_chunk_width - p->meta_req_width[k];
9223 } else {
9224 meta_chunk_threshold = 2 * min_meta_chunk_width - p->meta_req_height[k];
9225 }
9226 if (meta_row_remainder <= meta_chunk_threshold) {
9227 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
9228 } else {
9229 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
9230 }
9231 p->TimePerMetaChunkNominal[k] = p->meta_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio *
9232 p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total /
9233 (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub;
9234 p->TimePerMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total /
9235 (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub;
9236 p->TimePerMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total /
9237 (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub;
9238 if (p->BytePerPixelC[k] == 0) {
9239 p->TimePerChromaMetaChunkNominal[k] = 0;
9240 p->TimePerChromaMetaChunkVBlank[k] = 0;
9241 p->TimePerChromaMetaChunkFlip[k] = 0;
9242 } else {
9243 meta_chunk_width_chroma = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k];
9244 min_meta_chunk_width_chroma = p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k];
9245 meta_chunk_per_row_int_chroma = (unsigned int)((double)p->meta_row_width_chroma[k] / meta_chunk_width_chroma);
9246 meta_row_remainder_chroma = p->meta_row_width_chroma[k] % meta_chunk_width_chroma;
9247 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
9248 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_width_chroma[k];
9249 } else {
9250 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_height_chroma[k];
9251 }
9252 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
9253 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
9254 } else {
9255 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
9256 }
9257 p->TimePerChromaMetaChunkNominal[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma;
9258 p->TimePerChromaMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma;
9259 p->TimePerChromaMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma;
9260 }
9261 } else {
9262 p->TimePerMetaChunkNominal[k] = 0;
9263 p->TimePerMetaChunkVBlank[k] = 0;
9264 p->TimePerMetaChunkFlip[k] = 0;
9265 p->TimePerChromaMetaChunkNominal[k] = 0;
9266 p->TimePerChromaMetaChunkVBlank[k] = 0;
9267 p->TimePerChromaMetaChunkFlip[k] = 0;
9268 }
9269
9270 #ifdef __DML_VBA_DEBUG__
9271 dml2_printf("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_L[k]);
9272 dml2_printf("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_C[k]);
9273 dml2_printf("DML::%s: k=%d, TimePerMetaChunkNominal = %f\n", __func__, k, p->TimePerMetaChunkNominal[k]);
9274 dml2_printf("DML::%s: k=%d, TimePerMetaChunkVBlank = %f\n", __func__, k, p->TimePerMetaChunkVBlank[k]);
9275 dml2_printf("DML::%s: k=%d, TimePerMetaChunkFlip = %f\n", __func__, k, p->TimePerMetaChunkFlip[k]);
9276 dml2_printf("DML::%s: k=%d, TimePerChromaMetaChunkNominal = %f\n", __func__, k, p->TimePerChromaMetaChunkNominal[k]);
9277 dml2_printf("DML::%s: k=%d, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, p->TimePerChromaMetaChunkVBlank[k]);
9278 dml2_printf("DML::%s: k=%d, TimePerChromaMetaChunkFlip = %f\n", __func__, k, p->TimePerChromaMetaChunkFlip[k]);
9279 #endif
9280 }
9281
9282 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9283 p->DST_Y_PER_PTE_ROW_NOM_L[k] = p->dpte_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
9284 if (p->BytePerPixelC[k] == 0) {
9285 p->DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
9286 } else {
9287 p->DST_Y_PER_PTE_ROW_NOM_C[k] = p->dpte_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
9288 }
9289 }
9290
9291 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9292 pixel_clock_mhz = ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
9293
9294 if (p->display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut)
9295 p->time_per_tdlut_group[k] = 2 * p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / p->tdlut_groups_per_2row_ub[k];
9296 else
9297 p->time_per_tdlut_group[k] = 0;
9298
9299 dml2_printf("DML::%s: k=%u, time_per_tdlut_group = %f\n", __func__, k, p->time_per_tdlut_group[k]);
9300
9301 if (p->display_cfg->gpuvm_enable == true) {
9302 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
9303 dpte_group_width_luma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqWidthY[k]);
9304 } else {
9305 dpte_group_width_luma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqHeightY[k]);
9306 }
9307 if (p->use_one_row_for_frame[k]) {
9308 dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma / 2.0, 1.0));
9309 } else {
9310 dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma, 1.0));
9311 }
9312
9313 if (dpte_groups_per_row_luma_ub <= 2) {
9314 dpte_groups_per_row_luma_ub = dpte_groups_per_row_luma_ub + 1;
9315 }
9316
9317 dml2_printf("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]);
9318 dml2_printf("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]);
9319 dml2_printf("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]);
9320 dml2_printf("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, p->PixelPTEReqWidthY[k]);
9321 dml2_printf("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", __func__, k, p->PixelPTEReqHeightY[k]);
9322 dml2_printf("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]);
9323 dml2_printf("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma);
9324 dml2_printf("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub);
9325
9326 p->time_per_pte_group_nom_luma[k] = p->DST_Y_PER_PTE_ROW_NOM_L[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub;
9327 p->time_per_pte_group_vblank_luma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub;
9328 p->time_per_pte_group_flip_luma[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub;
9329 if (p->BytePerPixelC[k] == 0) {
9330 p->time_per_pte_group_nom_chroma[k] = 0;
9331 p->time_per_pte_group_vblank_chroma[k] = 0;
9332 p->time_per_pte_group_flip_chroma[k] = 0;
9333 } else {
9334 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
9335 dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqWidthC[k]);
9336 } else {
9337 dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqHeightC[k]);
9338 }
9339
9340 if (p->use_one_row_for_frame[k]) {
9341 dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma / 2.0, 1.0));
9342 } else {
9343 dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma, 1.0));
9344 }
9345 if (dpte_groups_per_row_chroma_ub <= 2) {
9346 dpte_groups_per_row_chroma_ub = dpte_groups_per_row_chroma_ub + 1;
9347 }
9348 dml2_printf("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]);
9349 dml2_printf("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma);
9350 dml2_printf("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub);
9351
9352 p->time_per_pte_group_nom_chroma[k] = p->DST_Y_PER_PTE_ROW_NOM_C[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub;
9353 p->time_per_pte_group_vblank_chroma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub;
9354 p->time_per_pte_group_flip_chroma[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub;
9355 }
9356 } else {
9357 p->time_per_pte_group_nom_luma[k] = 0;
9358 p->time_per_pte_group_vblank_luma[k] = 0;
9359 p->time_per_pte_group_flip_luma[k] = 0;
9360 p->time_per_pte_group_nom_chroma[k] = 0;
9361 p->time_per_pte_group_vblank_chroma[k] = 0;
9362 p->time_per_pte_group_flip_chroma[k] = 0;
9363 }
9364 #ifdef __DML_VBA_DEBUG__
9365 dml2_printf("DML::%s: k=%u, dst_y_per_row_vblank = %f\n", __func__, k, p->dst_y_per_row_vblank[k]);
9366 dml2_printf("DML::%s: k=%u, dst_y_per_row_flip = %f\n", __func__, k, p->dst_y_per_row_flip[k]);
9367
9368 dml2_printf("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_L[k]);
9369 dml2_printf("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_C[k]);
9370 dml2_printf("DML::%s: k=%u, time_per_pte_group_nom_luma = %f\n", __func__, k, p->time_per_pte_group_nom_luma[k]);
9371 dml2_printf("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, p->time_per_pte_group_vblank_luma[k]);
9372 dml2_printf("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, p->time_per_pte_group_flip_luma[k]);
9373 dml2_printf("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, p->time_per_pte_group_nom_chroma[k]);
9374 dml2_printf("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, p->time_per_pte_group_vblank_chroma[k]);
9375 dml2_printf("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, p->time_per_pte_group_flip_chroma[k]);
9376 #endif
9377 }
9378 } // CalculateMetaAndPTETimes
9379
CalculateVMGroupAndRequestTimes(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int BytePerPixelC[],double dst_y_per_vm_vblank[],double dst_y_per_vm_flip[],unsigned int dpte_row_width_luma_ub[],unsigned int dpte_row_width_chroma_ub[],unsigned int vm_group_bytes[],unsigned int dpde0_bytes_per_frame_ub_l[],unsigned int dpde0_bytes_per_frame_ub_c[],unsigned int tdlut_pte_bytes_per_frame[],unsigned int meta_pte_bytes_per_frame_ub_l[],unsigned int meta_pte_bytes_per_frame_ub_c[],bool mrq_present,double TimePerVMGroupVBlank[],double TimePerVMGroupFlip[],double TimePerVMRequestVBlank[],double TimePerVMRequestFlip[])9380 static void CalculateVMGroupAndRequestTimes(
9381 const struct dml2_display_cfg *display_cfg,
9382 unsigned int NumberOfActiveSurfaces,
9383 unsigned int BytePerPixelC[],
9384 double dst_y_per_vm_vblank[],
9385 double dst_y_per_vm_flip[],
9386 unsigned int dpte_row_width_luma_ub[],
9387 unsigned int dpte_row_width_chroma_ub[],
9388 unsigned int vm_group_bytes[],
9389 unsigned int dpde0_bytes_per_frame_ub_l[],
9390 unsigned int dpde0_bytes_per_frame_ub_c[],
9391 unsigned int tdlut_pte_bytes_per_frame[],
9392 unsigned int meta_pte_bytes_per_frame_ub_l[],
9393 unsigned int meta_pte_bytes_per_frame_ub_c[],
9394 bool mrq_present,
9395
9396 // Output
9397 double TimePerVMGroupVBlank[],
9398 double TimePerVMGroupFlip[],
9399 double TimePerVMRequestVBlank[],
9400 double TimePerVMRequestFlip[])
9401 {
9402 unsigned int num_group_per_lower_vm_stage = 1;
9403 unsigned int num_req_per_lower_vm_stage = 1;
9404
9405 #ifdef __DML_VBA_DEBUG__
9406 dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
9407 #endif
9408 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
9409 double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
9410 bool dcc_mrq_enable = display_cfg->plane_descriptors[k].surface.dcc.enable && mrq_present;
9411 #ifdef __DML_VBA_DEBUG__
9412 dml2_printf("DML::%s: k=%u, dcc_mrq_enable = %u\n", __func__, k, dcc_mrq_enable);
9413 dml2_printf("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]);
9414 dml2_printf("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]);
9415 dml2_printf("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", __func__, k, dpde0_bytes_per_frame_ub_c[k]);
9416 dml2_printf("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_l = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]);
9417 dml2_printf("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_c = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]);
9418 #endif
9419
9420 if (display_cfg->gpuvm_enable) {
9421 if (display_cfg->gpuvm_max_page_table_levels >= 2) {
9422 num_group_per_lower_vm_stage += (unsigned int)math_ceil2((double)(dpde0_bytes_per_frame_ub_l[k]) / (double)(vm_group_bytes[k]), 1);
9423
9424 if (BytePerPixelC[k] > 0)
9425 num_group_per_lower_vm_stage += (unsigned int)math_ceil2((double)(dpde0_bytes_per_frame_ub_c[k]) / (double)(vm_group_bytes[k]), 1);
9426 }
9427
9428 if (dcc_mrq_enable) {
9429 if (BytePerPixelC[k] > 0) {
9430 num_group_per_lower_vm_stage += (unsigned int)(2.0 /*for each mpde0 group*/ + math_ceil2((double)(meta_pte_bytes_per_frame_ub_l[k]) / (double)(vm_group_bytes[k]), 1) +
9431 math_ceil2((double)(meta_pte_bytes_per_frame_ub_c[k]) / (double)(vm_group_bytes[k]), 1));
9432 } else {
9433 num_group_per_lower_vm_stage += (unsigned int)(1.0 + math_ceil2((double)(meta_pte_bytes_per_frame_ub_l[k]) / (double)(vm_group_bytes[k]), 1));
9434 }
9435 }
9436
9437 unsigned int num_group_per_lower_vm_stage_flip = num_group_per_lower_vm_stage;
9438 unsigned int num_group_per_lower_vm_stage_pref = num_group_per_lower_vm_stage;
9439
9440 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && display_cfg->gpuvm_enable) {
9441 num_group_per_lower_vm_stage_pref += (unsigned int)math_ceil2(tdlut_pte_bytes_per_frame[k] / vm_group_bytes[k], 1);
9442 if (display_cfg->gpuvm_max_page_table_levels >= 2)
9443 num_group_per_lower_vm_stage_pref += 1; // tdpe0 group
9444 }
9445
9446 if (display_cfg->gpuvm_max_page_table_levels >= 2) {
9447 num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_l[k] / 64;
9448 if (BytePerPixelC[k] > 0)
9449 num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_c[k];
9450 }
9451
9452 if (dcc_mrq_enable) {
9453 num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_l[k] / 64;
9454 if (BytePerPixelC[k] > 0)
9455 num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_c[k] / 64;
9456 }
9457
9458 unsigned int num_req_per_lower_vm_stage_flip = num_req_per_lower_vm_stage;
9459 unsigned int num_req_per_lower_vm_stage_pref = num_req_per_lower_vm_stage;
9460
9461 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && display_cfg->gpuvm_enable) {
9462 num_req_per_lower_vm_stage_pref += tdlut_pte_bytes_per_frame[k] / 64;
9463 }
9464
9465 double line_time = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz;
9466
9467 if (num_group_per_lower_vm_stage_flip <= 2) {
9468 num_group_per_lower_vm_stage_flip = num_group_per_lower_vm_stage_flip + 1;
9469 }
9470
9471 if (num_group_per_lower_vm_stage_pref <= 2) {
9472 num_group_per_lower_vm_stage_pref = num_group_per_lower_vm_stage_pref + 1;
9473 }
9474
9475 TimePerVMGroupVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_group_per_lower_vm_stage_pref;
9476 TimePerVMGroupFlip[k] = dst_y_per_vm_flip[k] * line_time / num_group_per_lower_vm_stage_flip;
9477 TimePerVMRequestVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_req_per_lower_vm_stage_pref;
9478 TimePerVMRequestFlip[k] = dst_y_per_vm_flip[k] * line_time / num_req_per_lower_vm_stage_flip;
9479
9480 dml2_printf("DML::%s: k=%u, dst_y_per_vm_vblank = %f\n", __func__, k, dst_y_per_vm_vblank[k]);
9481 dml2_printf("DML::%s: k=%u, dst_y_per_vm_flip = %f\n", __func__, k, dst_y_per_vm_flip[k]);
9482 dml2_printf("DML::%s: k=%u, line_time = %f\n", __func__, k, line_time);
9483 dml2_printf("DML::%s: k=%u, num_group_per_lower_vm_stage_pref = %f\n", __func__, k, num_group_per_lower_vm_stage_pref);
9484 dml2_printf("DML::%s: k=%u, num_group_per_lower_vm_stage_flip = %f\n", __func__, k, num_group_per_lower_vm_stage_flip);
9485 dml2_printf("DML::%s: k=%u, num_req_per_lower_vm_stage_pref = %f\n", __func__, k, num_req_per_lower_vm_stage_pref);
9486 dml2_printf("DML::%s: k=%u, num_req_per_lower_vm_stage_flip = %f\n", __func__, k, num_req_per_lower_vm_stage_flip);
9487
9488 if (display_cfg->gpuvm_max_page_table_levels > 2) {
9489 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
9490 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
9491 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
9492 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
9493 }
9494
9495 } else {
9496 TimePerVMGroupVBlank[k] = 0;
9497 TimePerVMGroupFlip[k] = 0;
9498 TimePerVMRequestVBlank[k] = 0;
9499 TimePerVMRequestFlip[k] = 0;
9500 }
9501
9502 #ifdef __DML_VBA_DEBUG__
9503 dml2_printf("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
9504 dml2_printf("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
9505 dml2_printf("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
9506 dml2_printf("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
9507 #endif
9508 }
9509 }
9510
CalculateStutterEfficiency(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_CalculateStutterEfficiency_params * p)9511 static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratch,
9512 struct dml2_core_calcs_CalculateStutterEfficiency_params *p)
9513 {
9514 struct dml2_core_calcs_CalculateStutterEfficiency_locals *l = &scratch->CalculateStutterEfficiency_locals;
9515
9516 memset(l, 0, sizeof(struct dml2_core_calcs_CalculateStutterEfficiency_locals));
9517
9518 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9519 if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
9520 if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true) {
9521 if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockWidth256BytesY[k] > p->SwathHeightY[k]) || (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesY[k] > p->SwathHeightY[k]) || p->DCCYMaxUncompressedBlock[k] < 256) {
9522 l->MaximumEffectiveCompressionLuma = 2;
9523 } else {
9524 l->MaximumEffectiveCompressionLuma = 4;
9525 }
9526 l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0, l->MaximumEffectiveCompressionLuma);
9527 #ifdef __DML_VBA_DEBUG__
9528 dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
9529 dml2_printf("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0);
9530 dml2_printf("DML::%s: k=%u, MaximumEffectiveCompressionLuma = %f\n", __func__, k, l->MaximumEffectiveCompressionLuma);
9531 #endif
9532 l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0;
9533 l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0 / l->MaximumEffectiveCompressionLuma;
9534
9535 if (p->ReadBandwidthSurfaceChroma[k] > 0) {
9536 if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockWidth256BytesC[k] > p->SwathHeightC[k]) || (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesC[k] > p->SwathHeightC[k]) || p->DCCCMaxUncompressedBlock[k] < 256) {
9537 l->MaximumEffectiveCompressionChroma = 2;
9538 } else {
9539 l->MaximumEffectiveCompressionChroma = 4;
9540 }
9541 l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1, l->MaximumEffectiveCompressionChroma);
9542 #ifdef __DML_VBA_DEBUG__
9543 dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]);
9544 dml2_printf("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1);
9545 dml2_printf("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, l->MaximumEffectiveCompressionChroma);
9546 #endif
9547 l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1;
9548 l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1 / l->MaximumEffectiveCompressionChroma;
9549 }
9550 } else {
9551 l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] + p->ReadBandwidthSurfaceChroma[k];
9552 }
9553 l->TotalRowReadBandwidth = l->TotalRowReadBandwidth + p->DPPPerSurface[k] * (p->meta_row_bw[k] + p->dpte_row_bw[k]);
9554 }
9555 }
9556
9557 l->AverageDCCCompressionRate = p->TotalDataReadBandwidth / l->TotalCompressedReadBandwidth;
9558 l->AverageDCCZeroSizeFraction = l->TotalZeroSizeRequestReadBandwidth / p->TotalDataReadBandwidth;
9559
9560 #ifdef __DML_VBA_DEBUG__
9561 dml2_printf("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled);
9562 dml2_printf("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, l->TotalCompressedReadBandwidth);
9563 dml2_printf("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, l->TotalZeroSizeRequestReadBandwidth);
9564 dml2_printf("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, l->TotalZeroSizeCompressedReadBandwidth);
9565 dml2_printf("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, l->MaximumEffectiveCompressionLuma);
9566 dml2_printf("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, l->MaximumEffectiveCompressionChroma);
9567 dml2_printf("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate);
9568 dml2_printf("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, l->AverageDCCZeroSizeFraction);
9569
9570 dml2_printf("DML::%s: CompbufReservedSpace64B = %u (%f kbytes)\n", __func__, p->CompbufReservedSpace64B, p->CompbufReservedSpace64B * 64 / 1024.0);
9571 dml2_printf("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs);
9572 dml2_printf("DML::%s: CompressedBufferSizeInkByte = %u kbytes\n", __func__, p->CompressedBufferSizeInkByte);
9573 dml2_printf("DML::%s: ROBBufferSizeInKByte = %u kbytes\n", __func__, p->ROBBufferSizeInKByte);
9574 #endif
9575 if (l->AverageDCCZeroSizeFraction == 1) {
9576 l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth;
9577 l->EffectiveCompressedBufferSize = (double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageZeroSizeCompressionRate + ((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 * l->AverageZeroSizeCompressionRate;
9578
9579
9580 } else if (l->AverageDCCZeroSizeFraction > 0) {
9581 l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth;
9582 l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate,
9583 (double)p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate)) +
9584 (p->rob_alloc_compressed ? math_min2(((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * l->AverageDCCCompressionRate,
9585 ((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate))
9586 : ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64));
9587
9588
9589 #ifdef __DML_VBA_DEBUG__
9590 dml2_printf("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate);
9591 dml2_printf("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate));
9592 dml2_printf("DML::%s: min 3 = %d\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64));
9593 dml2_printf("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate));
9594 #endif
9595 } else {
9596 l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate,
9597 (double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate) +
9598 ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * (p->rob_alloc_compressed ? l->AverageDCCCompressionRate : 1.0);
9599
9600 #ifdef __DML_VBA_DEBUG__
9601 dml2_printf("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate);
9602 dml2_printf("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate);
9603 #endif
9604 }
9605
9606 #ifdef __DML_VBA_DEBUG__
9607 dml2_printf("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries);
9608 dml2_printf("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, l->AverageZeroSizeCompressionRate);
9609 dml2_printf("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0);
9610 #endif
9611
9612 bool FoundCriticalSurface = false;
9613 *p->StutterPeriod = 0;
9614
9615 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9616 if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
9617 l->LinesInDETY = ((double)p->DETBufferSizeY[k] + (p->UnboundedRequestEnabled == true ? l->EffectiveCompressedBufferSize : 0) * p->ReadBandwidthSurfaceLuma[k] / p->TotalDataReadBandwidth) / p->BytePerPixelDETY[k] / p->SwathWidthY[k];
9618 l->LinesInDETYRoundedDownToSwath = math_floor2(l->LinesInDETY, p->SwathHeightY[k]);
9619 l->DETBufferingTimeY = l->LinesInDETYRoundedDownToSwath * ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
9620 #ifdef __DML_VBA_DEBUG__
9621 dml2_printf("DML::%s: k=%u, DETBufferSizeY = %u (%u kbytes)\n", __func__, k, p->DETBufferSizeY[k], p->DETBufferSizeY[k] / 1024);
9622 dml2_printf("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
9623 dml2_printf("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
9624 dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
9625 dml2_printf("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth);
9626 dml2_printf("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, l->LinesInDETY);
9627 dml2_printf("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, l->LinesInDETYRoundedDownToSwath);
9628 dml2_printf("DML::%s: k=%u, VRatio = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
9629 dml2_printf("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, l->DETBufferingTimeY);
9630 #endif
9631
9632 if (!FoundCriticalSurface || l->DETBufferingTimeY < *p->StutterPeriod) {
9633 bool isInterlaceTiming = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !p->ProgressiveToInterlaceUnitInOPP;
9634
9635 FoundCriticalSurface = true;
9636 *p->StutterPeriod = l->DETBufferingTimeY;
9637 l->FrameTimeCriticalSurface = (isInterlaceTiming ? math_floor2((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total / 2.0, 1.0) : p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total) * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
9638 l->VActiveTimeCriticalSurface = (isInterlaceTiming ? math_floor2((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_active / 2.0, 1.0) : p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_active) * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
9639 l->BytePerPixelYCriticalSurface = p->BytePerPixelY[k];
9640 l->SwathWidthYCriticalSurface = p->SwathWidthY[k];
9641 l->SwathHeightYCriticalSurface = p->SwathHeightY[k];
9642 l->BlockWidth256BytesYCriticalSurface = p->BlockWidth256BytesY[k];
9643 l->DETBufferSizeYCriticalSurface = p->DETBufferSizeY[k];
9644 l->MinTTUVBlankCriticalSurface = p->MinTTUVBlank[k];
9645 l->SinglePlaneCriticalSurface = (p->ReadBandwidthSurfaceChroma[k] == 0);
9646 l->SinglePipeCriticalSurface = (p->DPPPerSurface[k] == 1);
9647
9648 #ifdef __DML_VBA_DEBUG__
9649 dml2_printf("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface);
9650 dml2_printf("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod);
9651 dml2_printf("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, l->MinTTUVBlankCriticalSurface);
9652 dml2_printf("DML::%s: k=%u, FrameTimeCriticalSurface= %f\n", __func__, k, l->FrameTimeCriticalSurface);
9653 dml2_printf("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, l->VActiveTimeCriticalSurface);
9654 dml2_printf("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, l->BytePerPixelYCriticalSurface);
9655 dml2_printf("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, l->SwathWidthYCriticalSurface);
9656 dml2_printf("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, l->SwathHeightYCriticalSurface);
9657 dml2_printf("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, l->BlockWidth256BytesYCriticalSurface);
9658 dml2_printf("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, l->SinglePlaneCriticalSurface);
9659 dml2_printf("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, l->SinglePipeCriticalSurface);
9660 #endif
9661 }
9662 }
9663 }
9664
9665 // for bounded req, the stutter period is calculated only based on DET size, but during burst there can be some return inside ROB/compressed buffer
9666 // stutter period is calculated only on the det sizing
9667 // if (cdb + rob >= det) the stutter burst will be absorbed by the cdb + rob which is before decompress
9668 // else
9669 // the cdb + rob part will be in compressed rate with urg bw (idea bw)
9670 // the det part will be return at uncompressed rate with 64B/dcfclk
9671 //
9672 // for unbounded req, the stutter period should be calculated as total of CDB+ROB+DET, so the term "PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer"
9673 // should be == EffectiveCompressedBufferSize which will returned a compressed rate, the rest of stutter period is from the DET will be returned at uncompressed rate with 64B/dcfclk
9674
9675 l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = math_min2(*p->StutterPeriod * p->TotalDataReadBandwidth, l->EffectiveCompressedBufferSize);
9676 #ifdef __DML_VBA_DEBUG__
9677 dml2_printf("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate);
9678 dml2_printf("DML::%s: StutterPeriod*TotalDataReadBandwidth = %f (%f kbytes)\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth, (*p->StutterPeriod * p->TotalDataReadBandwidth) / 1024.0);
9679 dml2_printf("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0);
9680 dml2_printf("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f (%f kbytes)\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 1024);
9681 dml2_printf("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW);
9682 dml2_printf("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth);
9683 dml2_printf("DML::%s: TotalRowReadBandwidth = %f\n", __func__, l->TotalRowReadBandwidth);
9684 dml2_printf("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK);
9685 #endif
9686
9687 l->StutterBurstTime = l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer
9688 / (p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) +
9689 (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer)
9690 / math_max2(p->DCFCLK * 64, p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) +
9691 *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW;
9692 #ifdef __DML_VBA_DEBUG__
9693 dml2_printf("DML::%s: Part 1 = %f\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / p->ReturnBW / (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate));
9694 dml2_printf("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64));
9695 dml2_printf("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW);
9696 dml2_printf("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime);
9697 #endif
9698
9699 l->TotalActiveWriteback = 0;
9700 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9701 if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.enable) {
9702 l->TotalActiveWriteback = l->TotalActiveWriteback + 1;
9703 }
9704 }
9705
9706 if (l->TotalActiveWriteback == 0) {
9707 #ifdef __DML_VBA_DEBUG__
9708 dml2_printf("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime);
9709 dml2_printf("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time);
9710 dml2_printf("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
9711 #endif
9712 *p->StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitTime + l->StutterBurstTime) / *p->StutterPeriod) * 100;
9713 *p->Z8StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitZ8Time + l->StutterBurstTime) / *p->StutterPeriod) * 100;
9714 *p->NumberOfStutterBurstsPerFrame = (*p->StutterEfficiencyNotIncludingVBlank > 0 ? (unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0);
9715 *p->Z8NumberOfStutterBurstsPerFrame = (*p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? (unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0);
9716 } else {
9717 *p->StutterEfficiencyNotIncludingVBlank = 0.;
9718 *p->Z8StutterEfficiencyNotIncludingVBlank = 0.;
9719 *p->NumberOfStutterBurstsPerFrame = 0;
9720 *p->Z8NumberOfStutterBurstsPerFrame = 0;
9721 }
9722 #ifdef __DML_VBA_DEBUG__
9723 dml2_printf("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, l->VActiveTimeCriticalSurface);
9724 dml2_printf("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
9725 dml2_printf("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank);
9726 dml2_printf("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame);
9727 dml2_printf("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
9728 #endif
9729
9730 unsigned int TotalNumberOfActiveOTG = 0;
9731 double SinglePixelClock = 0;
9732 unsigned int SingleHTotal = 0;
9733 unsigned int SingleVTotal = 0;
9734 bool SameTiming = true;
9735 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9736 if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
9737 if (p->display_cfg->plane_descriptors[k].stream_index == k) {
9738 if (TotalNumberOfActiveOTG == 0) {
9739 SinglePixelClock = ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
9740 SingleHTotal = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total;
9741 SingleVTotal = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total;
9742 } else if (SinglePixelClock != ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) || SingleHTotal != p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total || SingleVTotal != p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total) {
9743 SameTiming = false;
9744 }
9745 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
9746 }
9747 }
9748 }
9749
9750 if (*p->StutterEfficiencyNotIncludingVBlank > 0) {
9751 if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) {
9752 *p->StutterEfficiency = *p->StutterEfficiencyNotIncludingVBlank;
9753 } else {
9754 *p->StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitTime + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100;
9755 }
9756 } else {
9757 *p->StutterEfficiency = 0;
9758 *p->NumberOfStutterBurstsPerFrame = 0;
9759 }
9760
9761 double LastZ8StutterPeriod = 0.0;
9762
9763 if (*p->Z8StutterEfficiencyNotIncludingVBlank > 0) {
9764 LastZ8StutterPeriod = l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod;
9765 if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) {
9766 *p->Z8StutterEfficiency = *p->Z8StutterEfficiencyNotIncludingVBlank;
9767 } else {
9768 *p->Z8StutterEfficiency = (1 - (*p->Z8NumberOfStutterBurstsPerFrame * p->SRExitZ8Time + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100;
9769 }
9770 } else {
9771 *p->Z8StutterEfficiency = 0.;
9772 *p->Z8NumberOfStutterBurstsPerFrame = 0;
9773 }
9774
9775 #ifdef __DML_VBA_DEBUG__
9776 dml2_printf("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
9777 dml2_printf("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Z8StutterEnterPlusExitWatermark);
9778 dml2_printf("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime);
9779 dml2_printf("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
9780 dml2_printf("DML::%s: StutterEfficiency = %f\n", __func__, *p->StutterEfficiency);
9781 dml2_printf("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency);
9782 dml2_printf("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
9783 dml2_printf("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
9784 #endif
9785
9786
9787 unsigned int SwathSizeCriticalSurface;
9788 unsigned int LastChunkOfSwathSize;
9789 unsigned int MissingPartOfLastSwathOfDETSize;
9790
9791 SwathSizeCriticalSurface = (unsigned int)(l->BytePerPixelYCriticalSurface * l->SwathHeightYCriticalSurface * math_ceil2(l->SwathWidthYCriticalSurface, l->BlockWidth256BytesYCriticalSurface));
9792 LastChunkOfSwathSize = SwathSizeCriticalSurface % (p->PixelChunkSizeInKByte * 1024);
9793 MissingPartOfLastSwathOfDETSize = (unsigned int)(math_ceil2(l->DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface) - l->DETBufferSizeYCriticalSurface);
9794
9795 *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && l->SinglePlaneCriticalSurface && l->SinglePipeCriticalSurface && (LastChunkOfSwathSize > 0) &&
9796 (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0) && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
9797
9798 #ifdef __DML_VBA_DEBUG__
9799 dml2_printf("DML::%s: SwathSizeCriticalSurface = %u\n", __func__, SwathSizeCriticalSurface);
9800 dml2_printf("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, l->DETBufferSizeYCriticalSurface);
9801 dml2_printf("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte);
9802 dml2_printf("DML::%s: LastChunkOfSwathSize = %u\n", __func__, LastChunkOfSwathSize);
9803 dml2_printf("DML::%s: MissingPartOfLastSwathOfDETSize = %u\n", __func__, MissingPartOfLastSwathOfDETSize);
9804 dml2_printf("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
9805 #endif
9806 }
9807
dml2_core_shared_mode_programming(struct dml2_core_calcs_mode_programming_ex * in_out_params)9808 bool dml2_core_shared_mode_programming(struct dml2_core_calcs_mode_programming_ex *in_out_params)
9809 {
9810 const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg;
9811 const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table;
9812 const struct core_display_cfg_support_info *cfg_support_info = in_out_params->cfg_support_info;
9813 struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib;
9814 struct dml2_display_cfg_programming *programming = in_out_params->programming;
9815
9816 struct dml2_core_calcs_mode_programming_locals *s = &mode_lib->scratch.dml_core_mode_programming_locals;
9817 struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
9818 struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
9819 struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
9820 struct dml2_core_calcs_CalculateStutterEfficiency_params *CalculateStutterEfficiency_params = &mode_lib->scratch.CalculateStutterEfficiency_params;
9821 struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
9822 struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params;
9823 struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params;
9824 struct dml2_core_shared_CalculateMetaAndPTETimes_params *CalculateMetaAndPTETimes_params = &mode_lib->scratch.CalculateMetaAndPTETimes_params;
9825
9826 unsigned int j, k;
9827
9828 dml2_printf("DML::%s: --- START --- \n", __func__);
9829
9830 memset(&mode_lib->mp, 0, sizeof(struct dml2_core_internal_mode_program));
9831
9832 s->num_active_planes = display_cfg->num_planes;
9833 get_stream_output_bpp(s->OutputBpp, display_cfg);
9834
9835 mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(display_cfg->num_planes, cfg_support_info);
9836 dml_calc_pipe_plane_mapping(cfg_support_info, mode_lib->mp.pipe_plane);
9837
9838 mode_lib->mp.Dcfclk = programming->min_clocks.dcn4x.active.dcfclk_khz / 1000.0;
9839 mode_lib->mp.FabricClock = programming->min_clocks.dcn4x.active.fclk_khz / 1000.0;
9840 mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table.dram_config);
9841 mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4x.active.uclk_khz / 1000.0;
9842 mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4x.dpprefclk_khz / 1000.0;
9843 s->SOCCLK = (double)programming->min_clocks.dcn4x.socclk_khz / 1000;
9844 mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4x.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params);
9845 mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table);
9846
9847 for (k = 0; k < s->num_active_planes; ++k) {
9848 unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index;
9849 dml2_assert(cfg_support_info->stream_support_info[stream_index].odms_used <= 4);
9850 dml2_assert(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4 ||
9851 cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2 ||
9852 cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1);
9853
9854 if (cfg_support_info->stream_support_info[stream_index].odms_used > 1)
9855 dml2_assert(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1);
9856
9857 switch (cfg_support_info->stream_support_info[stream_index].odms_used) {
9858 case (4):
9859 mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_4to1;
9860 break;
9861 case (3):
9862 mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_3to1;
9863 break;
9864 case (2):
9865 mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_2to1;
9866 break;
9867 default:
9868 if (cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4)
9869 mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to4;
9870 else if (cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2)
9871 mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to2;
9872 else
9873 mode_lib->mp.ODMMode[k] = dml2_odm_mode_bypass;
9874 break;
9875 }
9876 }
9877
9878 for (k = 0; k < s->num_active_planes; ++k) {
9879 mode_lib->mp.NoOfDPP[k] = cfg_support_info->plane_support_info[k].dpps_used;
9880 mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4x.dppclk_khz / 1000.0;
9881 dml2_assert(mode_lib->mp.Dppclk[k] > 0);
9882 }
9883
9884 for (k = 0; k < s->num_active_planes; ++k) {
9885 unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index;
9886 mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4x.dscclk_khz / 1000.0;
9887 dml2_printf("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]);
9888 }
9889
9890 mode_lib->mp.Dispclk = programming->min_clocks.dcn4x.dispclk_khz / 1000.0;
9891 mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4x.deepsleep_dcfclk_khz / 1000.0;
9892
9893 dml2_assert(mode_lib->mp.Dcfclk > 0);
9894 dml2_assert(mode_lib->mp.FabricClock > 0);
9895 dml2_assert(mode_lib->mp.dram_bw_mbps > 0);
9896 dml2_assert(mode_lib->mp.uclk_freq_mhz > 0);
9897 dml2_assert(mode_lib->mp.GlobalDPPCLK > 0);
9898 dml2_assert(mode_lib->mp.Dispclk > 0);
9899 dml2_assert(mode_lib->mp.DCFCLKDeepSleep > 0);
9900 dml2_assert(s->SOCCLK > 0);
9901
9902 #ifdef __DML_VBA_DEBUG__
9903 // dml2_printf_dml_display_cfg_timing(&display_cfg->timing, s->num_active_planes);
9904 // dml2_printf_dml_display_cfg_plane(&display_cfg->plane, s->num_active_planes);
9905 // dml2_printf_dml_display_cfg_surface(&display_cfg->surface, s->num_active_planes);
9906 // dml2_printf_dml_display_cfg_output(&display_cfg->output, s->num_active_planes);
9907 // dml2_printf_dml_display_cfg_hw_resource(&display_cfg->hw, s->num_active_planes);
9908
9909 dml2_printf("DML::%s: num_active_planes = %u\n", __func__, s->num_active_planes);
9910 dml2_printf("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes);
9911 dml2_printf("DML::%s: Dcfclk = %f\n", __func__, mode_lib->mp.Dcfclk);
9912 dml2_printf("DML::%s: FabricClock = %f\n", __func__, mode_lib->mp.FabricClock);
9913 dml2_printf("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->mp.dram_bw_mbps);
9914 dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->mp.uclk_freq_mhz);
9915 dml2_printf("DML::%s: Dispclk = %f\n", __func__, mode_lib->mp.Dispclk);
9916 for (k = 0; k < s->num_active_planes; ++k) {
9917 dml2_printf("DML::%s: Dppclk[%0d] = %f\n", __func__, k, mode_lib->mp.Dppclk[k]);
9918 }
9919 dml2_printf("DML::%s: GlobalDPPCLK = %f\n", __func__, mode_lib->mp.GlobalDPPCLK);
9920 dml2_printf("DML::%s: DCFCLKDeepSleep = %f\n", __func__, mode_lib->mp.DCFCLKDeepSleep);
9921 dml2_printf("DML::%s: SOCCLK = %f\n", __func__, s->SOCCLK);
9922 dml2_printf("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index);
9923 dml2_printf("DML::%s: min_clk_table min_fclk_khz = %d\n", __func__, min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz);
9924 dml2_printf("DML::%s: min_clk_table uclk_mhz = %f\n", __func__, dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config));
9925 for (k = 0; k < mode_lib->mp.num_active_pipes; ++k) {
9926 dml2_printf("DML::%s: pipe=%d is in plane=%d\n", __func__, k, mode_lib->mp.pipe_plane[k]);
9927 dml2_printf("DML::%s: Per-plane DPPPerSurface[%0d] = %d\n", __func__, k, mode_lib->mp.NoOfDPP[k]);
9928 }
9929
9930 for (k = 0; k < s->num_active_planes; k++)
9931 dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns);
9932 #endif
9933
9934 CalculateMaxDETAndMinCompressedBufferSize(
9935 mode_lib->ip.config_return_buffer_size_in_kbytes,
9936 mode_lib->ip.config_return_buffer_segment_size_in_kbytes,
9937 mode_lib->ip.rob_buffer_size_kbytes,
9938 mode_lib->ip.max_num_dpp,
9939 display_cfg->overrides.hw.force_nom_det_size_kbytes.enable,
9940 display_cfg->overrides.hw.force_nom_det_size_kbytes.value,
9941 mode_lib->ip.dcn_mrq_present,
9942
9943 /* Output */
9944 &s->MaxTotalDETInKByte,
9945 &s->NomDETInKByte,
9946 &s->MinCompressedBufferSizeInKByte);
9947
9948
9949 PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd);
9950
9951 for (k = 0; k < s->num_active_planes; ++k) {
9952 CalculateSinglePipeDPPCLKAndSCLThroughput(
9953 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
9954 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
9955 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
9956 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
9957 mode_lib->ip.max_dchub_pscl_bw_pix_per_clk,
9958 mode_lib->ip.max_pscl_lb_bw_pix_per_clk,
9959 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
9960 display_cfg->plane_descriptors[k].pixel_format,
9961 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps,
9962 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps,
9963 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps,
9964 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps,
9965
9966 /* Output */
9967 &mode_lib->mp.PSCL_THROUGHPUT[k],
9968 &mode_lib->mp.PSCL_THROUGHPUT_CHROMA[k],
9969 &mode_lib->mp.DPPCLKUsingSingleDPP[k]);
9970 }
9971
9972 for (k = 0; k < s->num_active_planes; ++k) {
9973 CalculateBytePerPixelAndBlockSizes(
9974 display_cfg->plane_descriptors[k].pixel_format,
9975 display_cfg->plane_descriptors[k].surface.tiling,
9976 display_cfg->plane_descriptors[k].surface.plane0.pitch,
9977 display_cfg->plane_descriptors[k].surface.plane1.pitch,
9978
9979 // Output
9980 &mode_lib->mp.BytePerPixelY[k],
9981 &mode_lib->mp.BytePerPixelC[k],
9982 &mode_lib->mp.BytePerPixelInDETY[k],
9983 &mode_lib->mp.BytePerPixelInDETC[k],
9984 &mode_lib->mp.Read256BlockHeightY[k],
9985 &mode_lib->mp.Read256BlockHeightC[k],
9986 &mode_lib->mp.Read256BlockWidthY[k],
9987 &mode_lib->mp.Read256BlockWidthC[k],
9988 &mode_lib->mp.MacroTileHeightY[k],
9989 &mode_lib->mp.MacroTileHeightC[k],
9990 &mode_lib->mp.MacroTileWidthY[k],
9991 &mode_lib->mp.MacroTileWidthC[k],
9992 &mode_lib->mp.surf_linear128_l[k],
9993 &mode_lib->mp.surf_linear128_c[k]);
9994 }
9995
9996 CalculateSwathWidth(
9997 display_cfg,
9998 false, // ForceSingleDPP
9999 s->num_active_planes,
10000 mode_lib->mp.ODMMode,
10001 mode_lib->mp.BytePerPixelY,
10002 mode_lib->mp.BytePerPixelC,
10003 mode_lib->mp.Read256BlockHeightY,
10004 mode_lib->mp.Read256BlockHeightC,
10005 mode_lib->mp.Read256BlockWidthY,
10006 mode_lib->mp.Read256BlockWidthC,
10007 mode_lib->mp.surf_linear128_l,
10008 mode_lib->mp.surf_linear128_c,
10009 mode_lib->mp.NoOfDPP,
10010
10011 /* Output */
10012 mode_lib->mp.req_per_swath_ub_l,
10013 mode_lib->mp.req_per_swath_ub_c,
10014 mode_lib->mp.SwathWidthSingleDPPY,
10015 mode_lib->mp.SwathWidthSingleDPPC,
10016 mode_lib->mp.SwathWidthY,
10017 mode_lib->mp.SwathWidthC,
10018 s->dummy_integer_array[0], // unsigned int MaximumSwathHeightY[]
10019 s->dummy_integer_array[1], // unsigned int MaximumSwathHeightC[]
10020 mode_lib->mp.swath_width_luma_ub,
10021 mode_lib->mp.swath_width_chroma_ub);
10022
10023 for (k = 0; k < s->num_active_planes; ++k) {
10024 mode_lib->mp.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width * display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 /
10025 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000));
10026 mode_lib->mp.SurfaceReadBandwidthLuma[k] = mode_lib->mp.SwathWidthSingleDPPY[k] * mode_lib->mp.BytePerPixelY[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
10027 mode_lib->mp.SurfaceReadBandwidthChroma[k] = mode_lib->mp.SwathWidthSingleDPPC[k] * mode_lib->mp.BytePerPixelC[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
10028 dml2_printf("DML::%s: ReadBandwidthSurfaceLuma[%i] = %fBps\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthLuma[k]);
10029 dml2_printf("DML::%s: ReadBandwidthSurfaceChroma[%i] = %fBps\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthChroma[k]);
10030 }
10031
10032 CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg;
10033 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes;
10034 CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = s->MaxTotalDETInKByte;
10035 CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = s->MinCompressedBufferSizeInKByte;
10036 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
10037 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
10038 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
10039 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
10040
10041 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
10042 CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = s->num_active_planes;
10043 CalculateSwathAndDETConfiguration_params->nomDETInKByte = s->NomDETInKByte;
10044 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
10045 CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes;
10046 CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->mp.SurfaceReadBandwidthLuma;
10047 CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->mp.SurfaceReadBandwidthChroma;
10048 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = s->dummy_single_array[0];
10049 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = s->dummy_single_array[1];
10050 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->mp.Read256BlockHeightY;
10051 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->mp.Read256BlockHeightC;
10052 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->mp.Read256BlockWidthY;
10053 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->mp.Read256BlockWidthC;
10054 CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->mp.surf_linear128_l;
10055 CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->mp.surf_linear128_c;
10056 CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->mp.ODMMode;
10057 CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->mp.NoOfDPP;
10058 CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->mp.BytePerPixelY;
10059 CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->mp.BytePerPixelC;
10060 CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->mp.BytePerPixelInDETY;
10061 CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->mp.BytePerPixelInDETC;
10062
10063 // output
10064 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = mode_lib->mp.req_per_swath_ub_l;
10065 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = mode_lib->mp.req_per_swath_ub_c;
10066 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_long_array[0];
10067 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_long_array[1];
10068 CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_long_array[2];
10069 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_long_array[3];
10070 CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->mp.SwathHeightY;
10071 CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->mp.SwathHeightC;
10072 CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->mp.request_size_bytes_luma;
10073 CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->mp.request_size_bytes_chroma;
10074 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->mp.DETBufferSizeInKByte;
10075 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY;
10076 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC;
10077 CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l;
10078 CalculateSwathAndDETConfiguration_params->full_swath_bytes_c = s->full_swath_bytes_c;
10079 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->mp.UnboundedRequestEnabled;
10080 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &mode_lib->mp.compbuf_reserved_space_64b;
10081 CalculateSwathAndDETConfiguration_params->hw_debug5 = &mode_lib->mp.hw_debug5;
10082 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->mp.CompressedBufferSizeInkByte;
10083 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = &s->dummy_boolean_array[0][0];
10084 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[0];
10085 CalculateSwathAndDETConfiguration_params->funcs = &mode_lib->funcs;
10086
10087 // VBA_DELTA
10088 // Calculate DET size, swath height here. In VBA, they are calculated in mode check stage
10089 CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
10090
10091 // DSCCLK
10092 /*
10093 s->DSCFormatFactor = 0;
10094 for (k = 0; k < s->num_active_planes; ++k) {
10095 if ((display_cfg->plane_descriptors[k].stream_index != k) || !cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].dsc_enable) {
10096 } else {
10097 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420)
10098 s->DSCFormatFactor = 2;
10099 else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_444)
10100 s->DSCFormatFactor = 1;
10101 else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 ||
10102 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl)
10103 s->DSCFormatFactor = 2;
10104 else
10105 s->DSCFormatFactor = 1;
10106
10107 s->PixelClockBackEndFactor = 3.0;
10108
10109 if (mode_lib->mp.ODMMode[k] == dml2_odm_mode_combine_4to1)
10110 s->PixelClockBackEndFactor = 12.0;
10111 else if (mode_lib->mp.ODMMode[k] == dml2_odm_mode_combine_3to1)
10112 s->PixelClockBackEndFactor = 9.0;
10113 else if (mode_lib->mp.ODMMode[k] == dml2_odm_mode_combine_2to1)
10114 s->PixelClockBackEndFactor = 6.0;
10115
10116 }
10117 #ifdef __DML_VBA_DEBUG__
10118 dml2_printf("DML::%s: k=%u, DSCEnabled = %u\n", __func__, k, cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].dsc_enable);
10119 dml2_printf("DML::%s: k=%u, BlendingAndTiming = %u\n", __func__, k, display_cfg->plane_descriptors[k].stream_index);
10120 dml2_printf("DML::%s: k=%u, PixelClockBackEndFactor = %f\n", __func__, k, s->PixelClockBackEndFactor);
10121 dml2_printf("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, s->PixelClockBackEnd[k]);
10122 dml2_printf("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor);
10123 dml2_printf("DML::%s: k=%u, DSCCLK = %f\n", __func__, k, mode_lib->mp.DSCCLK[k]);
10124 #endif
10125 }
10126 */
10127
10128 // DSC Delay
10129 for (k = 0; k < s->num_active_planes; ++k) {
10130 mode_lib->mp.DSCDelay[k] = DSCDelayRequirement(cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].dsc_enable,
10131 mode_lib->mp.ODMMode[k],
10132 mode_lib->ip.maximum_dsc_bits_per_component,
10133 s->OutputBpp[k],
10134 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
10135 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
10136 cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].num_dsc_slices,
10137 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
10138 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
10139 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
10140 s->PixelClockBackEnd[k]);
10141 }
10142
10143 for (k = 0; k < s->num_active_planes; ++k)
10144 for (j = 0; j < s->num_active_planes; ++j) // NumberOfSurfaces
10145 if (j != k && display_cfg->plane_descriptors[k].stream_index == j && cfg_support_info->stream_support_info[display_cfg->plane_descriptors[j].stream_index].dsc_enable)
10146 mode_lib->mp.DSCDelay[k] = mode_lib->mp.DSCDelay[j];
10147
10148 // Prefetch
10149 if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) {
10150 for (k = 0; k < s->num_active_planes; ++k)
10151 mode_lib->mp.SurfaceSizeInTheMALL[k] = 0;
10152 } else {
10153 CalculateSurfaceSizeInMall(
10154 display_cfg,
10155 s->num_active_planes,
10156 mode_lib->soc.mall_allocated_for_dcn_mbytes,
10157 mode_lib->mp.BytePerPixelY,
10158 mode_lib->mp.BytePerPixelC,
10159 mode_lib->mp.Read256BlockWidthY,
10160 mode_lib->mp.Read256BlockWidthC,
10161 mode_lib->mp.Read256BlockHeightY,
10162 mode_lib->mp.Read256BlockHeightC,
10163 mode_lib->mp.MacroTileWidthY,
10164 mode_lib->mp.MacroTileWidthC,
10165 mode_lib->mp.MacroTileHeightY,
10166 mode_lib->mp.MacroTileHeightC,
10167
10168 /* Output */
10169 mode_lib->mp.SurfaceSizeInTheMALL,
10170 &s->dummy_boolean[0]); /* bool *ExceededMALLSize */
10171 }
10172
10173 for (k = 0; k < s->num_active_planes; ++k) {
10174 s->SurfaceParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
10175 s->SurfaceParameters[k].DPPPerSurface = mode_lib->mp.NoOfDPP[k];
10176 s->SurfaceParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
10177 s->SurfaceParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
10178 s->SurfaceParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
10179 s->SurfaceParameters[k].BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY[k];
10180 s->SurfaceParameters[k].BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY[k];
10181 s->SurfaceParameters[k].BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC[k];
10182 s->SurfaceParameters[k].BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC[k];
10183 s->SurfaceParameters[k].BlockWidthY = mode_lib->mp.MacroTileWidthY[k];
10184 s->SurfaceParameters[k].BlockHeightY = mode_lib->mp.MacroTileHeightY[k];
10185 s->SurfaceParameters[k].BlockWidthC = mode_lib->mp.MacroTileWidthC[k];
10186 s->SurfaceParameters[k].BlockHeightC = mode_lib->mp.MacroTileHeightC[k];
10187 s->SurfaceParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
10188 s->SurfaceParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
10189 s->SurfaceParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
10190 s->SurfaceParameters[k].SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
10191 s->SurfaceParameters[k].SurfaceTiling = display_cfg->plane_descriptors[k].surface.tiling;
10192 s->SurfaceParameters[k].BytePerPixelY = mode_lib->mp.BytePerPixelY[k];
10193 s->SurfaceParameters[k].BytePerPixelC = mode_lib->mp.BytePerPixelC[k];
10194 s->SurfaceParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
10195 s->SurfaceParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
10196 s->SurfaceParameters[k].VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
10197 s->SurfaceParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
10198 s->SurfaceParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
10199 s->SurfaceParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch;
10200 s->SurfaceParameters[k].PitchC = display_cfg->plane_descriptors[k].surface.plane1.pitch;
10201 s->SurfaceParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
10202 s->SurfaceParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
10203 s->SurfaceParameters[k].ViewportYStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
10204 s->SurfaceParameters[k].ViewportXStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
10205 s->SurfaceParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
10206 s->SurfaceParameters[k].FORCE_ONE_ROW_FOR_FRAME = display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame;
10207 s->SurfaceParameters[k].SwathHeightY = mode_lib->mp.SwathHeightY[k];
10208 s->SurfaceParameters[k].SwathHeightC = mode_lib->mp.SwathHeightC[k];
10209 s->SurfaceParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch;
10210 s->SurfaceParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch;
10211 }
10212
10213 CalculateVMRowAndSwath_params->display_cfg = display_cfg;
10214 CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = s->num_active_planes;
10215 CalculateVMRowAndSwath_params->myPipe = s->SurfaceParameters;
10216 CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->mp.SurfaceSizeInTheMALL;
10217 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
10218 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma;
10219 CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes;
10220 CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->mp.SwathWidthY;
10221 CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->mp.SwathWidthC;
10222 CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
10223 CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes;
10224 CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present;
10225
10226 // output
10227 CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0];
10228 CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = mode_lib->mp.dpte_row_width_luma_ub;
10229 CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = mode_lib->mp.dpte_row_width_chroma_ub;
10230 CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->mp.dpte_row_height;
10231 CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->mp.dpte_row_height_chroma;
10232 CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = mode_lib->mp.dpte_row_height_linear;
10233 CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = mode_lib->mp.dpte_row_height_linear_chroma;
10234 CalculateVMRowAndSwath_params->vm_group_bytes = mode_lib->mp.vm_group_bytes;
10235 CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes;
10236 CalculateVMRowAndSwath_params->PixelPTEReqWidthY = mode_lib->mp.PixelPTEReqWidthY;
10237 CalculateVMRowAndSwath_params->PixelPTEReqHeightY = mode_lib->mp.PixelPTEReqHeightY;
10238 CalculateVMRowAndSwath_params->PTERequestSizeY = mode_lib->mp.PTERequestSizeY;
10239 CalculateVMRowAndSwath_params->PixelPTEReqWidthC = mode_lib->mp.PixelPTEReqWidthC;
10240 CalculateVMRowAndSwath_params->PixelPTEReqHeightC = mode_lib->mp.PixelPTEReqHeightC;
10241 CalculateVMRowAndSwath_params->PTERequestSizeC = mode_lib->mp.PTERequestSizeC;
10242 CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y;
10243 CalculateVMRowAndSwath_params->vmpg_height_y = s->vmpg_height_y;
10244 CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c;
10245 CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c;
10246 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = mode_lib->mp.dpde0_bytes_per_frame_ub_l;
10247 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = mode_lib->mp.dpde0_bytes_per_frame_ub_c;
10248 CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->mp.PrefetchSourceLinesY;
10249 CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->mp.PrefetchSourceLinesC;
10250 CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->mp.VInitPreFillY;
10251 CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->mp.VInitPreFillC;
10252 CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->mp.MaxNumSwathY;
10253 CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->mp.MaxNumSwathC;
10254 CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
10255 CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->mp.PixelPTEBytesPerRow;
10256 CalculateVMRowAndSwath_params->vm_bytes = mode_lib->mp.vm_bytes;
10257 CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->mp.use_one_row_for_frame;
10258 CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->mp.use_one_row_for_frame_flip;
10259 CalculateVMRowAndSwath_params->is_using_mall_for_ss = mode_lib->mp.is_using_mall_for_ss;
10260 CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = mode_lib->mp.PTE_BUFFER_MODE;
10261 CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = mode_lib->mp.BIGK_FRAGMENT_SIZE;
10262 CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = s->dummy_boolean_array[1];
10263 CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->mp.meta_row_bw;
10264 CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->mp.meta_row_bytes;
10265 CalculateVMRowAndSwath_params->meta_req_width_luma = mode_lib->mp.meta_req_width;
10266 CalculateVMRowAndSwath_params->meta_req_height_luma = mode_lib->mp.meta_req_height;
10267 CalculateVMRowAndSwath_params->meta_row_width_luma = mode_lib->mp.meta_row_width;
10268 CalculateVMRowAndSwath_params->meta_row_height_luma = mode_lib->mp.meta_row_height;
10269 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = mode_lib->mp.meta_pte_bytes_per_frame_ub_l;
10270 CalculateVMRowAndSwath_params->meta_req_width_chroma = mode_lib->mp.meta_req_width_chroma;
10271 CalculateVMRowAndSwath_params->meta_row_height_chroma = mode_lib->mp.meta_row_height_chroma;
10272 CalculateVMRowAndSwath_params->meta_row_width_chroma = mode_lib->mp.meta_row_width_chroma;
10273 CalculateVMRowAndSwath_params->meta_req_height_chroma = mode_lib->mp.meta_req_height_chroma;
10274 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = mode_lib->mp.meta_pte_bytes_per_frame_ub_c;
10275
10276 CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params);
10277
10278 memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params));
10279 if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0 || mode_lib->ip.dcn_mrq_present) {
10280 for (k = 0; k < s->num_active_planes; k++) {
10281 mode_lib->mp.mall_prefetch_sdp_overhead_factor[k] = 1.0;
10282 mode_lib->mp.mall_prefetch_dram_overhead_factor[k] = 1.0;
10283 mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0;
10284 mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0;
10285 mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0;
10286 mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0;
10287 }
10288 } else {
10289 for (k = 0; k < s->num_active_planes; k++) {
10290 calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
10291 calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count;
10292 calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes;
10293 calculate_mcache_setting_params->mcache_size_bytes = mode_lib->soc.mcache_size_bytes;
10294 calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes;
10295 calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
10296 calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
10297
10298 calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format;
10299 calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle);
10300 calculate_mcache_setting_params->vp_stationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
10301 calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling;
10302 calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall;
10303
10304 calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
10305 calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
10306 calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
10307 calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
10308 calculate_mcache_setting_params->blk_width_l = mode_lib->mp.MacroTileWidthY[k];
10309 calculate_mcache_setting_params->blk_height_l = mode_lib->mp.MacroTileHeightY[k];
10310 calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k];
10311 calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k];
10312 calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k];
10313 calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->mp.BytePerPixelY[k];
10314
10315 calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
10316 calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
10317 calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
10318 calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
10319 calculate_mcache_setting_params->blk_width_c = mode_lib->mp.MacroTileWidthC[k];
10320 calculate_mcache_setting_params->blk_height_c = mode_lib->mp.MacroTileHeightC[k];
10321 calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k];
10322 calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k];
10323 calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k];
10324 calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->mp.BytePerPixelC[k];
10325
10326 // output
10327 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k];
10328 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k];
10329 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k];
10330 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k];
10331
10332 calculate_mcache_setting_params->num_mcaches_l = &mode_lib->mp.num_mcaches_l[k];
10333 calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->mp.mcache_row_bytes_l[k];
10334 calculate_mcache_setting_params->mcache_offsets_l = mode_lib->mp.mcache_offsets_l[k];
10335 calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->mp.mcache_shift_granularity_l[k];
10336
10337 calculate_mcache_setting_params->num_mcaches_c = &mode_lib->mp.num_mcaches_c[k];
10338 calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->mp.mcache_row_bytes_c[k];
10339 calculate_mcache_setting_params->mcache_offsets_c = mode_lib->mp.mcache_offsets_c[k];
10340 calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->mp.mcache_shift_granularity_c[k];
10341
10342 calculate_mcache_setting_params->mall_comb_mcache_l = &mode_lib->mp.mall_comb_mcache_l[k];
10343 calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->mp.mall_comb_mcache_c[k];
10344 calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->mp.lc_comb_mcache[k];
10345 calculate_mcache_setting(&mode_lib->scratch, calculate_mcache_setting_params);
10346 }
10347
10348 calculate_mall_bw_overhead_factor(
10349 mode_lib->mp.mall_prefetch_sdp_overhead_factor,
10350 mode_lib->mp.mall_prefetch_dram_overhead_factor,
10351
10352 // input
10353 display_cfg,
10354 s->num_active_planes);
10355 }
10356
10357 // Calculate all the bandwidth available
10358 calculate_bandwidth_available(
10359 mode_lib->mp.avg_bandwidth_available_min,
10360 mode_lib->mp.avg_bandwidth_available,
10361 mode_lib->mp.urg_bandwidth_available_min,
10362 mode_lib->mp.urg_bandwidth_available,
10363 mode_lib->mp.urg_bandwidth_available_vm_only,
10364 mode_lib->mp.urg_bandwidth_available_pixel_and_vm,
10365
10366 &mode_lib->soc,
10367 display_cfg->hostvm_enable,
10368 mode_lib->mp.Dcfclk,
10369 mode_lib->mp.FabricClock,
10370 mode_lib->mp.dram_bw_mbps);
10371
10372
10373 calculate_hostvm_inefficiency_factor(
10374 &s->HostVMInefficiencyFactor,
10375 &s->HostVMInefficiencyFactorPrefetch,
10376
10377 display_cfg->gpuvm_enable,
10378 display_cfg->hostvm_enable,
10379 mode_lib->ip.remote_iommu_outstanding_translations,
10380 mode_lib->soc.max_outstanding_reqs,
10381 mode_lib->mp.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active],
10382 mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);
10383
10384 s->TotalDCCActiveDPP = 0;
10385 s->TotalActiveDPP = 0;
10386 for (k = 0; k < s->num_active_planes; ++k) {
10387 s->TotalActiveDPP = s->TotalActiveDPP + mode_lib->mp.NoOfDPP[k];
10388 if (display_cfg->plane_descriptors[k].surface.dcc.enable)
10389 s->TotalDCCActiveDPP = s->TotalDCCActiveDPP + mode_lib->mp.NoOfDPP[k];
10390 }
10391 // Calculate tdlut schedule related terms
10392 for (k = 0; k <= s->num_active_planes - 1; k++) {
10393 calculate_tdlut_setting_params->dispclk_mhz = mode_lib->mp.Dispclk;
10394 calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
10395 calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode;
10396 calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode;
10397 calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size;
10398 calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
10399 calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
10400
10401 // output
10402 calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k];
10403 calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k];
10404 calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k];
10405 calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k];
10406 calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k];
10407 calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k];
10408
10409 calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params);
10410 }
10411
10412 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3)
10413 s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes,
10414 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
10415 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes));
10416
10417 CalculateExtraLatency(
10418 display_cfg,
10419 mode_lib->ip.rob_buffer_size_kbytes,
10420 mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles,
10421 s->ReorderingBytes,
10422 mode_lib->mp.Dcfclk,
10423 mode_lib->mp.FabricClock,
10424 mode_lib->ip.pixel_chunk_size_kbytes,
10425 mode_lib->mp.urg_bandwidth_available_min[dml2_core_internal_soc_state_sys_active],
10426 s->num_active_planes,
10427 mode_lib->mp.NoOfDPP,
10428 mode_lib->mp.dpte_group_bytes,
10429 s->tdlut_bytes_per_group,
10430 s->HostVMInefficiencyFactor,
10431 s->HostVMInefficiencyFactorPrefetch,
10432 mode_lib->soc.hostvm_min_page_size_kbytes,
10433 mode_lib->soc.qos_parameters.qos_type,
10434 !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable),
10435 mode_lib->soc.max_outstanding_reqs,
10436 mode_lib->mp.request_size_bytes_luma,
10437 mode_lib->mp.request_size_bytes_chroma,
10438 mode_lib->ip.meta_chunk_size_kbytes,
10439 mode_lib->ip.dchub_arb_to_ret_delay,
10440 mode_lib->mp.TripToMemory,
10441 mode_lib->ip.hostvm_mode,
10442
10443 // output
10444 &mode_lib->mp.ExtraLatency,
10445 &mode_lib->mp.ExtraLatency_sr,
10446 &mode_lib->mp.ExtraLatencyPrefetch);
10447
10448 mode_lib->mp.TCalc = 24.0 / mode_lib->mp.DCFCLKDeepSleep;
10449
10450 for (k = 0; k < s->num_active_planes; ++k) {
10451 if (display_cfg->plane_descriptors[k].stream_index == k) {
10452 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) {
10453 mode_lib->mp.WritebackDelay[k] =
10454 mode_lib->soc.qos_parameters.writeback.base_latency_us
10455 + CalculateWriteBackDelay(
10456 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format,
10457 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio,
10458 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio,
10459 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps,
10460 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width,
10461 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height,
10462 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height,
10463 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->mp.Dispclk;
10464 } else
10465 mode_lib->mp.WritebackDelay[k] = 0;
10466
10467 for (j = 0; j < s->num_active_planes; ++j) {
10468 if (display_cfg->plane_descriptors[j].stream_index == k
10469 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[j].stream_index].writeback.enable == true) {
10470 mode_lib->mp.WritebackDelay[k] =
10471 math_max2(
10472 mode_lib->mp.WritebackDelay[k],
10473 mode_lib->soc.qos_parameters.writeback.base_latency_us
10474 + CalculateWriteBackDelay(
10475 display_cfg->stream_descriptors[display_cfg->plane_descriptors[j].stream_index].writeback.pixel_format,
10476 display_cfg->stream_descriptors[display_cfg->plane_descriptors[j].stream_index].writeback.scaling_info.h_ratio,
10477 display_cfg->stream_descriptors[display_cfg->plane_descriptors[j].stream_index].writeback.scaling_info.v_ratio,
10478 display_cfg->stream_descriptors[display_cfg->plane_descriptors[j].stream_index].writeback.scaling_info.v_taps,
10479 display_cfg->stream_descriptors[display_cfg->plane_descriptors[j].stream_index].writeback.scaling_info.output_width,
10480 display_cfg->stream_descriptors[display_cfg->plane_descriptors[j].stream_index].writeback.scaling_info.output_height,
10481 display_cfg->stream_descriptors[display_cfg->plane_descriptors[j].stream_index].writeback.scaling_info.input_height,
10482 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->mp.Dispclk);
10483 }
10484 }
10485 }
10486 }
10487
10488 for (k = 0; k < s->num_active_planes; ++k)
10489 for (j = 0; j < s->num_active_planes; ++j)
10490 if (display_cfg->plane_descriptors[k].stream_index == j)
10491 mode_lib->mp.WritebackDelay[k] = mode_lib->mp.WritebackDelay[j];
10492
10493 mode_lib->mp.UrgentLatency = CalculateUrgentLatency(
10494 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us,
10495 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us,
10496 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us,
10497 mode_lib->soc.do_urgent_latency_adjustment,
10498 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us,
10499 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz,
10500 mode_lib->mp.FabricClock,
10501 mode_lib->mp.uclk_freq_mhz,
10502 mode_lib->soc.qos_parameters.qos_type,
10503 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles,
10504 mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles,
10505 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
10506 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
10507 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin,
10508 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
10509
10510 mode_lib->mp.TripToMemory = CalculateTripToMemory(
10511 mode_lib->mp.UrgentLatency,
10512 mode_lib->mp.FabricClock,
10513 mode_lib->mp.uclk_freq_mhz,
10514 mode_lib->soc.qos_parameters.qos_type,
10515 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles,
10516 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
10517 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
10518 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
10519 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
10520
10521 mode_lib->mp.TripToMemory = math_max2(mode_lib->mp.UrgentLatency, mode_lib->mp.TripToMemory);
10522
10523 mode_lib->mp.MetaTripToMemory = CalculateMetaTripToMemory(
10524 mode_lib->mp.UrgentLatency,
10525 mode_lib->mp.FabricClock,
10526 mode_lib->mp.uclk_freq_mhz,
10527 mode_lib->soc.qos_parameters.qos_type,
10528 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles,
10529 mode_lib->soc.qos_parameters.qos_params.dcn4x.meta_trip_adder_fclk_cycles,
10530 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
10531 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
10532
10533 for (k = 0; k < s->num_active_planes; ++k) {
10534 calculate_cursor_req_attributes(
10535 display_cfg->plane_descriptors[k].cursor.cursor_width,
10536 display_cfg->plane_descriptors[k].cursor.cursor_bpp,
10537
10538 // output
10539 &s->cursor_lines_per_chunk[k],
10540 &s->cursor_bytes_per_line[k],
10541 &s->cursor_bytes_per_chunk[k],
10542 &s->cursor_bytes[k]);
10543
10544 bool cursor_not_enough_urgent_latency_hiding = 0;
10545 double line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
10546
10547 calculate_cursor_urgent_burst_factor(
10548 mode_lib->ip.cursor_buffer_size,
10549 display_cfg->plane_descriptors[k].cursor.cursor_width,
10550 s->cursor_bytes_per_chunk[k],
10551 s->cursor_lines_per_chunk[k],
10552 line_time_us,
10553 mode_lib->mp.UrgentLatency,
10554
10555 // output
10556 &mode_lib->mp.UrgentBurstFactorCursor[k],
10557 &cursor_not_enough_urgent_latency_hiding);
10558 mode_lib->mp.UrgentBurstFactorCursorPre[k] = mode_lib->mp.UrgentBurstFactorCursor[k];
10559
10560 CalculateUrgentBurstFactor(
10561 &display_cfg->plane_descriptors[k],
10562 mode_lib->mp.swath_width_luma_ub[k],
10563 mode_lib->mp.swath_width_chroma_ub[k],
10564 mode_lib->mp.SwathHeightY[k],
10565 mode_lib->mp.SwathHeightC[k],
10566 line_time_us,
10567 mode_lib->mp.UrgentLatency,
10568 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
10569 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
10570 mode_lib->mp.BytePerPixelInDETY[k],
10571 mode_lib->mp.BytePerPixelInDETC[k],
10572 mode_lib->mp.DETBufferSizeY[k],
10573 mode_lib->mp.DETBufferSizeC[k],
10574
10575 /* output */
10576 &mode_lib->mp.UrgentBurstFactorLuma[k],
10577 &mode_lib->mp.UrgentBurstFactorChroma[k],
10578 &mode_lib->mp.NotEnoughUrgentLatencyHiding[k]);
10579
10580 mode_lib->mp.NotEnoughUrgentLatencyHiding[k] = mode_lib->mp.NotEnoughUrgentLatencyHiding[k] || cursor_not_enough_urgent_latency_hiding;
10581 }
10582
10583 for (k = 0; k < s->num_active_planes; ++k) {
10584 s->MaxVStartupLines[k] = CalculateMaxVStartup(
10585 mode_lib->ip.ptoi_supported,
10586 mode_lib->ip.vblank_nom_default_us,
10587 &display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing,
10588 mode_lib->mp.WritebackDelay[k]);
10589
10590 #ifdef __DML_VBA_DEBUG__
10591 dml2_printf("DML::%s: k=%u MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
10592 dml2_printf("DML::%s: k=%u WritebackDelay = %f\n", __func__, k, mode_lib->mp.WritebackDelay[k]);
10593 #endif
10594 }
10595
10596 s->immediate_flip_required = false;
10597 for (k = 0; k < s->num_active_planes; ++k) {
10598 s->immediate_flip_required = s->immediate_flip_required || display_cfg->plane_descriptors[k].immediate_flip;
10599 }
10600 #ifdef __DML_VBA_DEBUG__
10601 dml2_printf("DML::%s: immediate_flip_required = %u\n", __func__, s->immediate_flip_required);
10602 #endif
10603
10604 {
10605 s->DestinationLineTimesForPrefetchLessThan2 = false;
10606 s->VRatioPrefetchMoreThanMax = false;
10607
10608 dml2_printf("DML::%s: Start one iteration of prefetch schedule evaluation\n", __func__);
10609
10610 for (k = 0; k < s->num_active_planes; ++k) {
10611 dml2_printf("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
10612
10613 mode_lib->mp.TWait[k] = CalculateTWait(
10614 display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns,
10615 mode_lib->mp.UrgentLatency,
10616 mode_lib->mp.TripToMemory);
10617
10618 struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe;
10619 myPipe->Dppclk = mode_lib->mp.Dppclk[k];
10620 myPipe->Dispclk = mode_lib->mp.Dispclk;
10621 myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
10622 myPipe->DCFClkDeepSleep = mode_lib->mp.DCFCLKDeepSleep;
10623 myPipe->DPPPerSurface = mode_lib->mp.NoOfDPP[k];
10624 myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled;
10625 myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
10626 myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
10627 myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
10628 myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
10629 myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
10630 myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored;
10631 myPipe->BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY[k];
10632 myPipe->BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY[k];
10633 myPipe->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC[k];
10634 myPipe->BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC[k];
10635 myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
10636 myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors;
10637 myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active;
10638 myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
10639 myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active;
10640 myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
10641 myPipe->ODMMode = mode_lib->mp.ODMMode[k];
10642 myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
10643 myPipe->BytePerPixelY = mode_lib->mp.BytePerPixelY[k];
10644 myPipe->BytePerPixelC = mode_lib->mp.BytePerPixelC[k];
10645 myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
10646
10647 #ifdef __DML_VBA_DEBUG__
10648 dml2_printf("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k);
10649 #endif
10650 CalculatePrefetchSchedule_params->display_cfg = display_cfg;
10651 CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch;
10652 CalculatePrefetchSchedule_params->myPipe = myPipe;
10653 CalculatePrefetchSchedule_params->DSCDelay = mode_lib->mp.DSCDelay[k];
10654 CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter;
10655 CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl;
10656 CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only;
10657 CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor;
10658 CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal;
10659 CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->mp.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
10660 CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format;
10661 CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters;
10662 CalculatePrefetchSchedule_params->VStartup = s->MaxVStartupLines[k];
10663 CalculatePrefetchSchedule_params->MaxVStartup = s->MaxVStartupLines[k];
10664 CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
10665 CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable;
10666 CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled;
10667 CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required;
10668 CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes;
10669 CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->mp.UrgentLatency;
10670 CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->mp.ExtraLatencyPrefetch;
10671 CalculatePrefetchSchedule_params->TCalc = mode_lib->mp.TCalc;
10672 CalculatePrefetchSchedule_params->vm_bytes = mode_lib->mp.vm_bytes[k];
10673 CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->mp.PixelPTEBytesPerRow[k];
10674 CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->mp.PrefetchSourceLinesY[k];
10675 CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->mp.VInitPreFillY[k];
10676 CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->mp.MaxNumSwathY[k];
10677 CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->mp.PrefetchSourceLinesC[k];
10678 CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->mp.VInitPreFillC[k];
10679 CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->mp.MaxNumSwathC[k];
10680 CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->mp.swath_width_luma_ub[k];
10681 CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->mp.swath_width_chroma_ub[k];
10682 CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->mp.SwathHeightY[k];
10683 CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->mp.SwathHeightC[k];
10684 CalculatePrefetchSchedule_params->TWait = mode_lib->mp.TWait[k];
10685 CalculatePrefetchSchedule_params->Ttrip = mode_lib->mp.TripToMemory;
10686 CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
10687 CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k];
10688 CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k];
10689 CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k];
10690 CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k];
10691 CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0);
10692 CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k];
10693 CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k];
10694 CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
10695 CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present;
10696 CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->mp.meta_row_bytes[k];
10697 CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor[k];
10698
10699 // output
10700 CalculatePrefetchSchedule_params->DSTXAfterScaler = &mode_lib->mp.DSTXAfterScaler[k];
10701 CalculatePrefetchSchedule_params->DSTYAfterScaler = &mode_lib->mp.DSTYAfterScaler[k];
10702 CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->mp.dst_y_prefetch[k];
10703 CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->mp.dst_y_per_vm_vblank[k];
10704 CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->mp.dst_y_per_row_vblank[k];
10705 CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->mp.VRatioPrefetchY[k];
10706 CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->mp.VRatioPrefetchC[k];
10707 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k];
10708 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k];
10709 CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->mp.NotEnoughTimeForDynamicMetadata[k];
10710 CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->mp.Tno_bw[k];
10711 CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->mp.Tno_bw_flip[k];
10712 CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->mp.prefetch_vmrow_bw[k];
10713 CalculatePrefetchSchedule_params->Tdmdl_vm = &mode_lib->mp.Tdmdl_vm[k];
10714 CalculatePrefetchSchedule_params->Tdmdl = &mode_lib->mp.Tdmdl[k];
10715 CalculatePrefetchSchedule_params->TSetup = &mode_lib->mp.TSetup[k];
10716 CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k];
10717 CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k];
10718 CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k];
10719 CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k];
10720 CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k];
10721 CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k];
10722 CalculatePrefetchSchedule_params->VUpdateOffsetPix = &mode_lib->mp.VUpdateOffsetPix[k];
10723 CalculatePrefetchSchedule_params->VUpdateWidthPix = &mode_lib->mp.VUpdateWidthPix[k];
10724 CalculatePrefetchSchedule_params->VReadyOffsetPix = &mode_lib->mp.VReadyOffsetPix[k];
10725 CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->mp.prefetch_cursor_bw[k];
10726
10727 mode_lib->mp.NoTimeToPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params);
10728
10729 #ifdef __DML_VBA_DEBUG__
10730 dml2_printf("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]);
10731 #endif
10732 mode_lib->mp.VStartupMin[k] = s->MaxVStartupLines[k];
10733 } // for k
10734
10735 mode_lib->mp.PrefetchModeSupported = true;
10736 for (k = 0; k < s->num_active_planes; ++k) {
10737 if (mode_lib->mp.NoTimeToPrefetch[k] == true ||
10738 mode_lib->mp.NotEnoughTimeForDynamicMetadata[k] ||
10739 mode_lib->mp.DSTYAfterScaler[k] > 8) {
10740 dml2_printf("DML::%s: k=%u, NoTimeToPrefetch = %0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]);
10741 dml2_printf("DML::%s: k=%u, NotEnoughTimeForDynamicMetadata=%u\n", __func__, k, mode_lib->mp.NotEnoughTimeForDynamicMetadata[k]);
10742 dml2_printf("DML::%s: k=%u, DSTYAfterScaler=%u (should be <= 0)\n", __func__, k, mode_lib->mp.DSTYAfterScaler[k]);
10743 mode_lib->mp.PrefetchModeSupported = false;
10744 }
10745 if (mode_lib->mp.dst_y_prefetch[k] < 2)
10746 s->DestinationLineTimesForPrefetchLessThan2 = true;
10747
10748 if (mode_lib->mp.VRatioPrefetchY[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ ||
10749 mode_lib->mp.VRatioPrefetchC[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__)
10750 s->VRatioPrefetchMoreThanMax = true;
10751
10752 if (mode_lib->mp.NotEnoughUrgentLatencyHiding[k]) {
10753 dml2_printf("DML::%s: k=%u, NotEnoughUrgentLatencyHiding = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHiding[k]);
10754 mode_lib->mp.PrefetchModeSupported = false;
10755 }
10756 }
10757
10758 if (s->VRatioPrefetchMoreThanMax == true || s->DestinationLineTimesForPrefetchLessThan2 == true) {
10759 dml2_printf("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax);
10760 dml2_printf("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2);
10761 mode_lib->mp.PrefetchModeSupported = false;
10762 }
10763
10764 dml2_printf("DML::%s: Prefetch schedule is %sOK at vstartup = %u\n", __func__,
10765 mode_lib->mp.PrefetchModeSupported ? "" : "NOT ", CalculatePrefetchSchedule_params->VStartup);
10766
10767 // Prefetch schedule OK, now check prefetch bw
10768 if (mode_lib->mp.PrefetchModeSupported == true) {
10769 for (k = 0; k < s->num_active_planes; ++k) {
10770 double line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
10771 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
10772 CalculateUrgentBurstFactor(
10773 &display_cfg->plane_descriptors[k],
10774 mode_lib->mp.swath_width_luma_ub[k],
10775 mode_lib->mp.swath_width_chroma_ub[k],
10776 mode_lib->mp.SwathHeightY[k],
10777 mode_lib->mp.SwathHeightC[k],
10778 line_time_us,
10779 mode_lib->mp.UrgentLatency,
10780 mode_lib->mp.VRatioPrefetchY[k],
10781 mode_lib->mp.VRatioPrefetchC[k],
10782 mode_lib->mp.BytePerPixelInDETY[k],
10783 mode_lib->mp.BytePerPixelInDETC[k],
10784 mode_lib->mp.DETBufferSizeY[k],
10785 mode_lib->mp.DETBufferSizeC[k],
10786 /* Output */
10787 &mode_lib->mp.UrgentBurstFactorLumaPre[k],
10788 &mode_lib->mp.UrgentBurstFactorChromaPre[k],
10789 &mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]);
10790
10791 #ifdef __DML_VBA_DEBUG__
10792 dml2_printf("DML::%s: k=%0u DPPPerSurface=%u\n", __func__, k, mode_lib->mp.NoOfDPP[k]);
10793 dml2_printf("DML::%s: k=%0u UrgentBurstFactorLuma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLuma[k]);
10794 dml2_printf("DML::%s: k=%0u UrgentBurstFactorChroma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChroma[k]);
10795 dml2_printf("DML::%s: k=%0u UrgentBurstFactorLumaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLumaPre[k]);
10796 dml2_printf("DML::%s: k=%0u UrgentBurstFactorChromaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChromaPre[k]);
10797
10798 dml2_printf("DML::%s: k=%0u VRatioPrefetchY=%f\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k]);
10799 dml2_printf("DML::%s: k=%0u VRatioY=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
10800
10801 dml2_printf("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, mode_lib->mp.prefetch_vmrow_bw[k]);
10802 dml2_printf("DML::%s: k=%0u ReadBandwidthSurfaceLuma=%f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthLuma[k]);
10803 dml2_printf("DML::%s: k=%0u ReadBandwidthSurfaceChroma=%f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthChroma[k]);
10804 dml2_printf("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, mode_lib->mp.cursor_bw[k]);
10805 dml2_printf("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, mode_lib->mp.dpte_row_bw[k]);
10806 dml2_printf("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, mode_lib->mp.meta_row_bw[k]);
10807 dml2_printf("DML::%s: k=%0u RequiredPrefetchPixelDataBWLuma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k]);
10808 dml2_printf("DML::%s: k=%0u RequiredPrefetchPixelDataBWChroma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k]);
10809 dml2_printf("DML::%s: k=%0u prefetch_cursor_bw=%f\n", __func__, k, mode_lib->mp.prefetch_cursor_bw[k]);
10810 #endif
10811 }
10812
10813 for (k = 0; k <= s->num_active_planes - 1; k++)
10814 mode_lib->mp.final_flip_bw[k] = 0;
10815
10816 calculate_peak_bandwidth_required(
10817 &mode_lib->scratch,
10818 mode_lib->mp.urg_vactive_bandwidth_required,
10819 mode_lib->mp.urg_bandwidth_required,
10820 mode_lib->mp.non_urg_bandwidth_required,
10821
10822 // Input
10823 display_cfg,
10824 0, // inc_flip_bw
10825 s->num_active_planes,
10826 mode_lib->mp.NoOfDPP,
10827 mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0,
10828 mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1,
10829 mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0,
10830 mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1,
10831 mode_lib->mp.mall_prefetch_sdp_overhead_factor,
10832 mode_lib->mp.mall_prefetch_dram_overhead_factor,
10833 mode_lib->mp.SurfaceReadBandwidthLuma,
10834 mode_lib->mp.SurfaceReadBandwidthChroma,
10835 mode_lib->mp.RequiredPrefetchPixelDataBWLuma,
10836 mode_lib->mp.RequiredPrefetchPixelDataBWChroma,
10837 mode_lib->mp.cursor_bw,
10838 mode_lib->mp.dpte_row_bw,
10839 mode_lib->mp.meta_row_bw,
10840 mode_lib->mp.prefetch_cursor_bw,
10841 mode_lib->mp.prefetch_vmrow_bw,
10842 mode_lib->mp.final_flip_bw,
10843 mode_lib->mp.UrgentBurstFactorLuma,
10844 mode_lib->mp.UrgentBurstFactorChroma,
10845 mode_lib->mp.UrgentBurstFactorCursor,
10846 mode_lib->mp.UrgentBurstFactorLumaPre,
10847 mode_lib->mp.UrgentBurstFactorChromaPre,
10848 mode_lib->mp.UrgentBurstFactorCursorPre);
10849
10850 // Check urg peak bandwidth against available urg bw
10851 // check at SDP and DRAM, for all soc states (SVP prefetch and Sys Active)
10852 check_urgent_bandwidth_support(
10853 &mode_lib->mp.FractionOfUrgentBandwidth, // double* frac_urg_bandwidth
10854 &mode_lib->mp.FractionOfUrgentBandwidthMALL, // double* frac_urg_bandwidth_mall
10855 &s->dummy_boolean[1], // vactive bw ok
10856 &mode_lib->mp.PrefetchModeSupported, // prefetch bw ok
10857
10858 mode_lib->soc.mall_allocated_for_dcn_mbytes,
10859 mode_lib->mp.non_urg_bandwidth_required,
10860 mode_lib->mp.urg_vactive_bandwidth_required,
10861 mode_lib->mp.urg_bandwidth_required,
10862 mode_lib->mp.urg_bandwidth_available);
10863
10864 for (k = 0; k < s->num_active_planes; ++k) {
10865 if (mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]) {
10866 dml2_printf("DML::%s: k=%u, NotEnoughUrgentLatencyHidingPre = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]);
10867 mode_lib->mp.PrefetchModeSupported = false;
10868 }
10869 }
10870 } // prefetch schedule ok
10871
10872 // Prefetch schedule and prefetch bw ok, now check flip bw
10873 if (mode_lib->mp.PrefetchModeSupported == true) { // prefetch schedule and prefetch bw ok, now check flip bw
10874
10875 mode_lib->mp.BandwidthAvailableForImmediateFlip =
10876 get_bandwidth_available_for_immediate_flip(dml2_core_internal_soc_state_sys_active,
10877 mode_lib->mp.urg_bandwidth_required, // no flip
10878 mode_lib->mp.urg_bandwidth_available);
10879 mode_lib->mp.TotImmediateFlipBytes = 0;
10880 for (k = 0; k < s->num_active_planes; ++k) {
10881 if (display_cfg->plane_descriptors[k].immediate_flip) {
10882 s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes(
10883 s->HostVMInefficiencyFactor,
10884 mode_lib->mp.vm_bytes[k],
10885 mode_lib->mp.PixelPTEBytesPerRow[k],
10886 mode_lib->mp.meta_row_bytes[k]);
10887 } else {
10888 s->per_pipe_flip_bytes[k] = 0;
10889 }
10890 mode_lib->mp.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->mp.NoOfDPP[k];
10891 #ifdef __DML_VBA_DEBUG__
10892 dml2_printf("DML::%s: k = %u\n", __func__, k);
10893 dml2_printf("DML::%s: DPPPerSurface = %u\n", __func__, mode_lib->mp.NoOfDPP[k]);
10894 dml2_printf("DML::%s: vm_bytes = %u\n", __func__, mode_lib->mp.vm_bytes[k]);
10895 dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, mode_lib->mp.PixelPTEBytesPerRow[k]);
10896 dml2_printf("DML::%s: meta_row_bytes = %u\n", __func__, mode_lib->mp.meta_row_bytes[k]);
10897 dml2_printf("DML::%s: TotImmediateFlipBytes = %u\n", __func__, mode_lib->mp.TotImmediateFlipBytes);
10898 #endif
10899 }
10900 for (k = 0; k < s->num_active_planes; ++k) {
10901 CalculateFlipSchedule(
10902 &mode_lib->scratch,
10903 display_cfg->plane_descriptors[k].immediate_flip,
10904 0, // use_lb_flip_bw
10905 s->HostVMInefficiencyFactor,
10906 s->Tvm_trips_flip[k],
10907 s->Tr0_trips_flip[k],
10908 s->Tvm_trips_flip_rounded[k],
10909 s->Tr0_trips_flip_rounded[k],
10910 display_cfg->gpuvm_enable,
10911 mode_lib->mp.vm_bytes[k],
10912 mode_lib->mp.PixelPTEBytesPerRow[k],
10913 mode_lib->mp.BandwidthAvailableForImmediateFlip,
10914 mode_lib->mp.TotImmediateFlipBytes,
10915 display_cfg->plane_descriptors[k].pixel_format,
10916 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
10917 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
10918 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
10919 mode_lib->mp.Tno_bw[k],
10920 mode_lib->mp.dpte_row_height[k],
10921 mode_lib->mp.dpte_row_height_chroma[k],
10922 mode_lib->mp.use_one_row_for_frame_flip[k],
10923 mode_lib->ip.max_flip_time_us,
10924 s->per_pipe_flip_bytes[k],
10925 mode_lib->mp.meta_row_bytes[k],
10926 mode_lib->mp.meta_row_height[k],
10927 mode_lib->mp.meta_row_height_chroma[k],
10928 mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable,
10929
10930 // Output
10931 &mode_lib->mp.dst_y_per_vm_flip[k],
10932 &mode_lib->mp.dst_y_per_row_flip[k],
10933 &mode_lib->mp.final_flip_bw[k],
10934 &mode_lib->mp.ImmediateFlipSupportedForPipe[k]);
10935 }
10936
10937 calculate_peak_bandwidth_required(
10938 &mode_lib->scratch,
10939 s->dummy_bw,
10940 mode_lib->mp.urg_bandwidth_required_flip,
10941 mode_lib->mp.non_urg_bandwidth_required_flip,
10942
10943 // Input
10944 display_cfg,
10945 1, // inc_flip_bw
10946 s->num_active_planes,
10947 mode_lib->mp.NoOfDPP,
10948 mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0,
10949 mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1,
10950 mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0,
10951 mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1,
10952 mode_lib->mp.mall_prefetch_sdp_overhead_factor,
10953 mode_lib->mp.mall_prefetch_dram_overhead_factor,
10954 mode_lib->mp.SurfaceReadBandwidthLuma,
10955 mode_lib->mp.SurfaceReadBandwidthChroma,
10956 mode_lib->mp.RequiredPrefetchPixelDataBWLuma,
10957 mode_lib->mp.RequiredPrefetchPixelDataBWChroma,
10958 mode_lib->mp.cursor_bw,
10959 mode_lib->mp.dpte_row_bw,
10960 mode_lib->mp.meta_row_bw,
10961 mode_lib->mp.prefetch_cursor_bw,
10962 mode_lib->mp.prefetch_vmrow_bw,
10963 mode_lib->mp.final_flip_bw,
10964 mode_lib->mp.UrgentBurstFactorLuma,
10965 mode_lib->mp.UrgentBurstFactorChroma,
10966 mode_lib->mp.UrgentBurstFactorCursor,
10967 mode_lib->mp.UrgentBurstFactorLumaPre,
10968 mode_lib->mp.UrgentBurstFactorChromaPre,
10969 mode_lib->mp.UrgentBurstFactorCursorPre);
10970
10971 calculate_immediate_flip_bandwidth_support(
10972 &mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip, // double* frac_urg_bandwidth_flip
10973 &mode_lib->mp.ImmediateFlipSupported, // bool* flip_bandwidth_support_ok
10974
10975 dml2_core_internal_soc_state_sys_active,
10976 mode_lib->mp.urg_bandwidth_required_flip,
10977 mode_lib->mp.non_urg_bandwidth_required_flip,
10978 mode_lib->mp.urg_bandwidth_available);
10979
10980 for (k = 0; k < s->num_active_planes; ++k) {
10981 if (display_cfg->plane_descriptors[k].immediate_flip && mode_lib->mp.ImmediateFlipSupportedForPipe[k] == false) {
10982 mode_lib->mp.ImmediateFlipSupported = false;
10983 #ifdef __DML_VBA_DEBUG__
10984 dml2_printf("DML::%s: Pipe %0d not supporting iflip!\n", __func__, k);
10985 #endif
10986 }
10987 }
10988 } else { // flip or prefetch not support
10989 mode_lib->mp.ImmediateFlipSupported = false;
10990 }
10991
10992 // consider flip support okay if the flip bw is ok, or when the user doesn't require an iflip and there is no host vm
10993 bool must_support_iflip = display_cfg->hostvm_enable || s->immediate_flip_required;
10994 mode_lib->mp.PrefetchAndImmediateFlipSupported = (mode_lib->mp.PrefetchModeSupported == true && (!must_support_iflip || mode_lib->mp.ImmediateFlipSupported));
10995
10996 #ifdef __DML_VBA_DEBUG__
10997 dml2_printf("DML::%s: PrefetchModeSupported = %u\n", __func__, mode_lib->mp.PrefetchModeSupported);
10998 for (k = 0; k < s->num_active_planes; ++k)
10999 dml2_printf("DML::%s: immediate_flip_required[%u] = %u\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip);
11000 dml2_printf("DML::%s: HostVMEnable = %u\n", __func__, display_cfg->hostvm_enable);
11001 dml2_printf("DML::%s: ImmediateFlipSupported = %u\n", __func__, mode_lib->mp.ImmediateFlipSupported);
11002 dml2_printf("DML::%s: PrefetchAndImmediateFlipSupported = %u\n", __func__, mode_lib->mp.PrefetchAndImmediateFlipSupported);
11003 #endif
11004 dml2_printf("DML::%s: Done one iteration: k=%d, MaxVStartupLines=%u\n", __func__, k, s->MaxVStartupLines[k]);
11005 }
11006
11007 for (k = 0; k < s->num_active_planes; ++k)
11008 dml2_printf("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
11009
11010 if (!mode_lib->mp.PrefetchAndImmediateFlipSupported) {
11011 dml2_printf("DML::%s: Bad, Prefetch and flip scheduling solution NOT found!\n", __func__);
11012 } else {
11013 dml2_printf("DML::%s: Good, Prefetch and flip scheduling solution found\n", __func__);
11014
11015 // DCC Configuration
11016 for (k = 0; k < s->num_active_planes; ++k) {
11017 #ifdef __DML_VBA_DEBUG__
11018 dml2_printf("DML::%s: Calculate DCC configuration for surface k=%u\n", __func__, k);
11019 #endif
11020 CalculateDCCConfiguration(
11021 display_cfg->plane_descriptors[k].surface.dcc.enable,
11022 display_cfg->overrides.dcc_programming_assumes_scan_direction_unknown,
11023 display_cfg->plane_descriptors[k].pixel_format,
11024 display_cfg->plane_descriptors[k].surface.plane0.width,
11025 display_cfg->plane_descriptors[k].surface.plane1.width,
11026 display_cfg->plane_descriptors[k].surface.plane0.height,
11027 display_cfg->plane_descriptors[k].surface.plane1.height,
11028 s->NomDETInKByte,
11029 mode_lib->mp.Read256BlockHeightY[k],
11030 mode_lib->mp.Read256BlockHeightC[k],
11031 display_cfg->plane_descriptors[k].surface.tiling,
11032 mode_lib->mp.BytePerPixelY[k],
11033 mode_lib->mp.BytePerPixelC[k],
11034 mode_lib->mp.BytePerPixelInDETY[k],
11035 mode_lib->mp.BytePerPixelInDETC[k],
11036 display_cfg->plane_descriptors[k].composition.rotation_angle,
11037
11038 /* Output */
11039 &mode_lib->mp.RequestLuma[k],
11040 &mode_lib->mp.RequestChroma[k],
11041 &mode_lib->mp.DCCYMaxUncompressedBlock[k],
11042 &mode_lib->mp.DCCCMaxUncompressedBlock[k],
11043 &mode_lib->mp.DCCYMaxCompressedBlock[k],
11044 &mode_lib->mp.DCCCMaxCompressedBlock[k],
11045 &mode_lib->mp.DCCYIndependentBlock[k],
11046 &mode_lib->mp.DCCCIndependentBlock[k]);
11047 }
11048
11049 //Watermarks and NB P-State/DRAM Clock Change Support
11050 s->mmSOCParameters.UrgentLatency = mode_lib->mp.UrgentLatency;
11051 s->mmSOCParameters.ExtraLatency = mode_lib->mp.ExtraLatency;
11052 s->mmSOCParameters.ExtraLatency_sr = mode_lib->mp.ExtraLatency_sr;
11053 s->mmSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us;
11054 s->mmSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
11055 s->mmSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us;
11056 s->mmSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us;
11057 s->mmSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us;
11058 s->mmSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us;
11059 s->mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us;
11060 s->mmSOCParameters.USRRetrainingLatency = 0; //0; //FIXME_STAGE2
11061 s->mmSOCParameters.SMNLatency = 0; //mode_lib->soc.smn_latency_us; //FIXME_STAGE2
11062
11063 CalculateWatermarks_params->display_cfg = display_cfg;
11064 CalculateWatermarks_params->USRRetrainingRequired = false/*FIXME_STAGE2 was: mode_lib->ms.policy.USRRetrainingRequired, no new dml2 replacement*/;
11065 CalculateWatermarks_params->NumberOfActiveSurfaces = s->num_active_planes;
11066 CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines;
11067 CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits;
11068 CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes;
11069 CalculateWatermarks_params->DCFCLK = mode_lib->mp.Dcfclk;
11070 CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings;
11071 CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change;
11072 CalculateWatermarks_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes;
11073 CalculateWatermarks_params->mmSOCParameters = s->mmSOCParameters;
11074 CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes;
11075 CalculateWatermarks_params->SOCCLK = s->SOCCLK;
11076 CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->mp.DCFCLKDeepSleep;
11077 CalculateWatermarks_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY;
11078 CalculateWatermarks_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC;
11079 CalculateWatermarks_params->SwathHeightY = mode_lib->mp.SwathHeightY;
11080 CalculateWatermarks_params->SwathHeightC = mode_lib->mp.SwathHeightC;
11081 //CalculateWatermarks_params->LBBitPerPixel = 57; //FIXME_STAGE2
11082 CalculateWatermarks_params->SwathWidthY = mode_lib->mp.SwathWidthY;
11083 CalculateWatermarks_params->SwathWidthC = mode_lib->mp.SwathWidthC;
11084 CalculateWatermarks_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY;
11085 CalculateWatermarks_params->BytePerPixelDETC = mode_lib->mp.BytePerPixelInDETC;
11086 CalculateWatermarks_params->DSTXAfterScaler = mode_lib->mp.DSTXAfterScaler;
11087 CalculateWatermarks_params->DSTYAfterScaler = mode_lib->mp.DSTYAfterScaler;
11088 CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled;
11089 CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->mp.CompressedBufferSizeInkByte;
11090 CalculateWatermarks_params->meta_row_height_l = mode_lib->mp.meta_row_height;
11091 CalculateWatermarks_params->meta_row_height_c = mode_lib->mp.meta_row_height_chroma;
11092
11093 // Output
11094 CalculateWatermarks_params->Watermark = &mode_lib->mp.Watermark;
11095 CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->mp.DRAMClockChangeSupport;
11096 CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->mp.global_dram_clock_change_supported;
11097 CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported;
11098 CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->mp.SubViewportLinesNeededInMALL;
11099 CalculateWatermarks_params->FCLKChangeSupport = mode_lib->mp.FCLKChangeSupport;
11100 CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->mp.global_fclk_change_supported;
11101 CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &mode_lib->mp.MaxActiveFCLKChangeLatencySupported;
11102 CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->mp.USRRetrainingSupport;
11103 CalculateWatermarks_params->VActiveLatencyHidingMargin = 0;
11104
11105 CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params);
11106
11107 for (k = 0; k < s->num_active_planes; ++k) {
11108 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) {
11109 mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
11110 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark);
11111 mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
11112 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackFCLKChangeWatermark);
11113 } else {
11114 mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = 0;
11115 mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = 0;
11116 }
11117 }
11118
11119 dml2_printf("DML::%s: DEBUG stream_index = %0d\n", __func__, display_cfg->plane_descriptors[0].stream_index);
11120 dml2_printf("DML::%s: DEBUG PixelClock = %d kHz\n", __func__, (display_cfg->stream_descriptors[display_cfg->plane_descriptors[0].stream_index].timing.pixel_clock_khz));
11121
11122 //Display Pipeline Delivery Time in Prefetch, Groups
11123 CalculatePixelDeliveryTimes(
11124 display_cfg,
11125 cfg_support_info,
11126 s->num_active_planes,
11127 mode_lib->mp.VRatioPrefetchY,
11128 mode_lib->mp.VRatioPrefetchC,
11129 mode_lib->mp.swath_width_luma_ub,
11130 mode_lib->mp.swath_width_chroma_ub,
11131 mode_lib->mp.PSCL_THROUGHPUT,
11132 mode_lib->mp.PSCL_THROUGHPUT_CHROMA,
11133 mode_lib->mp.Dppclk,
11134 mode_lib->mp.BytePerPixelC,
11135 mode_lib->mp.req_per_swath_ub_l,
11136 mode_lib->mp.req_per_swath_ub_c,
11137
11138 /* Output */
11139 mode_lib->mp.DisplayPipeLineDeliveryTimeLuma,
11140 mode_lib->mp.DisplayPipeLineDeliveryTimeChroma,
11141 mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch,
11142 mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch,
11143 mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma,
11144 mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma,
11145 mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch,
11146 mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch);
11147
11148 CalculateMetaAndPTETimes_params->scratch = &mode_lib->scratch;
11149 CalculateMetaAndPTETimes_params->display_cfg = display_cfg;
11150 CalculateMetaAndPTETimes_params->NumberOfActiveSurfaces = s->num_active_planes;
11151 CalculateMetaAndPTETimes_params->use_one_row_for_frame = mode_lib->mp.use_one_row_for_frame;
11152 CalculateMetaAndPTETimes_params->dst_y_per_row_vblank = mode_lib->mp.dst_y_per_row_vblank;
11153 CalculateMetaAndPTETimes_params->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip;
11154 CalculateMetaAndPTETimes_params->BytePerPixelY = mode_lib->mp.BytePerPixelY;
11155 CalculateMetaAndPTETimes_params->BytePerPixelC = mode_lib->mp.BytePerPixelC;
11156 CalculateMetaAndPTETimes_params->dpte_row_height = mode_lib->mp.dpte_row_height;
11157 CalculateMetaAndPTETimes_params->dpte_row_height_chroma = mode_lib->mp.dpte_row_height_chroma;
11158 CalculateMetaAndPTETimes_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes;
11159 CalculateMetaAndPTETimes_params->PTERequestSizeY = mode_lib->mp.PTERequestSizeY;
11160 CalculateMetaAndPTETimes_params->PTERequestSizeC = mode_lib->mp.PTERequestSizeC;
11161 CalculateMetaAndPTETimes_params->PixelPTEReqWidthY = mode_lib->mp.PixelPTEReqWidthY;
11162 CalculateMetaAndPTETimes_params->PixelPTEReqHeightY = mode_lib->mp.PixelPTEReqHeightY;
11163 CalculateMetaAndPTETimes_params->PixelPTEReqWidthC = mode_lib->mp.PixelPTEReqWidthC;
11164 CalculateMetaAndPTETimes_params->PixelPTEReqHeightC = mode_lib->mp.PixelPTEReqHeightC;
11165 CalculateMetaAndPTETimes_params->dpte_row_width_luma_ub = mode_lib->mp.dpte_row_width_luma_ub;
11166 CalculateMetaAndPTETimes_params->dpte_row_width_chroma_ub = mode_lib->mp.dpte_row_width_chroma_ub;
11167 CalculateMetaAndPTETimes_params->tdlut_groups_per_2row_ub = s->tdlut_groups_per_2row_ub;
11168 CalculateMetaAndPTETimes_params->mrq_present = mode_lib->ip.dcn_mrq_present;
11169
11170 CalculateMetaAndPTETimes_params->MetaChunkSize = mode_lib->ip.meta_chunk_size_kbytes;
11171 CalculateMetaAndPTETimes_params->MinMetaChunkSizeBytes = mode_lib->ip.min_meta_chunk_size_bytes;
11172 CalculateMetaAndPTETimes_params->meta_row_width = mode_lib->mp.meta_row_width;
11173 CalculateMetaAndPTETimes_params->meta_row_width_chroma = mode_lib->mp.meta_row_width_chroma;
11174 CalculateMetaAndPTETimes_params->meta_row_height = mode_lib->mp.meta_row_height;
11175 CalculateMetaAndPTETimes_params->meta_row_height_chroma = mode_lib->mp.meta_row_height_chroma;
11176 CalculateMetaAndPTETimes_params->meta_req_width = mode_lib->mp.meta_req_width;
11177 CalculateMetaAndPTETimes_params->meta_req_width_chroma = mode_lib->mp.meta_req_width_chroma;
11178 CalculateMetaAndPTETimes_params->meta_req_height = mode_lib->mp.meta_req_height;
11179 CalculateMetaAndPTETimes_params->meta_req_height_chroma = mode_lib->mp.meta_req_height_chroma;
11180
11181 CalculateMetaAndPTETimes_params->time_per_tdlut_group = mode_lib->mp.time_per_tdlut_group;
11182 CalculateMetaAndPTETimes_params->DST_Y_PER_PTE_ROW_NOM_L = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L;
11183 CalculateMetaAndPTETimes_params->DST_Y_PER_PTE_ROW_NOM_C = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C;
11184 CalculateMetaAndPTETimes_params->time_per_pte_group_nom_luma = mode_lib->mp.time_per_pte_group_nom_luma;
11185 CalculateMetaAndPTETimes_params->time_per_pte_group_vblank_luma = mode_lib->mp.time_per_pte_group_vblank_luma;
11186 CalculateMetaAndPTETimes_params->time_per_pte_group_flip_luma = mode_lib->mp.time_per_pte_group_flip_luma;
11187 CalculateMetaAndPTETimes_params->time_per_pte_group_nom_chroma = mode_lib->mp.time_per_pte_group_nom_chroma;
11188 CalculateMetaAndPTETimes_params->time_per_pte_group_vblank_chroma = mode_lib->mp.time_per_pte_group_vblank_chroma;
11189 CalculateMetaAndPTETimes_params->time_per_pte_group_flip_chroma = mode_lib->mp.time_per_pte_group_flip_chroma;
11190 CalculateMetaAndPTETimes_params->DST_Y_PER_META_ROW_NOM_L = mode_lib->mp.DST_Y_PER_META_ROW_NOM_L;
11191 CalculateMetaAndPTETimes_params->DST_Y_PER_META_ROW_NOM_C = mode_lib->mp.DST_Y_PER_META_ROW_NOM_C;
11192 CalculateMetaAndPTETimes_params->TimePerMetaChunkNominal = mode_lib->mp.TimePerMetaChunkNominal;
11193 CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkNominal = mode_lib->mp.TimePerChromaMetaChunkNominal;
11194 CalculateMetaAndPTETimes_params->TimePerMetaChunkVBlank = mode_lib->mp.TimePerMetaChunkVBlank;
11195 CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkVBlank = mode_lib->mp.TimePerChromaMetaChunkVBlank;
11196 CalculateMetaAndPTETimes_params->TimePerMetaChunkFlip = mode_lib->mp.TimePerMetaChunkFlip;
11197 CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkFlip = mode_lib->mp.TimePerChromaMetaChunkFlip;
11198
11199 CalculateMetaAndPTETimes(CalculateMetaAndPTETimes_params);
11200
11201 CalculateVMGroupAndRequestTimes(
11202 display_cfg,
11203 s->num_active_planes,
11204 mode_lib->mp.BytePerPixelC,
11205 mode_lib->mp.dst_y_per_vm_vblank,
11206 mode_lib->mp.dst_y_per_vm_flip,
11207 mode_lib->mp.dpte_row_width_luma_ub,
11208 mode_lib->mp.dpte_row_width_chroma_ub,
11209 mode_lib->mp.vm_group_bytes,
11210 mode_lib->mp.dpde0_bytes_per_frame_ub_l,
11211 mode_lib->mp.dpde0_bytes_per_frame_ub_c,
11212 s->tdlut_pte_bytes_per_frame,
11213 mode_lib->mp.meta_pte_bytes_per_frame_ub_l,
11214 mode_lib->mp.meta_pte_bytes_per_frame_ub_c,
11215 mode_lib->ip.dcn_mrq_present,
11216
11217 /* Output */
11218 mode_lib->mp.TimePerVMGroupVBlank,
11219 mode_lib->mp.TimePerVMGroupFlip,
11220 mode_lib->mp.TimePerVMRequestVBlank,
11221 mode_lib->mp.TimePerVMRequestFlip);
11222
11223 // VStartup Adjustment
11224 for (k = 0; k < s->num_active_planes; ++k) {
11225
11226 mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TWait[k] + mode_lib->mp.ExtraLatency;
11227 if (!display_cfg->plane_descriptors[k].dynamic_meta_data.enable)
11228 mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TCalc + mode_lib->mp.MinTTUVBlank[k];
11229
11230 #ifdef __DML_VBA_DEBUG__
11231 dml2_printf("DML::%s: k=%u, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]);
11232 #endif
11233 s->Tvstartup_margin = (s->MaxVStartupLines[k] - mode_lib->mp.VStartupMin[k]) * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
11234 mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.MinTTUVBlank[k] + s->Tvstartup_margin;
11235
11236 #ifdef __DML_VBA_DEBUG__
11237 dml2_printf("DML::%s: k=%u, Tvstartup_margin = %f\n", __func__, k, s->Tvstartup_margin);
11238 dml2_printf("DML::%s: k=%u, MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
11239 dml2_printf("DML::%s: k=%u, MinTTUVBlank = %f\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]);
11240 #endif
11241
11242 mode_lib->mp.Tdmdl[k] = mode_lib->mp.Tdmdl[k] + s->Tvstartup_margin;
11243 if (display_cfg->plane_descriptors[k].dynamic_meta_data.enable && mode_lib->ip.dynamic_metadata_vm_enabled) {
11244 mode_lib->mp.Tdmdl_vm[k] = mode_lib->mp.Tdmdl_vm[k] + s->Tvstartup_margin;
11245 }
11246
11247 bool isInterlaceTiming = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !mode_lib->ip.ptoi_supported);
11248
11249 // The actual positioning of the vstartup
11250 mode_lib->mp.VStartup[k] = (isInterlaceTiming ? (2 * s->MaxVStartupLines[k]) : s->MaxVStartupLines[k]);
11251
11252 s->dlg_vblank_start = ((isInterlaceTiming ? math_floor2((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch) / 2.0, 1.0) :
11253 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total) - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch);
11254 s->LSetup = math_floor2(4.0 * mode_lib->mp.TSetup[k] / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)), 1.0) / 4.0;
11255 s->blank_lines_remaining = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active) - mode_lib->mp.VStartup[k];
11256
11257 if (s->blank_lines_remaining < 0) {
11258 dml2_printf("ERROR: Vstartup is larger than vblank!?\n");
11259 s->blank_lines_remaining = 0;
11260 DML2_ASSERT(0);
11261 }
11262 mode_lib->mp.MIN_DST_Y_NEXT_START[k] = s->dlg_vblank_start + s->blank_lines_remaining + s->LSetup;
11263
11264 // debug only
11265 if (((mode_lib->mp.VUpdateOffsetPix[k] + mode_lib->mp.VUpdateWidthPix[k] + (double) mode_lib->mp.VReadyOffsetPix[k]) / display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) <=
11266 (isInterlaceTiming ?
11267 math_floor2((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch - mode_lib->mp.VStartup[k]) / 2.0, 1.0) :
11268 (int)(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch - mode_lib->mp.VStartup[k]))) {
11269 mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = true;
11270 } else {
11271 mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = false;
11272 }
11273 #ifdef __DML_VBA_DEBUG__
11274 dml2_printf("DML::%s: k=%u, VStartup = %u (max)\n", __func__, k, mode_lib->mp.VStartup[k]);
11275 dml2_printf("DML::%s: k=%u, VStartupMin = %u (max)\n", __func__, k, mode_lib->mp.VStartupMin[k]);
11276 dml2_printf("DML::%s: k=%u, VUpdateOffsetPix = %u\n", __func__, k, mode_lib->mp.VUpdateOffsetPix[k]);
11277 dml2_printf("DML::%s: k=%u, VUpdateWidthPix = %u\n", __func__, k, mode_lib->mp.VUpdateWidthPix[k]);
11278 dml2_printf("DML::%s: k=%u, VReadyOffsetPix = %u\n", __func__, k, mode_lib->mp.VReadyOffsetPix[k]);
11279 dml2_printf("DML::%s: k=%u, HTotal = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total);
11280 dml2_printf("DML::%s: k=%u, VTotal = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total);
11281 dml2_printf("DML::%s: k=%u, VActive = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active);
11282 dml2_printf("DML::%s: k=%u, VFrontPorch = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch);
11283 dml2_printf("DML::%s: k=%u, TSetup = %f\n", __func__, k, mode_lib->mp.TSetup[k]);
11284 dml2_printf("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f\n", __func__, k, mode_lib->mp.MIN_DST_Y_NEXT_START[k]);
11285 dml2_printf("DML::%s: k=%u, VREADY_AT_OR_AFTER_VSYNC = %u\n", __func__, k, mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k]);
11286 #endif
11287 }
11288
11289 //Maximum Bandwidth Used
11290 s->TotalWRBandwidth = 0;
11291 s->WRBandwidth = 0;
11292 for (k = 0; k < s->num_active_planes; ++k) {
11293 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format == dml2_444_32) {
11294 s->WRBandwidth = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width /
11295 (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 4;
11296 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) {
11297 s->WRBandwidth = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width /
11298 (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 8;
11299 }
11300 s->TotalWRBandwidth = s->TotalWRBandwidth + s->WRBandwidth;
11301 }
11302
11303 mode_lib->mp.TotalDataReadBandwidth = 0;
11304 for (k = 0; k < s->num_active_planes; ++k) {
11305 mode_lib->mp.TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth + mode_lib->mp.SurfaceReadBandwidthLuma[k] + mode_lib->mp.SurfaceReadBandwidthChroma[k];
11306 #ifdef __DML_VBA_DEBUG__
11307 dml2_printf("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, mode_lib->mp.TotalDataReadBandwidth);
11308 dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthLuma[k]);
11309 dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthChroma[k]);
11310 #endif
11311 }
11312
11313 CalculateStutterEfficiency_params->display_cfg = display_cfg;
11314 CalculateStutterEfficiency_params->CompressedBufferSizeInkByte = mode_lib->mp.CompressedBufferSizeInkByte;
11315 CalculateStutterEfficiency_params->UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled;
11316 CalculateStutterEfficiency_params->MetaFIFOSizeInKEntries = mode_lib->ip.meta_fifo_size_in_kentries;
11317 CalculateStutterEfficiency_params->ZeroSizeBufferEntries = mode_lib->ip.zero_size_buffer_entries;
11318 CalculateStutterEfficiency_params->PixelChunkSizeInKByte = mode_lib->ip.pixel_chunk_size_kbytes;
11319 CalculateStutterEfficiency_params->NumberOfActiveSurfaces = s->num_active_planes;
11320 CalculateStutterEfficiency_params->ROBBufferSizeInKByte = mode_lib->ip.rob_buffer_size_kbytes;
11321 CalculateStutterEfficiency_params->TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth;
11322 CalculateStutterEfficiency_params->DCFCLK = mode_lib->mp.Dcfclk;
11323 CalculateStutterEfficiency_params->ReturnBW = mode_lib->mp.urg_bandwidth_available_min[dml2_core_internal_soc_state_sys_active];
11324 CalculateStutterEfficiency_params->CompbufReservedSpace64B = mode_lib->mp.compbuf_reserved_space_64b;
11325 CalculateStutterEfficiency_params->CompbufReservedSpaceZs = mode_lib->ip.compbuf_reserved_space_zs;
11326 CalculateStutterEfficiency_params->SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us;
11327 CalculateStutterEfficiency_params->SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us;
11328 CalculateStutterEfficiency_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings;
11329 CalculateStutterEfficiency_params->StutterEnterPlusExitWatermark = mode_lib->mp.Watermark.StutterEnterPlusExitWatermark;
11330 CalculateStutterEfficiency_params->Z8StutterEnterPlusExitWatermark = mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark;
11331 CalculateStutterEfficiency_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
11332 CalculateStutterEfficiency_params->MinTTUVBlank = mode_lib->mp.MinTTUVBlank;
11333 CalculateStutterEfficiency_params->DPPPerSurface = mode_lib->mp.NoOfDPP;
11334 CalculateStutterEfficiency_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY;
11335 CalculateStutterEfficiency_params->BytePerPixelY = mode_lib->mp.BytePerPixelY;
11336 CalculateStutterEfficiency_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY;
11337 CalculateStutterEfficiency_params->SwathWidthY = mode_lib->mp.SwathWidthY;
11338 CalculateStutterEfficiency_params->SwathHeightY = mode_lib->mp.SwathHeightY;
11339 CalculateStutterEfficiency_params->SwathHeightC = mode_lib->mp.SwathHeightC;
11340 CalculateStutterEfficiency_params->BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY;
11341 CalculateStutterEfficiency_params->BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY;
11342 CalculateStutterEfficiency_params->BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC;
11343 CalculateStutterEfficiency_params->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC;
11344 CalculateStutterEfficiency_params->DCCYMaxUncompressedBlock = mode_lib->mp.DCCYMaxUncompressedBlock;
11345 CalculateStutterEfficiency_params->DCCCMaxUncompressedBlock = mode_lib->mp.DCCCMaxUncompressedBlock;
11346 CalculateStutterEfficiency_params->ReadBandwidthSurfaceLuma = mode_lib->mp.SurfaceReadBandwidthLuma;
11347 CalculateStutterEfficiency_params->ReadBandwidthSurfaceChroma = mode_lib->mp.SurfaceReadBandwidthChroma;
11348 CalculateStutterEfficiency_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
11349 CalculateStutterEfficiency_params->meta_row_bw = mode_lib->mp.meta_row_bw;
11350 CalculateStutterEfficiency_params->rob_alloc_compressed = mode_lib->ip.dcn_mrq_present;
11351
11352 // output
11353 CalculateStutterEfficiency_params->StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.StutterEfficiencyNotIncludingVBlank;
11354 CalculateStutterEfficiency_params->StutterEfficiency = &mode_lib->mp.StutterEfficiency;
11355 CalculateStutterEfficiency_params->NumberOfStutterBurstsPerFrame = &mode_lib->mp.NumberOfStutterBurstsPerFrame;
11356 CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank;
11357 CalculateStutterEfficiency_params->Z8StutterEfficiency = &mode_lib->mp.Z8StutterEfficiency;
11358 CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &mode_lib->mp.Z8NumberOfStutterBurstsPerFrame;
11359 CalculateStutterEfficiency_params->StutterPeriod = &mode_lib->mp.StutterPeriod;
11360 CalculateStutterEfficiency_params->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = &mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE;
11361
11362 // Stutter Efficiency
11363 CalculateStutterEfficiency(&mode_lib->scratch, CalculateStutterEfficiency_params);
11364
11365 #ifdef __DML_VBA_ALLOW_DELTA__
11366 // Calculate z8 stutter eff assuming 0 reserved space
11367 CalculateStutterEfficiency_params->CompbufReservedSpace64B = 0;
11368 CalculateStutterEfficiency_params->CompbufReservedSpaceZs = 0;
11369
11370 CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlankBestCase;
11371 CalculateStutterEfficiency_params->Z8StutterEfficiency = &mode_lib->mp.Z8StutterEfficiencyBestCase;
11372 CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase;
11373 CalculateStutterEfficiency_params->StutterPeriod = &mode_lib->mp.StutterPeriodBestCase;
11374
11375 // Stutter Efficiency
11376 CalculateStutterEfficiency(&mode_lib->scratch, CalculateStutterEfficiency_params);
11377 #else
11378 mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlankBestCase = mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank;
11379 mode_lib->mp.Z8StutterEfficiencyBestCase = mode_lib->mp.Z8StutterEfficiency;
11380 mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase = mode_lib->mp.Z8NumberOfStutterBurstsPerFrame;
11381 mode_lib->mp.StutterPeriodBestCase = mode_lib->mp.StutterPeriod;
11382 #endif
11383 } // PrefetchAndImmediateFlipSupported
11384
11385 const long min_return_uclk_cycles = 83;
11386 const long min_return_fclk_cycles = 75;
11387 double max_fclk_mhz = min_clk_table->max_clocks_khz.fclk / 1000.0;
11388 double max_uclk_mhz = mode_lib->soc.clk_table.uclk.clk_values_khz[mode_lib->soc.clk_table.uclk.num_clk_values - 1] / 1000.0;
11389 double hard_minimum_dcfclk_mhz = (double)min_clk_table->dram_bw_table.entries[0].min_dcfclk_khz / 1000.0;
11390 double min_return_latency_in_DCFCLK_cycles = (min_return_uclk_cycles / max_uclk_mhz + min_return_fclk_cycles / max_fclk_mhz) * hard_minimum_dcfclk_mhz;
11391 mode_lib->mp.min_return_latency_in_dcfclk = (unsigned int)min_return_latency_in_DCFCLK_cycles;
11392 mode_lib->mp.dcfclk_deep_sleep_hysteresis = (unsigned int)math_max2(32, (double)mode_lib->ip.pixel_chunk_size_kbytes * 1024 * 3 / 4 / 64 - min_return_latency_in_DCFCLK_cycles);
11393 mode_lib->mp.dcfclk_deep_sleep_hysteresis = 255;
11394 DML2_ASSERT(mode_lib->mp.dcfclk_deep_sleep_hysteresis < 256);
11395
11396 #ifdef __DML_VBA_DEBUG__
11397 dml2_printf("DML::%s: max_fclk_mhz = %f\n", __func__, max_fclk_mhz);
11398 dml2_printf("DML::%s: max_uclk_mhz = %f\n", __func__, max_uclk_mhz);
11399 dml2_printf("DML::%s: hard_minimum_dcfclk_mhz = %f\n", __func__, hard_minimum_dcfclk_mhz);
11400 dml2_printf("DML::%s: min_return_uclk_cycles = %d\n", __func__, min_return_uclk_cycles);
11401 dml2_printf("DML::%s: min_return_fclk_cycles = %d\n", __func__, min_return_fclk_cycles);
11402 dml2_printf("DML::%s: min_return_latency_in_DCFCLK_cycles = %f\n", __func__, min_return_latency_in_DCFCLK_cycles);
11403 dml2_printf("DML::%s: dcfclk_deep_sleep_hysteresis = %d \n", __func__, mode_lib->mp.dcfclk_deep_sleep_hysteresis);
11404 dml2_printf("DML::%s: --- END --- \n", __func__);
11405 #endif
11406
11407 return (in_out_params->mode_lib->mp.PrefetchAndImmediateFlipSupported);
11408 }
11409
dml_is_dual_plane(enum dml2_source_format_class source_format)11410 static bool dml_is_dual_plane(enum dml2_source_format_class source_format)
11411 {
11412 bool ret_val = 0;
11413
11414 if ((source_format == dml2_420_12) || (source_format == dml2_420_8) || (source_format == dml2_420_10) || (source_format == dml2_rgbe_alpha))
11415 ret_val = 1;
11416
11417 return ret_val;
11418 }
11419
dml_get_plane_idx(const struct dml2_core_internal_display_mode_lib * mode_lib,unsigned int pipe_idx)11420 static unsigned int dml_get_plane_idx(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx)
11421 {
11422 unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx];
11423 return plane_idx;
11424 }
11425
rq_dlg_get_wm_regs(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_dchub_watermark_regs * wm_regs)11426 static void rq_dlg_get_wm_regs(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *wm_regs)
11427 {
11428 double refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz;
11429
11430 wm_regs->fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz);
11431 wm_regs->sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz);
11432 wm_regs->sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz);
11433 wm_regs->temp_read_or_ppt = 0;
11434 wm_regs->uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz);
11435 wm_regs->urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz);
11436 }
11437
/*
 * Return math_log2_approx(@a) - @subtrahend, or 0 when @a is 0.
 *
 * The zero case is short-circuited so math_log2_approx() is never called
 * with 0.
 *
 * NOTE(review): the result is not clamped. With an unsigned return type, a
 * log value smaller than @subtrahend would wrap around -- presumably callers
 * only pass values large enough for this not to happen; confirm.
 */
static unsigned int log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend)
{
	return (a != 0) ? (math_log2_approx(a) - subtrahend) : 0;
}
11445
dml2_core_shared_cursor_dlg_reg(struct dml2_cursor_dlg_regs * cursor_dlg_regs,const struct dml2_get_cursor_dlg_reg * p)11446 void dml2_core_shared_cursor_dlg_reg(struct dml2_cursor_dlg_regs *cursor_dlg_regs, const struct dml2_get_cursor_dlg_reg *p)
11447 {
11448 int dst_x_offset = (int)((p->cursor_x_position + (p->cursor_stereo_en == 0 ? 0 : math_max2(p->cursor_primary_offset, p->cursor_secondary_offset)) -
11449 (p->cursor_hotspot_x * (p->cursor_2x_magnify == 0 ? 1 : 2))) * p->dlg_refclk_mhz / p->pixel_rate_mhz / p->hratio);
11450 cursor_dlg_regs->dst_x_offset = (unsigned int)((dst_x_offset > 0) ? dst_x_offset : 0);
11451
11452 #ifdef __DML_VBA_DEBUG__
11453 dml2_printf("DML_DLG::%s: cursor_x_position=%d\n", __func__, p->cursor_x_position);
11454 dml2_printf("DML_DLG::%s: dlg_refclk_mhz=%f\n", __func__, p->dlg_refclk_mhz);
11455 dml2_printf("DML_DLG::%s: pixel_rate_mhz=%f\n", __func__, p->pixel_rate_mhz);
11456 dml2_printf("DML_DLG::%s: dst_x_offset=%d\n", __func__, dst_x_offset);
11457 dml2_printf("DML_DLG::%s: dst_x_offset=%d (reg)\n", __func__, cursor_dlg_regs->dst_x_offset);
11458 #endif
11459
11460 cursor_dlg_regs->chunk_hdl_adjust = 3;
11461 cursor_dlg_regs->dst_y_offset = 0;
11462
11463 cursor_dlg_regs->qos_level_fixed = 8;
11464 cursor_dlg_regs->qos_ramp_disable = 0;
11465 }
11466
rq_dlg_get_rq_reg(struct dml2_display_rq_regs * rq_regs,const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,unsigned int pipe_idx)11467 static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs,
11468 const struct dml2_display_cfg *display_cfg,
11469 const struct dml2_core_internal_display_mode_lib *mode_lib,
11470 unsigned int pipe_idx)
11471 {
11472 dml2_printf("DML_DLG::%s: Calculation for pipe[%d] start\n", __func__, pipe_idx);
11473
11474 unsigned int plane_idx = dml_get_plane_idx(mode_lib, pipe_idx);
11475 enum dml2_source_format_class source_format = display_cfg->plane_descriptors[plane_idx].pixel_format;
11476 enum dml2_swizzle_mode sw_mode = display_cfg->plane_descriptors[plane_idx].surface.tiling;
11477 bool dual_plane = dml_is_dual_plane((enum dml2_source_format_class)(source_format));
11478
11479 unsigned int pixel_chunk_bytes = 0;
11480 unsigned int min_pixel_chunk_bytes = 0;
11481 unsigned int dpte_group_bytes = 0;
11482 unsigned int mpte_group_bytes = 0;
11483
11484 unsigned int p1_pixel_chunk_bytes = 0;
11485 unsigned int p1_min_pixel_chunk_bytes = 0;
11486 unsigned int p1_dpte_group_bytes = 0;
11487 unsigned int p1_mpte_group_bytes = 0;
11488
11489 pixel_chunk_bytes = (unsigned int)(mode_lib->ip.pixel_chunk_size_kbytes * 1024);
11490 min_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.min_pixel_chunk_size_bytes);
11491
11492 if (pixel_chunk_bytes == 64 * 1024)
11493 min_pixel_chunk_bytes = 0;
11494
11495 dpte_group_bytes = (unsigned int)(mode_lib->mp.dpte_group_bytes[mode_lib->mp.pipe_plane[pipe_idx]]);
11496 mpte_group_bytes = (unsigned int)(mode_lib->mp.vm_group_bytes[mode_lib->mp.pipe_plane[pipe_idx]]);
11497
11498 p1_pixel_chunk_bytes = pixel_chunk_bytes;
11499 p1_min_pixel_chunk_bytes = min_pixel_chunk_bytes;
11500 p1_dpte_group_bytes = dpte_group_bytes;
11501 p1_mpte_group_bytes = mpte_group_bytes;
11502
11503 if (source_format == dml2_rgbe_alpha)
11504 p1_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.alpha_pixel_chunk_size_kbytes * 1024);
11505
11506 rq_regs->unbounded_request_enabled = mode_lib->mp.UnboundedRequestEnabled;
11507 rq_regs->rq_regs_l.chunk_size = log_and_substract_if_non_zero(pixel_chunk_bytes, 10);
11508 rq_regs->rq_regs_c.chunk_size = log_and_substract_if_non_zero(p1_pixel_chunk_bytes, 10);
11509
11510 if (min_pixel_chunk_bytes == 0)
11511 rq_regs->rq_regs_l.min_chunk_size = 0;
11512 else
11513 rq_regs->rq_regs_l.min_chunk_size = log_and_substract_if_non_zero(min_pixel_chunk_bytes, 8 - 1);
11514
11515 if (p1_min_pixel_chunk_bytes == 0)
11516 rq_regs->rq_regs_c.min_chunk_size = 0;
11517 else
11518 rq_regs->rq_regs_c.min_chunk_size = log_and_substract_if_non_zero(p1_min_pixel_chunk_bytes, 8 - 1);
11519
11520 rq_regs->rq_regs_l.dpte_group_size = log_and_substract_if_non_zero(dpte_group_bytes, 6);
11521 rq_regs->rq_regs_l.mpte_group_size = log_and_substract_if_non_zero(mpte_group_bytes, 6);
11522 rq_regs->rq_regs_c.dpte_group_size = log_and_substract_if_non_zero(p1_dpte_group_bytes, 6);
11523 rq_regs->rq_regs_c.mpte_group_size = log_and_substract_if_non_zero(p1_mpte_group_bytes, 6);
11524
11525 unsigned int detile_buf_size_in_bytes = (unsigned int)(mode_lib->mp.DETBufferSizeInKByte[mode_lib->mp.pipe_plane[pipe_idx]] * 1024);
11526 unsigned int detile_buf_plane1_addr = 0;
11527
11528 if (sw_mode == dml2_sw_linear && display_cfg->gpuvm_enable) {
11529 unsigned int p0_pte_row_height_linear = (unsigned int)(mode_lib->mp.dpte_row_height_linear[mode_lib->mp.pipe_plane[pipe_idx]]);
11530 #ifdef __DML_VBA_DEBUG__
11531 dml2_printf("DML_DLG: %s: p0_pte_row_height_linear = %u\n", __func__, p0_pte_row_height_linear);
11532 #endif
11533 DML2_ASSERT(p0_pte_row_height_linear >= 8);
11534
11535 rq_regs->rq_regs_l.pte_row_height_linear = math_log2_approx(p0_pte_row_height_linear) - 3;
11536 if (dual_plane) {
11537 unsigned int p1_pte_row_height_linear = (unsigned int)(mode_lib->mp.dpte_row_height_linear_chroma[mode_lib->mp.pipe_plane[pipe_idx]]);
11538 #ifdef __DML_VBA_DEBUG__
11539 dml2_printf("DML_DLG: %s: p1_pte_row_height_linear = %u\n", __func__, p1_pte_row_height_linear);
11540 #endif
11541 if (sw_mode == dml2_sw_linear) {
11542 DML2_ASSERT(p1_pte_row_height_linear >= 8);
11543 }
11544
11545 rq_regs->rq_regs_c.pte_row_height_linear = math_log2_approx(p1_pte_row_height_linear) - 3;
11546 }
11547 } else {
11548 rq_regs->rq_regs_l.pte_row_height_linear = 0;
11549 rq_regs->rq_regs_c.pte_row_height_linear = 0;
11550 }
11551
11552 rq_regs->rq_regs_l.swath_height = log_and_substract_if_non_zero(mode_lib->mp.SwathHeightY[mode_lib->mp.pipe_plane[pipe_idx]], 0);
11553 rq_regs->rq_regs_c.swath_height = log_and_substract_if_non_zero(mode_lib->mp.SwathHeightC[mode_lib->mp.pipe_plane[pipe_idx]], 0);
11554
11555 // FIXME_DCN4, programming guide has dGPU condition
11556 if (pixel_chunk_bytes >= 32 * 1024 || (dual_plane && p1_pixel_chunk_bytes >= 32 * 1024)) { //32kb
11557 rq_regs->drq_expansion_mode = 0;
11558 } else {
11559 rq_regs->drq_expansion_mode = 2;
11560 }
11561 rq_regs->prq_expansion_mode = 1;
11562 rq_regs->crq_expansion_mode = 1;
11563 rq_regs->mrq_expansion_mode = 1;
11564
11565 double stored_swath_l_bytes = mode_lib->mp.DETBufferSizeY[mode_lib->mp.pipe_plane[pipe_idx]];
11566 double stored_swath_c_bytes = mode_lib->mp.DETBufferSizeC[mode_lib->mp.pipe_plane[pipe_idx]];
11567 bool is_phantom_pipe = dml_get_is_phantom_pipe(display_cfg, mode_lib, pipe_idx);
11568
11569 // Note: detile_buf_plane1_addr is in unit of 1KB
11570 if (dual_plane) {
11571 if (is_phantom_pipe) {
11572 detile_buf_plane1_addr = (unsigned int)((1024.0 * 1024.0) / 2.0 / 1024.0); // half to chroma
11573 } else {
11574 if (stored_swath_l_bytes / stored_swath_c_bytes <= 1.5) {
11575 detile_buf_plane1_addr = (unsigned int)(detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma
11576 #ifdef __DML_VBA_DEBUG__
11577 dml2_printf("DML_DLG: %s: detile_buf_plane1_addr = %d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr);
11578 #endif
11579 } else {
11580 detile_buf_plane1_addr = (unsigned int)(dml_round_to_multiple((unsigned int)((2.0 * detile_buf_size_in_bytes) / 3.0), 1024, 0) / 1024.0); // 2/3 to luma
11581 #ifdef __DML_VBA_DEBUG__
11582 dml2_printf("DML_DLG: %s: detile_buf_plane1_addr = %d (1/3 chroma)\n", __func__, detile_buf_plane1_addr);
11583 #endif
11584 }
11585 }
11586 }
11587 rq_regs->plane1_base_address = detile_buf_plane1_addr;
11588
11589 #ifdef __DML_VBA_DEBUG__
11590 dml2_printf("DML_DLG: %s: is_phantom_pipe = %d\n", __func__, is_phantom_pipe);
11591 dml2_printf("DML_DLG: %s: stored_swath_l_bytes = %f\n", __func__, stored_swath_l_bytes);
11592 dml2_printf("DML_DLG: %s: stored_swath_c_bytes = %f\n", __func__, stored_swath_c_bytes);
11593 dml2_printf("DML_DLG: %s: detile_buf_size_in_bytes = %d\n", __func__, detile_buf_size_in_bytes);
11594 dml2_printf("DML_DLG: %s: detile_buf_plane1_addr = %d\n", __func__, detile_buf_plane1_addr);
11595 dml2_printf("DML_DLG: %s: plane1_base_address = %d\n", __func__, rq_regs->plane1_base_address);
11596 #endif
11597 //dml2_printf_rq_regs_st(rq_regs);
11598 dml2_printf("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx);
11599 }
11600
rq_dlg_get_dlg_reg(struct dml2_core_internal_scratch * s,struct dml2_display_dlg_regs * disp_dlg_regs,struct dml2_display_ttu_regs * disp_ttu_regs,const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,const unsigned int pipe_idx)11601 static void rq_dlg_get_dlg_reg(
11602 struct dml2_core_internal_scratch *s,
11603 struct dml2_display_dlg_regs *disp_dlg_regs,
11604 struct dml2_display_ttu_regs *disp_ttu_regs,
11605 const struct dml2_display_cfg *display_cfg,
11606 const struct dml2_core_internal_display_mode_lib *mode_lib,
11607 const unsigned int pipe_idx)
11608 {
11609 struct dml2_core_shared_rq_dlg_get_dlg_reg_locals *l = &s->rq_dlg_get_dlg_reg_locals;
11610
11611 memset(l, 0, sizeof(struct dml2_core_shared_rq_dlg_get_dlg_reg_locals));
11612
11613 dml2_printf("DML_DLG::%s: Calculation for pipe_idx=%d\n", __func__, pipe_idx);
11614
11615 l->plane_idx = dml_get_plane_idx(mode_lib, pipe_idx);
11616 dml2_assert(l->plane_idx < DML2_MAX_PLANES);
11617
11618 l->source_format = dml2_444_8;
11619 l->dual_plane = dml_is_dual_plane(l->source_format);
11620 l->odm_mode = dml2_odm_mode_bypass;
11621
11622 l->htotal = 0;
11623 l->hactive = 0;
11624 l->hblank_end = 0;
11625 l->vblank_end = 0;
11626 l->interlaced = false;
11627 l->pclk_freq_in_mhz = 0.0;
11628 l->refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz;
11629 l->ref_freq_to_pix_freq = 0.0;
11630
11631 if (l->plane_idx < DML2_MAX_PLANES) {
11632
11633 l->timing = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[l->plane_idx].stream_index].timing;
11634 l->source_format = display_cfg->plane_descriptors[l->plane_idx].pixel_format;
11635 l->odm_mode = mode_lib->mp.ODMMode[l->plane_idx];
11636
11637 l->htotal = l->timing->h_total;
11638 l->hactive = l->timing->h_active;
11639 l->hblank_end = l->timing->h_blank_end;
11640 l->vblank_end = l->timing->v_blank_end;
11641 l->interlaced = l->timing->interlaced;
11642 l->pclk_freq_in_mhz = (double)l->timing->pixel_clock_khz / 1000;
11643 l->ref_freq_to_pix_freq = l->refclk_freq_in_mhz / l->pclk_freq_in_mhz;
11644
11645 dml2_printf("DML_DLG::%s: plane_idx = %d\n", __func__, l->plane_idx);
11646 dml2_printf("DML_DLG: %s: htotal = %d\n", __func__, l->htotal);
11647 dml2_printf("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, l->refclk_freq_in_mhz);
11648 dml2_printf("DML_DLG: %s: dlg_ref_clk_mhz = %3.2f\n", __func__, display_cfg->overrides.hw.dlg_ref_clk_mhz);
11649 dml2_printf("DML_DLG: %s: soc.refclk_mhz = %3.2f\n", __func__, mode_lib->soc.dchub_refclk_mhz);
11650 dml2_printf("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, l->pclk_freq_in_mhz);
11651 dml2_printf("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq);
11652 dml2_printf("DML_DLG: %s: interlaced = %d\n", __func__, l->interlaced);
11653
11654 DML2_ASSERT(l->refclk_freq_in_mhz != 0);
11655 DML2_ASSERT(l->pclk_freq_in_mhz != 0);
11656 DML2_ASSERT(l->ref_freq_to_pix_freq < 4.0);
11657
11658 // Need to figure out which side of odm combine we're in
11659 // Assume the pipe instance under the same plane is in order
11660
11661 if (l->odm_mode == dml2_odm_mode_bypass) {
11662 disp_dlg_regs->refcyc_h_blank_end = (unsigned int)((double)l->hblank_end * l->ref_freq_to_pix_freq);
11663 } else if (l->odm_mode == dml2_odm_mode_combine_2to1 || l->odm_mode == dml2_odm_mode_combine_3to1 || l->odm_mode == dml2_odm_mode_combine_4to1) {
11664 // find out how many pipe are in this plane
11665 l->num_active_pipes = mode_lib->mp.num_active_pipes;
11666 l->first_pipe_idx_in_plane = DML2_MAX_PLANES;
11667 l->pipe_idx_in_combine = 0; // pipe index within the plane
11668 l->odm_combine_factor = 2;
11669
11670 if (l->odm_mode == dml2_odm_mode_combine_3to1)
11671 l->odm_combine_factor = 3;
11672 else if (l->odm_mode == dml2_odm_mode_combine_4to1)
11673 l->odm_combine_factor = 4;
11674
11675 for (unsigned int i = 0; i < l->num_active_pipes; i++) {
11676 if (dml_get_plane_idx(mode_lib, i) == l->plane_idx) {
11677 if (i < l->first_pipe_idx_in_plane) {
11678 l->first_pipe_idx_in_plane = i;
11679 }
11680 }
11681 }
11682 l->pipe_idx_in_combine = pipe_idx - l->first_pipe_idx_in_plane; // DML assumes the pipes in the same plane will have continuous indexing (i.e. plane 0 use pipe 0, 1, and plane 1 uses pipe 2, 3, etc.)
11683
11684 disp_dlg_regs->refcyc_h_blank_end = (unsigned int)(((double)l->hblank_end + (double)l->pipe_idx_in_combine * (double)l->hactive / (double)l->odm_combine_factor) * l->ref_freq_to_pix_freq);
11685 dml2_printf("DML_DLG: %s: pipe_idx = %d\n", __func__, pipe_idx);
11686 dml2_printf("DML_DLG: %s: first_pipe_idx_in_plane = %d\n", __func__, l->first_pipe_idx_in_plane);
11687 dml2_printf("DML_DLG: %s: pipe_idx_in_combine = %d\n", __func__, l->pipe_idx_in_combine);
11688 dml2_printf("DML_DLG: %s: odm_combine_factor = %d\n", __func__, l->odm_combine_factor);
11689 }
11690 dml2_printf("DML_DLG: %s: refcyc_h_blank_end = %d\n", __func__, disp_dlg_regs->refcyc_h_blank_end);
11691
11692 DML2_ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)math_pow(2, 13));
11693
11694 disp_dlg_regs->ref_freq_to_pix_freq = (unsigned int)(l->ref_freq_to_pix_freq * math_pow(2, 19));
11695 disp_dlg_regs->refcyc_per_htotal = (unsigned int)(l->ref_freq_to_pix_freq * (double)l->htotal * math_pow(2, 8));
11696 disp_dlg_regs->dlg_vblank_end = l->interlaced ? (l->vblank_end / 2) : l->vblank_end; // 15 bits
11697
11698 l->min_ttu_vblank = mode_lib->mp.MinTTUVBlank[mode_lib->mp.pipe_plane[pipe_idx]];
11699 l->min_dst_y_next_start = (unsigned int)(mode_lib->mp.MIN_DST_Y_NEXT_START[mode_lib->mp.pipe_plane[pipe_idx]]);
11700
11701 dml2_printf("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, l->min_ttu_vblank);
11702 dml2_printf("DML_DLG: %s: min_dst_y_next_start = %d\n", __func__, l->min_dst_y_next_start);
11703 dml2_printf("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq);
11704
11705 l->vready_after_vcount0 = (unsigned int)(mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[mode_lib->mp.pipe_plane[pipe_idx]]);
11706 disp_dlg_regs->vready_after_vcount0 = l->vready_after_vcount0;
11707
11708 dml2_printf("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0);
11709
11710 l->dst_x_after_scaler = (unsigned int)(mode_lib->mp.DSTXAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]);
11711 l->dst_y_after_scaler = (unsigned int)(mode_lib->mp.DSTYAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]);
11712
11713 dml2_printf("DML_DLG: %s: dst_x_after_scaler = %d\n", __func__, l->dst_x_after_scaler);
11714 dml2_printf("DML_DLG: %s: dst_y_after_scaler = %d\n", __func__, l->dst_y_after_scaler);
11715
11716 l->dst_y_prefetch = mode_lib->mp.dst_y_prefetch[mode_lib->mp.pipe_plane[pipe_idx]];
11717 l->dst_y_per_vm_vblank = mode_lib->mp.dst_y_per_vm_vblank[mode_lib->mp.pipe_plane[pipe_idx]];
11718 l->dst_y_per_row_vblank = mode_lib->mp.dst_y_per_row_vblank[mode_lib->mp.pipe_plane[pipe_idx]];
11719 l->dst_y_per_vm_flip = mode_lib->mp.dst_y_per_vm_flip[mode_lib->mp.pipe_plane[pipe_idx]];
11720 l->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip[mode_lib->mp.pipe_plane[pipe_idx]];
11721
11722 l->max_dst_y_per_vm_vblank = 32.0; //U5.2
11723 l->max_dst_y_per_row_vblank = 16.0; //U4.2
11724
11725 // magic!
11726 if (l->htotal <= 75) {
11727 l->max_dst_y_per_vm_vblank = 100.0;
11728 l->max_dst_y_per_row_vblank = 100.0;
11729 }
11730
11731 dml2_printf("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, l->dst_y_prefetch);
11732 dml2_printf("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, l->dst_y_per_vm_flip);
11733 dml2_printf("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, l->dst_y_per_row_flip);
11734 dml2_printf("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, l->dst_y_per_vm_vblank);
11735 dml2_printf("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, l->dst_y_per_row_vblank);
11736
11737 DML2_ASSERT(l->dst_y_per_vm_vblank < l->max_dst_y_per_vm_vblank);
11738 DML2_ASSERT(l->dst_y_per_row_vblank < l->max_dst_y_per_row_vblank);
11739 if (l->dst_y_prefetch > 0 && l->dst_y_per_vm_vblank > 0 && l->dst_y_per_row_vblank > 0) {
11740 DML2_ASSERT(l->dst_y_prefetch > (l->dst_y_per_vm_vblank + l->dst_y_per_row_vblank));
11741 }
11742
11743 l->vratio_pre_l = mode_lib->mp.VRatioPrefetchY[mode_lib->mp.pipe_plane[pipe_idx]];
11744 l->vratio_pre_c = mode_lib->mp.VRatioPrefetchC[mode_lib->mp.pipe_plane[pipe_idx]];
11745
11746 dml2_printf("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, l->vratio_pre_l);
11747 dml2_printf("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, l->vratio_pre_c);
11748
11749 // Active
11750 l->refcyc_per_line_delivery_pre_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
11751 l->refcyc_per_line_delivery_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
11752
11753 dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_l);
11754 dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_l);
11755
11756 l->refcyc_per_line_delivery_pre_c = 0.0;
11757 l->refcyc_per_line_delivery_c = 0.0;
11758
11759 if (l->dual_plane) {
11760 l->refcyc_per_line_delivery_pre_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
11761 l->refcyc_per_line_delivery_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
11762
11763 dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_c);
11764 dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_c);
11765 }
11766
11767 disp_dlg_regs->refcyc_per_vm_dmdata = (unsigned int)(mode_lib->mp.Tdmdl_vm[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
11768 disp_dlg_regs->dmdata_dl_delta = (unsigned int)(mode_lib->mp.Tdmdl[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
11769
11770 l->refcyc_per_req_delivery_pre_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
11771 l->refcyc_per_req_delivery_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
11772
11773 dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_l);
11774 dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_l);
11775
11776 l->refcyc_per_req_delivery_pre_c = 0.0;
11777 l->refcyc_per_req_delivery_c = 0.0;
11778 if (l->dual_plane) {
11779 l->refcyc_per_req_delivery_pre_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
11780 l->refcyc_per_req_delivery_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
11781
11782 dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_c);
11783 dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_c);
11784 }
11785
11786 // TTU - Cursor
11787 DML2_ASSERT(display_cfg->plane_descriptors[l->plane_idx].cursor.num_cursors <= 1);
11788
11789 // Assign to register structures
11790 disp_dlg_regs->min_dst_y_next_start = (unsigned int)((double)l->min_dst_y_next_start * math_pow(2, 2));
11791 DML2_ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)math_pow(2, 18));
11792
11793 disp_dlg_regs->dst_y_after_scaler = l->dst_y_after_scaler; // in terms of line
11794 disp_dlg_regs->refcyc_x_after_scaler = (unsigned int)((double)l->dst_x_after_scaler * l->ref_freq_to_pix_freq); // in terms of refclk
11795 disp_dlg_regs->dst_y_prefetch = (unsigned int)(l->dst_y_prefetch * math_pow(2, 2));
11796 disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int)(l->dst_y_per_vm_vblank * math_pow(2, 2));
11797 disp_dlg_regs->dst_y_per_row_vblank = (unsigned int)(l->dst_y_per_row_vblank * math_pow(2, 2));
11798 disp_dlg_regs->dst_y_per_vm_flip = (unsigned int)(l->dst_y_per_vm_flip * math_pow(2, 2));
11799 disp_dlg_regs->dst_y_per_row_flip = (unsigned int)(l->dst_y_per_row_flip * math_pow(2, 2));
11800
11801 disp_dlg_regs->vratio_prefetch = (unsigned int)(l->vratio_pre_l * math_pow(2, 19));
11802 disp_dlg_regs->vratio_prefetch_c = (unsigned int)(l->vratio_pre_c * math_pow(2, 19));
11803
11804 dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank);
11805 dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank);
11806 dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip);
11807 dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip);
11808
11809 disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(mode_lib->mp.TimePerVMGroupVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
11810 disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(mode_lib->mp.TimePerVMGroupFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
11811 disp_dlg_regs->refcyc_per_vm_req_vblank = (unsigned int)(mode_lib->mp.TimePerVMRequestVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz * math_pow(2, 10));
11812 disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(mode_lib->mp.TimePerVMRequestFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz * math_pow(2, 10));
11813
11814 l->dst_y_per_pte_row_nom_l = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L[mode_lib->mp.pipe_plane[pipe_idx]];
11815 l->dst_y_per_pte_row_nom_c = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C[mode_lib->mp.pipe_plane[pipe_idx]];
11816 l->refcyc_per_pte_group_nom_l = mode_lib->mp.time_per_pte_group_nom_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
11817 l->refcyc_per_pte_group_nom_c = mode_lib->mp.time_per_pte_group_nom_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
11818 l->refcyc_per_pte_group_vblank_l = mode_lib->mp.time_per_pte_group_vblank_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
11819 l->refcyc_per_pte_group_vblank_c = mode_lib->mp.time_per_pte_group_vblank_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
11820 l->refcyc_per_pte_group_flip_l = mode_lib->mp.time_per_pte_group_flip_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
11821 l->refcyc_per_pte_group_flip_c = mode_lib->mp.time_per_pte_group_flip_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
11822 l->refcyc_per_tdlut_group = mode_lib->mp.time_per_tdlut_group[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
11823
11824 disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int)(l->dst_y_per_pte_row_nom_l * math_pow(2, 2));
11825 disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int)(l->dst_y_per_pte_row_nom_c * math_pow(2, 2));
11826
11827 disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int)(l->refcyc_per_pte_group_nom_l);
11828 disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(l->refcyc_per_pte_group_nom_c);
11829 disp_dlg_regs->refcyc_per_pte_group_vblank_l = (unsigned int)(l->refcyc_per_pte_group_vblank_l);
11830 disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int)(l->refcyc_per_pte_group_vblank_c);
11831 disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int)(l->refcyc_per_pte_group_flip_l);
11832 disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int)(l->refcyc_per_pte_group_flip_c);
11833 disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int)math_floor2(l->refcyc_per_line_delivery_pre_l, 1);
11834 disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int)math_floor2(l->refcyc_per_line_delivery_l, 1);
11835 disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int)math_floor2(l->refcyc_per_line_delivery_pre_c, 1);
11836 disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int)math_floor2(l->refcyc_per_line_delivery_c, 1);
11837
11838 l->dst_y_per_meta_row_nom_l = mode_lib->mp.DST_Y_PER_META_ROW_NOM_L[mode_lib->mp.pipe_plane[pipe_idx]];
11839 l->dst_y_per_meta_row_nom_c = mode_lib->mp.DST_Y_PER_META_ROW_NOM_C[mode_lib->mp.pipe_plane[pipe_idx]];
11840 l->refcyc_per_meta_chunk_nom_l = mode_lib->mp.TimePerMetaChunkNominal[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
11841 l->refcyc_per_meta_chunk_nom_c = mode_lib->mp.TimePerChromaMetaChunkNominal[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
11842 l->refcyc_per_meta_chunk_vblank_l = mode_lib->mp.TimePerMetaChunkVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
11843 l->refcyc_per_meta_chunk_vblank_c = mode_lib->mp.TimePerChromaMetaChunkVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
11844 l->refcyc_per_meta_chunk_flip_l = mode_lib->mp.TimePerMetaChunkFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
11845 l->refcyc_per_meta_chunk_flip_c = mode_lib->mp.TimePerChromaMetaChunkFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
11846
11847 disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int)(l->dst_y_per_meta_row_nom_l * math_pow(2, 2));
11848 disp_dlg_regs->dst_y_per_meta_row_nom_c = (unsigned int)(l->dst_y_per_meta_row_nom_c * math_pow(2, 2));
11849 disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int)(l->refcyc_per_meta_chunk_nom_l);
11850 disp_dlg_regs->refcyc_per_meta_chunk_nom_c = (unsigned int)(l->refcyc_per_meta_chunk_nom_c);
11851 disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = (unsigned int)(l->refcyc_per_meta_chunk_vblank_l);
11852 disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = (unsigned int)(l->refcyc_per_meta_chunk_vblank_c);
11853 disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int)(l->refcyc_per_meta_chunk_flip_l);
11854 disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int)(l->refcyc_per_meta_chunk_flip_c);
11855
11856 disp_dlg_regs->refcyc_per_tdlut_group = (unsigned int)(l->refcyc_per_tdlut_group);
11857 disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off
11858
11859 disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int)(l->refcyc_per_req_delivery_pre_l * math_pow(2, 10));
11860 disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int)(l->refcyc_per_req_delivery_l * math_pow(2, 10));
11861 disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int)(l->refcyc_per_req_delivery_pre_c * math_pow(2, 10));
11862 disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int)(l->refcyc_per_req_delivery_c * math_pow(2, 10));
11863 disp_ttu_regs->qos_level_low_wm = 0;
11864
11865 disp_ttu_regs->qos_level_high_wm = (unsigned int)(4.0 * (double)l->htotal * l->ref_freq_to_pix_freq);
11866
11867 disp_ttu_regs->qos_level_flip = 14;
11868 disp_ttu_regs->qos_level_fixed_l = 8;
11869 disp_ttu_regs->qos_level_fixed_c = 8;
11870 disp_ttu_regs->qos_ramp_disable_l = 0;
11871 disp_ttu_regs->qos_ramp_disable_c = 0;
11872 disp_ttu_regs->min_ttu_vblank = (unsigned int)(l->min_ttu_vblank * l->refclk_freq_in_mhz);
11873
11874 // CHECK for HW registers' range, DML2_ASSERT or clamp
11875 DML2_ASSERT(l->refcyc_per_req_delivery_pre_l < math_pow(2, 13));
11876 DML2_ASSERT(l->refcyc_per_req_delivery_l < math_pow(2, 13));
11877 DML2_ASSERT(l->refcyc_per_req_delivery_pre_c < math_pow(2, 13));
11878 DML2_ASSERT(l->refcyc_per_req_delivery_c < math_pow(2, 13));
11879 if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)math_pow(2, 23))
11880 disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(math_pow(2, 23) - 1);
11881
11882 if (disp_dlg_regs->refcyc_per_vm_group_flip >= (unsigned int)math_pow(2, 23))
11883 disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(math_pow(2, 23) - 1);
11884
11885 if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int)math_pow(2, 23))
11886 disp_dlg_regs->refcyc_per_vm_req_vblank = (unsigned int)(math_pow(2, 23) - 1);
11887
11888 if (disp_dlg_regs->refcyc_per_vm_req_flip >= (unsigned int)math_pow(2, 23))
11889 disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(math_pow(2, 23) - 1);
11890
11891
11892 DML2_ASSERT(disp_dlg_regs->dst_y_after_scaler < (unsigned int)8);
11893 DML2_ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)math_pow(2, 13));
11894
11895 if (disp_dlg_regs->dst_y_per_pte_row_nom_l >= (unsigned int)math_pow(2, 17)) {
11896 dml2_printf("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_L %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_l, (unsigned int)math_pow(2, 17) - 1);
11897 l->dst_y_per_pte_row_nom_l = (unsigned int)math_pow(2, 17) - 1;
11898 }
11899 if (l->dual_plane) {
11900 if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int)math_pow(2, 17)) {
11901 dml2_printf("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_C %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_c, (unsigned int)math_pow(2, 17) - 1);
11902 l->dst_y_per_pte_row_nom_c = (unsigned int)math_pow(2, 17) - 1;
11903 }
11904 }
11905
11906 if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int)math_pow(2, 23))
11907 disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int)(math_pow(2, 23) - 1);
11908 if (l->dual_plane) {
11909 if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int)math_pow(2, 23))
11910 disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(math_pow(2, 23) - 1);
11911 }
11912 DML2_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)math_pow(2, 13));
11913 if (l->dual_plane) {
11914 DML2_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)math_pow(2, 13));
11915 }
11916
11917 DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)math_pow(2, 13));
11918 DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)math_pow(2, 13));
11919 DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)math_pow(2, 13));
11920 DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)math_pow(2, 13));
11921 DML2_ASSERT(disp_ttu_regs->qos_level_low_wm < (unsigned int)math_pow(2, 14));
11922 DML2_ASSERT(disp_ttu_regs->qos_level_high_wm < (unsigned int)math_pow(2, 14));
11923 DML2_ASSERT(disp_ttu_regs->min_ttu_vblank < (unsigned int)math_pow(2, 24));
11924
11925 dml2_printf("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx);
11926
11927 }
11928 }
11929
rq_dlg_get_arb_params(const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_display_arb_regs * arb_param)11930 static void rq_dlg_get_arb_params(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *arb_param)
11931 {
11932 arb_param->max_req_outstanding = mode_lib->soc.max_outstanding_reqs;
11933 arb_param->min_req_outstanding = mode_lib->soc.max_outstanding_reqs; // turn off the sat level feature if this set to max
11934 arb_param->sdpif_request_rate_limit = (3 * mode_lib->ip.words_per_channel * mode_lib->soc.clk_table.dram_config.channel_count) / 4;
11935 arb_param->sdpif_request_rate_limit = arb_param->sdpif_request_rate_limit < 96 ? 96 : arb_param->sdpif_request_rate_limit;
11936 arb_param->sat_level_us = 60;
11937 arb_param->hvm_max_qos_commit_threshold = 0xf;
11938 arb_param->hvm_min_req_outstand_commit_threshold = 0xa;
11939 arb_param->compbuf_reserved_space_kbytes = mode_lib->mp.compbuf_reserved_space_64b * 64 / 1024;
11940 arb_param->allow_sdpif_rate_limit_when_cstate_req = mode_lib->mp.hw_debug5;
11941 arb_param->dcfclk_deep_sleep_hysteresis = mode_lib->mp.dcfclk_deep_sleep_hysteresis;
11942
11943 #ifdef __DML_VBA_DEBUG__
11944 dml2_printf("DML::%s: max_req_outstanding = %d\n", __func__, arb_param->max_req_outstanding);
11945 dml2_printf("DML::%s: sdpif_request_rate_limit = %d\n", __func__, arb_param->sdpif_request_rate_limit);
11946 dml2_printf("DML::%s: compbuf_reserved_space_kbytes = %d\n", __func__, arb_param->compbuf_reserved_space_kbytes);
11947 dml2_printf("DML::%s: allow_sdpif_rate_limit_when_cstate_req = %d\n", __func__, arb_param->allow_sdpif_rate_limit_when_cstate_req);
11948 dml2_printf("DML::%s: dcfclk_deep_sleep_hysteresis = %d\n", __func__, arb_param->dcfclk_deep_sleep_hysteresis);
11949 #endif
11950
11951 }
11952
/*
 * Public entry point for retrieving the DCHUB watermark registers.
 * Forwards all arguments unchanged to the file-local rq_dlg_get_wm_regs(),
 * which fills *out.
 */
void dml2_core_shared_get_watermarks(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *out)
{
	rq_dlg_get_wm_regs(display_cfg, mode_lib, out);
}
11957
/*
 * Public entry point for retrieving the arbiter registers.
 * Forwards to the file-local rq_dlg_get_arb_params(), which fills *out from
 * the values held in mode_lib.
 */
void dml2_core_shared_get_arb_params(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *out)
{
	rq_dlg_get_arb_params(mode_lib, out);
}
11962
dml2_core_shared_get_pipe_regs(const struct dml2_display_cfg * display_cfg,struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_dchub_per_pipe_register_set * out,int pipe_index)11963 void dml2_core_shared_get_pipe_regs(const struct dml2_display_cfg *display_cfg,
11964 struct dml2_core_internal_display_mode_lib *mode_lib,
11965 struct dml2_dchub_per_pipe_register_set *out, int pipe_index)
11966 {
11967 rq_dlg_get_rq_reg(&out->rq_regs, display_cfg, mode_lib, pipe_index);
11968 rq_dlg_get_dlg_reg(&mode_lib->scratch, &out->dlg_regs, &out->ttu_regs, display_cfg, mode_lib, pipe_index);
11969 out->det_size = mode_lib->mp.DETBufferSizeInKByte[mode_lib->mp.pipe_plane[pipe_index]] / mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
11970 }
11971
dml2_core_shared_get_stream_programming(const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_per_stream_programming * out,int pipe_index)11972 void dml2_core_shared_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_per_stream_programming *out, int pipe_index)
11973 {
11974 // out->min_clocks.dcn4x.dscclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); // FIXME_STAGE2
11975 // out->min_clocks.dcn4x.dtbclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000);
11976 // out->min_clocks.dcn4x.phyclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000);
11977
11978 out->global_sync.dcn4x.vready_offset_pixels = mode_lib->mp.VReadyOffsetPix[mode_lib->mp.pipe_plane[pipe_index]];
11979 out->global_sync.dcn4x.vstartup_lines = mode_lib->mp.VStartup[mode_lib->mp.pipe_plane[pipe_index]];
11980 out->global_sync.dcn4x.vupdate_offset_pixels = mode_lib->mp.VUpdateOffsetPix[mode_lib->mp.pipe_plane[pipe_index]];
11981 out->global_sync.dcn4x.vupdate_vupdate_width_pixels = mode_lib->mp.VUpdateWidthPix[mode_lib->mp.pipe_plane[pipe_index]];
11982 }
11983
dml2_core_shared_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_mcache_surface_allocation * out,int plane_idx)11984 void dml2_core_shared_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_mcache_surface_allocation *out, int plane_idx)
11985 {
11986 unsigned int n;
11987
11988 out->num_mcaches_plane0 = mode_lib->ms.num_mcaches_l[plane_idx];
11989 out->num_mcaches_plane1 = mode_lib->ms.num_mcaches_c[plane_idx];
11990 out->shift_granularity.p0 = mode_lib->ms.mcache_shift_granularity_l[plane_idx];
11991 out->shift_granularity.p1 = mode_lib->ms.mcache_shift_granularity_c[plane_idx];
11992
11993 for (n = 0; n < out->num_mcaches_plane0; n++)
11994 out->mcache_x_offsets_plane0[n] = mode_lib->ms.mcache_offsets_l[plane_idx][n];
11995
11996 for (n = 0; n < out->num_mcaches_plane1; n++)
11997 out->mcache_x_offsets_plane1[n] = mode_lib->ms.mcache_offsets_l[plane_idx][n];
11998
11999 out->last_slice_sharing.mall_comb_mcache_p0 = mode_lib->ms.mall_comb_mcache_l[plane_idx];
12000 out->last_slice_sharing.mall_comb_mcache_p1 = mode_lib->ms.mall_comb_mcache_c[plane_idx];
12001 out->last_slice_sharing.plane0_plane1 = mode_lib->ms.lc_comb_mcache[plane_idx];
12002 out->informative.meta_row_bytes_plane0 = mode_lib->ms.mcache_row_bytes_l[plane_idx];
12003 out->informative.meta_row_bytes_plane1 = mode_lib->ms.mcache_row_bytes_c[plane_idx];
12004
12005 out->valid = true;
12006 }
12007
/*
 * Report the MALL surface size for a pipe.
 * pipe_index is first translated to its plane via mode_lib->mp.pipe_plane[],
 * then the plane's SurfaceSizeInTheMALL entry is written to *out.
 */
void dml2_core_shared_get_mall_allocation(struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int *out, int pipe_index)
{
	*out = mode_lib->mp.SurfaceSizeInTheMALL[mode_lib->mp.pipe_plane[pipe_index]];
}
12012
dml2_core_shared_get_plane_support_info(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,struct core_plane_support_info * out,int plane_idx)12013 void dml2_core_shared_get_plane_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_plane_support_info *out, int plane_idx)
12014 {
12015 out->mall_svp_size_requirement_ways = 0;
12016
12017 out->nominal_vblank_pstate_latency_hiding_us =
12018 (int)(display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_idx].stream_index].timing.h_total /
12019 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_idx].stream_index].timing.pixel_clock_khz / 1000) * mode_lib->ms.TWait[plane_idx]);
12020
12021 out->dram_change_latency_hiding_margin_in_active = (int)mode_lib->ms.VActiveLatencyHidingMargin[plane_idx];
12022
12023 out->active_latency_hiding_us = (int)mode_lib->ms.VActiveLatencyHidingUs[plane_idx];
12024 }
12025
dml2_core_shared_get_stream_support_info(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,struct core_stream_support_info * out,int plane_index)12026 void dml2_core_shared_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index)
12027 {
12028 double phantom_processing_delay_pix;
12029 unsigned int phantom_processing_delay_lines;
12030 unsigned int phantom_v_active_lines;
12031 unsigned int phantom_v_startup_lines;
12032 unsigned int phantom_v_blank_lines;
12033 unsigned int main_v_blank_lines;
12034 unsigned int rem;
12035
12036 phantom_processing_delay_pix = (double)((mode_lib->ip.subvp_fw_processing_delay_us + mode_lib->ip.subvp_pstate_allow_width_us) *
12037 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.pixel_clock_khz / 1000));
12038 phantom_processing_delay_lines = (unsigned int)(phantom_processing_delay_pix / (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total);
12039 dml2_core_shared_div_rem(phantom_processing_delay_pix, display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total, &rem);
12040 if (rem)
12041 phantom_processing_delay_lines++;
12042
12043 phantom_v_startup_lines = mode_lib->ms.MaxVStartupLines[plane_index];
12044 phantom_v_active_lines = phantom_processing_delay_lines + mode_lib->ms.SubViewportLinesNeededInMALL[plane_index] + mode_lib->ip.subvp_swath_height_margin_lines;
12045
12046 // phantom_vblank = max(vbp(vstartup) + vactive + vfp(always 1) + vsync(can be 1), main_vblank)
12047 phantom_v_blank_lines = phantom_v_startup_lines + 1 + 1;
12048 main_v_blank_lines = display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.v_active;
12049 if (phantom_v_blank_lines > main_v_blank_lines)
12050 phantom_v_blank_lines = main_v_blank_lines;
12051
12052 out->phantom_v_active = phantom_v_active_lines;
12053 // phantom_vtotal = vactive + vblank
12054 out->phantom_v_total = phantom_v_active_lines + phantom_v_blank_lines;
12055
12056 out->phantom_min_v_active = mode_lib->ms.SubViewportLinesNeededInMALL[plane_index];
12057 out->phantom_v_startup = mode_lib->ms.MaxVStartupLines[plane_index];
12058
12059 out->vblank_reserved_time_us = display_cfg->plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000;
12060 #if defined(__DML_VBA_DEBUG__)
12061 dml2_printf("DML::%s: subvp_fw_processing_delay_us = %d\n", __func__, mode_lib->ip.subvp_fw_processing_delay_us);
12062 dml2_printf("DML::%s: subvp_pstate_allow_width_us = %d\n", __func__, mode_lib->ip.subvp_pstate_allow_width_us);
12063 dml2_printf("DML::%s: subvp_swath_height_margin_lines = %d\n", __func__, mode_lib->ip.subvp_swath_height_margin_lines);
12064 dml2_printf("DML::%s: vblank_reserved_time_us = %f\n", __func__, out->vblank_reserved_time_us);
12065 #endif
12066 }
12067
dml2_core_shared_get_informative(const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_display_cfg_programming * out)12068 void dml2_core_shared_get_informative(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_cfg_programming *out)
12069 {
12070 unsigned int k, n;
12071
12072 out->informative.mode_support_info.ModeIsSupported = mode_lib->ms.support.ModeSupport;
12073 out->informative.mode_support_info.ImmediateFlipSupport = mode_lib->ms.support.ImmediateFlipSupport;
12074 out->informative.mode_support_info.WritebackLatencySupport = mode_lib->ms.support.WritebackLatencySupport;
12075 out->informative.mode_support_info.ScaleRatioAndTapsSupport = mode_lib->ms.support.ScaleRatioAndTapsSupport;
12076 out->informative.mode_support_info.SourceFormatPixelAndScanSupport = mode_lib->ms.support.SourceFormatPixelAndScanSupport;
12077 out->informative.mode_support_info.P2IWith420 = mode_lib->ms.support.P2IWith420;
12078 out->informative.mode_support_info.DSCOnlyIfNecessaryWithBPP = mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP;
12079 out->informative.mode_support_info.DSC422NativeNotSupported = mode_lib->ms.support.DSC422NativeNotSupported;
12080 out->informative.mode_support_info.LinkRateDoesNotMatchDPVersion = mode_lib->ms.support.LinkRateDoesNotMatchDPVersion;
12081 out->informative.mode_support_info.LinkRateForMultistreamNotIndicated = mode_lib->ms.support.LinkRateForMultistreamNotIndicated;
12082 out->informative.mode_support_info.BPPForMultistreamNotIndicated = mode_lib->ms.support.BPPForMultistreamNotIndicated;
12083 out->informative.mode_support_info.MultistreamWithHDMIOreDP = mode_lib->ms.support.MultistreamWithHDMIOreDP;
12084 out->informative.mode_support_info.MSOOrODMSplitWithNonDPLink = mode_lib->ms.support.MSOOrODMSplitWithNonDPLink;
12085 out->informative.mode_support_info.NotEnoughLanesForMSO = mode_lib->ms.support.NotEnoughLanesForMSO;
12086 out->informative.mode_support_info.NumberOfOTGSupport = mode_lib->ms.support.NumberOfOTGSupport;
12087 out->informative.mode_support_info.NumberOfHDMIFRLSupport = mode_lib->ms.support.NumberOfHDMIFRLSupport;
12088 out->informative.mode_support_info.NumberOfDP2p0Support = mode_lib->ms.support.NumberOfDP2p0Support;
12089 out->informative.mode_support_info.WritebackScaleRatioAndTapsSupport = mode_lib->ms.support.WritebackScaleRatioAndTapsSupport;
12090 out->informative.mode_support_info.CursorSupport = mode_lib->ms.support.CursorSupport;
12091 out->informative.mode_support_info.PitchSupport = mode_lib->ms.support.PitchSupport;
12092 out->informative.mode_support_info.ViewportExceedsSurface = mode_lib->ms.support.ViewportExceedsSurface;
12093 out->informative.mode_support_info.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = false;
12094 out->informative.mode_support_info.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe;
12095 out->informative.mode_support_info.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen;
12096 out->informative.mode_support_info.InvalidCombinationOfMALLUseForPState = mode_lib->ms.support.InvalidCombinationOfMALLUseForPState;
12097 out->informative.mode_support_info.ExceededMALLSize = mode_lib->ms.support.ExceededMALLSize;
12098 out->informative.mode_support_info.EnoughWritebackUnits = mode_lib->ms.support.EnoughWritebackUnits;
12099
12100 out->informative.mode_support_info.ExceededMultistreamSlots = mode_lib->ms.support.ExceededMultistreamSlots;
12101 out->informative.mode_support_info.NotEnoughDSCUnits = mode_lib->ms.support.NotEnoughDSCUnits;
12102 out->informative.mode_support_info.NotEnoughDSCSlices = mode_lib->ms.support.NotEnoughDSCSlices;
12103 out->informative.mode_support_info.PixelsPerLinePerDSCUnitSupport = mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport;
12104 out->informative.mode_support_info.DSCCLKRequiredMoreThanSupported = mode_lib->ms.support.DSCCLKRequiredMoreThanSupported;
12105 out->informative.mode_support_info.DTBCLKRequiredMoreThanSupported = mode_lib->ms.support.DTBCLKRequiredMoreThanSupported;
12106 out->informative.mode_support_info.LinkCapacitySupport = mode_lib->ms.support.LinkCapacitySupport;
12107
12108 out->informative.mode_support_info.ROBSupport = mode_lib->ms.support.ROBSupport;
12109 out->informative.mode_support_info.OutstandingRequestsSupport = mode_lib->ms.support.OutstandingRequestsSupport;
12110 out->informative.mode_support_info.OutstandingRequestsUrgencyAvoidance = mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance;
12111 out->informative.mode_support_info.PTEBufferSizeNotExceeded = mode_lib->ms.support.PTEBufferSizeNotExceeded;
12112 out->informative.mode_support_info.DCCMetaBufferSizeNotExceeded = mode_lib->ms.support.DCCMetaBufferSizeNotExceeded;
12113
12114 out->informative.mode_support_info.TotalVerticalActiveBandwidthSupport = mode_lib->ms.support.AvgBandwidthSupport;
12115 out->informative.mode_support_info.VActiveBandwidthSupport = mode_lib->ms.support.UrgVactiveBandwidthSupport;
12116 out->informative.mode_support_info.USRRetrainingSupport = mode_lib->ms.support.USRRetrainingSupport;
12117
12118 out->informative.mode_support_info.PrefetchSupported = mode_lib->ms.support.PrefetchSupported;
12119 out->informative.mode_support_info.DynamicMetadataSupported = mode_lib->ms.support.DynamicMetadataSupported;
12120 out->informative.mode_support_info.VRatioInPrefetchSupported = mode_lib->ms.support.VRatioInPrefetchSupported;
12121 out->informative.mode_support_info.DISPCLK_DPPCLK_Support = mode_lib->ms.support.DISPCLK_DPPCLK_Support;
12122 out->informative.mode_support_info.TotalAvailablePipesSupport = mode_lib->ms.support.TotalAvailablePipesSupport;
12123 out->informative.mode_support_info.ViewportSizeSupport = mode_lib->ms.support.ViewportSizeSupport;
12124
12125 for (k = 0; k < out->display_config.num_planes; k++) {
12126
12127 out->informative.mode_support_info.FCLKChangeSupport[k] = mode_lib->ms.support.FCLKChangeSupport[k];
12128 out->informative.mode_support_info.MPCCombineEnable[k] = mode_lib->ms.support.MPCCombineEnable[k];
12129 out->informative.mode_support_info.ODMMode[k] = mode_lib->ms.support.ODMMode[k];
12130 out->informative.mode_support_info.DPPPerSurface[k] = mode_lib->ms.support.DPPPerSurface[k];
12131 out->informative.mode_support_info.DSCEnabled[k] = mode_lib->ms.support.DSCEnabled[k];
12132 out->informative.mode_support_info.FECEnabled[k] = mode_lib->ms.support.FECEnabled[k];
12133 out->informative.mode_support_info.NumberOfDSCSlices[k] = mode_lib->ms.support.NumberOfDSCSlices[k];
12134 out->informative.mode_support_info.OutputBpp[k] = mode_lib->ms.support.OutputBpp[k];
12135
12136 if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_unknown)
12137 out->informative.mode_support_info.OutputType[k] = dml2_output_type_unknown;
12138 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_dp)
12139 out->informative.mode_support_info.OutputType[k] = dml2_output_type_dp;
12140 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_edp)
12141 out->informative.mode_support_info.OutputType[k] = dml2_output_type_edp;
12142 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_dp2p0)
12143 out->informative.mode_support_info.OutputType[k] = dml2_output_type_dp2p0;
12144 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_hdmi)
12145 out->informative.mode_support_info.OutputType[k] = dml2_output_type_hdmi;
12146 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_hdmifrl)
12147 out->informative.mode_support_info.OutputType[k] = dml2_output_type_hdmifrl;
12148
12149 if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_unknown)
12150 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_unknown;
12151 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr)
12152 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr;
12153 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr2)
12154 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr2;
12155 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr3)
12156 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr3;
12157 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr10)
12158 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr10;
12159 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr13p5)
12160 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr13p5;
12161 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr20)
12162 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr20;
12163 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_3x3)
12164 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_3x3;
12165 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_6x3)
12166 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_6x3;
12167 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_6x4)
12168 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_6x4;
12169 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_8x4)
12170 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_8x4;
12171 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_10x4)
12172 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_10x4;
12173 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_12x4)
12174 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_12x4;
12175
12176 out->informative.mode_support_info.AlignedYPitch[k] = mode_lib->ms.support.AlignedYPitch[k];
12177 out->informative.mode_support_info.AlignedCPitch[k] = mode_lib->ms.support.AlignedCPitch[k];
12178 }
12179
12180 out->informative.watermarks.urgent_us = mode_lib->mp.Watermark.UrgentWatermark;
12181 out->informative.watermarks.writeback_urgent_us = mode_lib->mp.Watermark.WritebackUrgentWatermark;
12182 out->informative.watermarks.writeback_pstate_us = mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark;
12183 out->informative.watermarks.writeback_fclk_pstate_us = mode_lib->mp.Watermark.WritebackFCLKChangeWatermark;
12184
12185 out->informative.watermarks.cstate_exit_us = mode_lib->mp.Watermark.StutterExitWatermark;
12186 out->informative.watermarks.cstate_enter_plus_exit_us = mode_lib->mp.Watermark.StutterEnterPlusExitWatermark;
12187 out->informative.watermarks.z8_cstate_exit_us = mode_lib->mp.Watermark.Z8StutterExitWatermark;
12188 out->informative.watermarks.z8_cstate_enter_plus_exit_us = mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark;
12189 out->informative.watermarks.pstate_change_us = mode_lib->mp.Watermark.DRAMClockChangeWatermark;
12190 out->informative.watermarks.fclk_pstate_change_us = mode_lib->mp.Watermark.FCLKChangeWatermark;
12191 out->informative.watermarks.usr_retraining_us = mode_lib->mp.Watermark.USRRetrainingWatermark;
12192
12193 out->informative.mall.total_surface_size_in_mall_bytes = 0;
12194 for (k = 0; k < out->display_config.num_planes; ++k)
12195 out->informative.mall.total_surface_size_in_mall_bytes += mode_lib->mp.SurfaceSizeInTheMALL[k];
12196
12197 out->informative.qos.min_return_latency_in_dcfclk = mode_lib->mp.min_return_latency_in_dcfclk;
12198 out->informative.qos.urgent_latency_us = mode_lib->mp.UrgentLatency;
12199
12200 out->informative.qos.max_urgent_latency_us = mode_lib->ms.support.max_urgent_latency_us;
12201 out->informative.qos.avg_non_urgent_latency_us = mode_lib->ms.support.avg_non_urgent_latency_us;
12202 out->informative.qos.avg_urgent_latency_us = mode_lib->ms.support.avg_urgent_latency_us;
12203
12204 out->informative.qos.wm_memory_trip_us = mode_lib->mp.UrgentLatency;
12205 out->informative.qos.meta_trip_memory_us = mode_lib->mp.MetaTripToMemory;
12206 out->informative.qos.fraction_of_urgent_bandwidth = mode_lib->mp.FractionOfUrgentBandwidth;
12207 out->informative.qos.fraction_of_urgent_bandwidth_immediate_flip = mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip;
12208 out->informative.qos.fraction_of_urgent_bandwidth_mall = mode_lib->mp.FractionOfUrgentBandwidthMALL;
12209
12210 out->informative.qos.avg_bw_required.sys_active.sdp_bw_mbps =
12211 mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
12212 out->informative.qos.avg_bw_required.sys_active.dram_bw_mbps =
12213 mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
12214 out->informative.qos.avg_bw_required.svp_prefetch.sdp_bw_mbps =
12215 mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
12216 out->informative.qos.avg_bw_required.svp_prefetch.dram_bw_mbps =
12217 mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
12218
12219 out->informative.qos.avg_bw_available.sys_active.sdp_bw_mbps =
12220 mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
12221 out->informative.qos.avg_bw_available.sys_active.dram_bw_mbps =
12222 mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
12223 out->informative.qos.avg_bw_available.svp_prefetch.sdp_bw_mbps =
12224 mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
12225 out->informative.qos.avg_bw_available.svp_prefetch.dram_bw_mbps =
12226 mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
12227
12228 out->informative.qos.urg_bw_available.sys_active.sdp_bw_mbps =
12229 mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
12230 out->informative.qos.urg_bw_available.sys_active.dram_bw_mbps =
12231 mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
12232 out->informative.qos.urg_bw_available.sys_active.dram_vm_only_bw_mbps =
12233 mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active];
12234
12235 out->informative.qos.urg_bw_available.svp_prefetch.sdp_bw_mbps =
12236 mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
12237 out->informative.qos.urg_bw_available.svp_prefetch.dram_bw_mbps =
12238 mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
12239 out->informative.qos.urg_bw_available.svp_prefetch.dram_vm_only_bw_mbps =
12240 mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_svp_prefetch];
12241
12242 out->informative.qos.urg_bw_required.sys_active.sdp_bw_mbps = mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
12243 out->informative.qos.urg_bw_required.sys_active.dram_bw_mbps = mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
12244 out->informative.qos.urg_bw_required.svp_prefetch.sdp_bw_mbps = mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
12245 out->informative.qos.urg_bw_required.svp_prefetch.dram_bw_mbps = mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
12246
12247 out->informative.qos.non_urg_bw_required.sys_active.sdp_bw_mbps = mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
12248 out->informative.qos.non_urg_bw_required.sys_active.dram_bw_mbps = mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
12249 out->informative.qos.non_urg_bw_required.svp_prefetch.sdp_bw_mbps = mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
12250 out->informative.qos.non_urg_bw_required.svp_prefetch.dram_bw_mbps = mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
12251
12252 out->informative.qos.urg_bw_required_with_flip.sys_active.sdp_bw_mbps = mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
12253 out->informative.qos.urg_bw_required_with_flip.sys_active.dram_bw_mbps = mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
12254 out->informative.qos.urg_bw_required_with_flip.svp_prefetch.sdp_bw_mbps = mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
12255 out->informative.qos.urg_bw_required_with_flip.svp_prefetch.dram_bw_mbps = mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
12256
12257 out->informative.qos.non_urg_bw_required_with_flip.sys_active.sdp_bw_mbps = mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
12258 out->informative.qos.non_urg_bw_required_with_flip.sys_active.dram_bw_mbps = mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
12259 out->informative.qos.non_urg_bw_required_with_flip.svp_prefetch.sdp_bw_mbps = mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
12260 out->informative.qos.non_urg_bw_required_with_flip.svp_prefetch.dram_bw_mbps = mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
12261
12262 out->informative.crb.comp_buffer_size_kbytes = mode_lib->mp.CompressedBufferSizeInkByte;
12263 out->informative.crb.UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled;
12264
12265 out->informative.crb.compbuf_reserved_space_64b = mode_lib->mp.compbuf_reserved_space_64b;
12266 out->informative.misc.hw_debug5 = mode_lib->mp.hw_debug5;
12267 out->informative.misc.dcfclk_deep_sleep_hysteresis = mode_lib->mp.dcfclk_deep_sleep_hysteresis;
12268
12269 out->informative.power_management.stutter_efficiency = mode_lib->mp.StutterEfficiencyNotIncludingVBlank;
12270 out->informative.power_management.stutter_efficiency_with_vblank = mode_lib->mp.StutterEfficiency;
12271 out->informative.power_management.stutter_num_bursts = mode_lib->mp.NumberOfStutterBurstsPerFrame;
12272
12273 out->informative.power_management.z8.stutter_efficiency = mode_lib->mp.Z8StutterEfficiency;
12274 out->informative.power_management.z8.stutter_efficiency_with_vblank = mode_lib->mp.StutterEfficiency;
12275 out->informative.power_management.z8.stutter_num_bursts = mode_lib->mp.Z8NumberOfStutterBurstsPerFrame;
12276 out->informative.power_management.z8.stutter_period = mode_lib->mp.StutterPeriod;
12277
12278 out->informative.power_management.z8.bestcase.stutter_efficiency = mode_lib->mp.Z8StutterEfficiencyBestCase;
12279 out->informative.power_management.z8.bestcase.stutter_num_bursts = mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase;
12280 out->informative.power_management.z8.bestcase.stutter_period = mode_lib->mp.StutterPeriodBestCase;
12281
12282 out->informative.misc.cstate_max_cap_mode = mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE;
12283
12284 out->min_clocks.dcn4x.dpprefclk_khz = (int unsigned)(mode_lib->mp.GlobalDPPCLK * 1000.0);
12285
12286 out->informative.qos.max_active_fclk_change_latency_supported = mode_lib->mp.MaxActiveFCLKChangeLatencySupported;
12287
12288 for (k = 0; k < out->display_config.num_planes; k++) {
12289
12290 if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us)
12291 && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.fclk_change_blackout_us)
12292 && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us))
12293 out->informative.misc.PrefetchMode[k] = 0;
12294 else if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.fclk_change_blackout_us)
12295 && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us))
12296 out->informative.misc.PrefetchMode[k] = 1;
12297 else if (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us)
12298 out->informative.misc.PrefetchMode[k] = 2;
12299 else
12300 out->informative.misc.PrefetchMode[k] = 3;
12301
12302 out->informative.misc.min_ttu_vblank_us[k] = mode_lib->mp.MinTTUVBlank[k];
12303 out->informative.mall.subviewport_lines_needed_in_mall[k] = mode_lib->mp.SubViewportLinesNeededInMALL[k];
12304 out->informative.crb.det_size_in_kbytes[k] = mode_lib->mp.DETBufferSizeInKByte[k];
12305 out->informative.crb.DETBufferSizeY[k] = mode_lib->mp.DETBufferSizeY[k];
12306 out->informative.misc.ImmediateFlipSupportedForPipe[k] = mode_lib->mp.ImmediateFlipSupportedForPipe[k];
12307 out->informative.misc.UsesMALLForStaticScreen[k] = mode_lib->mp.is_using_mall_for_ss[k];
12308 out->informative.plane_info[k].dpte_row_height_plane0 = mode_lib->mp.dpte_row_height[k];
12309 out->informative.plane_info[k].dpte_row_height_plane1 = mode_lib->mp.dpte_row_height_chroma[k];
12310 out->informative.plane_info[k].meta_row_height_plane0 = mode_lib->mp.meta_row_height[k];
12311 out->informative.plane_info[k].meta_row_height_plane1 = mode_lib->mp.meta_row_height_chroma[k];
12312 out->informative.dcc_control[k].max_uncompressed_block_plane0 = mode_lib->mp.DCCYMaxUncompressedBlock[k];
12313 out->informative.dcc_control[k].max_compressed_block_plane0 = mode_lib->mp.DCCYMaxCompressedBlock[k];
12314 out->informative.dcc_control[k].independent_block_plane0 = mode_lib->mp.DCCYIndependentBlock[k];
12315 out->informative.dcc_control[k].max_uncompressed_block_plane1 = mode_lib->mp.DCCCMaxUncompressedBlock[k];
12316 out->informative.dcc_control[k].max_compressed_block_plane1 = mode_lib->mp.DCCCMaxCompressedBlock[k];
12317 out->informative.dcc_control[k].independent_block_plane1 = mode_lib->mp.DCCCIndependentBlock[k];
12318 out->informative.misc.dst_x_after_scaler[k] = mode_lib->mp.DSTXAfterScaler[k];
12319 out->informative.misc.dst_y_after_scaler[k] = mode_lib->mp.DSTYAfterScaler[k];
12320 out->informative.misc.prefetch_source_lines_plane0[k] = mode_lib->mp.PrefetchSourceLinesY[k];
12321 out->informative.misc.prefetch_source_lines_plane1[k] = mode_lib->mp.PrefetchSourceLinesC[k];
12322 out->informative.misc.vready_at_or_after_vsync[k] = mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k];
12323 out->informative.misc.min_dst_y_next_start[k] = mode_lib->mp.MIN_DST_Y_NEXT_START[k];
12324 out->informative.plane_info[k].swath_width_plane0 = mode_lib->mp.SwathWidthY[k];
12325 out->informative.plane_info[k].swath_height_plane0 = mode_lib->mp.SwathHeightY[k];
12326 out->informative.plane_info[k].swath_height_plane1 = mode_lib->mp.SwathHeightC[k];
12327 out->informative.misc.CursorDstXOffset[k] = mode_lib->mp.CursorDstXOffset[k];
12328 out->informative.misc.CursorDstYOffset[k] = mode_lib->mp.CursorDstYOffset[k];
12329 out->informative.misc.CursorChunkHDLAdjust[k] = mode_lib->mp.CursorChunkHDLAdjust[k];
12330 out->informative.misc.dpte_group_bytes[k] = mode_lib->mp.dpte_group_bytes[k];
12331 out->informative.misc.vm_group_bytes[k] = mode_lib->mp.vm_group_bytes[k];
12332 out->informative.misc.DisplayPipeRequestDeliveryTimeLuma[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[k];
12333 out->informative.misc.DisplayPipeRequestDeliveryTimeChroma[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[k];
12334 out->informative.misc.DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[k];
12335 out->informative.misc.DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[k];
12336 out->informative.misc.TimePerVMGroupVBlank[k] = mode_lib->mp.TimePerVMGroupVBlank[k];
12337 out->informative.misc.TimePerVMGroupFlip[k] = mode_lib->mp.TimePerVMGroupFlip[k];
12338 out->informative.misc.TimePerVMRequestVBlank[k] = mode_lib->mp.TimePerVMRequestVBlank[k];
12339 out->informative.misc.TimePerVMRequestFlip[k] = mode_lib->mp.TimePerVMRequestFlip[k];
12340 out->informative.misc.Tdmdl_vm[k] = mode_lib->mp.Tdmdl_vm[k];
12341 out->informative.misc.Tdmdl[k] = mode_lib->mp.Tdmdl[k];
12342 out->informative.misc.VStartup[k] = mode_lib->mp.VStartup[k];
12343 out->informative.misc.VUpdateOffsetPix[k] = mode_lib->mp.VUpdateOffsetPix[k];
12344 out->informative.misc.VUpdateWidthPix[k] = mode_lib->mp.VUpdateWidthPix[k];
12345 out->informative.misc.VReadyOffsetPix[k] = mode_lib->mp.VReadyOffsetPix[k];
12346
12347 out->informative.misc.DST_Y_PER_PTE_ROW_NOM_L[k] = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L[k];
12348 out->informative.misc.DST_Y_PER_PTE_ROW_NOM_C[k] = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C[k];
12349 out->informative.misc.time_per_pte_group_nom_luma[k] = mode_lib->mp.time_per_pte_group_nom_luma[k];
12350 out->informative.misc.time_per_pte_group_nom_chroma[k] = mode_lib->mp.time_per_pte_group_nom_chroma[k];
12351 out->informative.misc.time_per_pte_group_vblank_luma[k] = mode_lib->mp.time_per_pte_group_vblank_luma[k];
12352 out->informative.misc.time_per_pte_group_vblank_chroma[k] = mode_lib->mp.time_per_pte_group_vblank_chroma[k];
12353 out->informative.misc.time_per_pte_group_flip_luma[k] = mode_lib->mp.time_per_pte_group_flip_luma[k];
12354 out->informative.misc.time_per_pte_group_flip_chroma[k] = mode_lib->mp.time_per_pte_group_flip_chroma[k];
12355 out->informative.misc.VRatioPrefetchY[k] = mode_lib->mp.VRatioPrefetchY[k];
12356 out->informative.misc.VRatioPrefetchC[k] = mode_lib->mp.VRatioPrefetchC[k];
12357 out->informative.misc.DestinationLinesForPrefetch[k] = mode_lib->mp.dst_y_prefetch[k];
12358 out->informative.misc.DestinationLinesToRequestVMInVBlank[k] = mode_lib->mp.dst_y_per_vm_vblank[k];
12359 out->informative.misc.DestinationLinesToRequestRowInVBlank[k] = mode_lib->mp.dst_y_per_row_vblank[k];
12360 out->informative.misc.DestinationLinesToRequestVMInImmediateFlip[k] = mode_lib->mp.dst_y_per_vm_flip[k];
12361 out->informative.misc.DestinationLinesToRequestRowInImmediateFlip[k] = mode_lib->mp.dst_y_per_row_flip[k];
12362 out->informative.misc.DisplayPipeLineDeliveryTimeLuma[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[k];
12363 out->informative.misc.DisplayPipeLineDeliveryTimeChroma[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[k];
12364 out->informative.misc.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[k];
12365 out->informative.misc.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[k];
12366
12367 out->informative.misc.WritebackAllowDRAMClockChangeEndPosition[k] = mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k];
12368 out->informative.misc.WritebackAllowFCLKChangeEndPosition[k] = mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k];
12369 out->informative.misc.DSCCLK_calculated[k] = mode_lib->mp.DSCCLK[k];
12370 out->informative.misc.BIGK_FRAGMENT_SIZE[k] = mode_lib->mp.BIGK_FRAGMENT_SIZE[k];
12371 out->informative.misc.PTE_BUFFER_MODE[k] = mode_lib->mp.PTE_BUFFER_MODE[k];
12372 out->informative.misc.DSCDelay[k] = mode_lib->mp.DSCDelay[k];
12373 out->informative.misc.MaxActiveDRAMClockChangeLatencySupported[k] = mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported[k];
12374 }
12375
12376 // For this DV informative layer, all pipes in the same plane simply use the same mcache id (the plane index).
12377 // An optimization and helper layer will be added later on.
12378 // This only works when the "mcache" count is high enough to fit everything without thrashing the cache.
12379 for (k = 0; k < out->display_config.num_planes; k++) {
12380 out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane0 = mode_lib->ms.num_mcaches_l[k];
12381 out->informative.non_optimized_mcache_allocation[k].informative.meta_row_bytes_plane0 = mode_lib->ms.mcache_row_bytes_l[k];
12382
12383 for (n = 0; n < out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane0; n++) {
12384 out->informative.non_optimized_mcache_allocation[k].mcache_x_offsets_plane0[n] = mode_lib->ms.mcache_offsets_l[k][n];
12385 out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane0[n] = k;
12386 }
12387
12388 out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane1 = mode_lib->ms.num_mcaches_c[k];
12389 out->informative.non_optimized_mcache_allocation[k].informative.meta_row_bytes_plane1 = mode_lib->ms.mcache_row_bytes_c[k];
12390
12391 for (n = 0; n < out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane1; n++) {
12392 out->informative.non_optimized_mcache_allocation[k].mcache_x_offsets_plane1[n] = mode_lib->ms.mcache_offsets_c[k][n];
12393 out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane1[n] = k;
12394 }
12395 }
12396
12397 out->informative.qos.max_non_urgent_latency_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles
12398 / mode_lib->mp.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0)
12399 + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->mp.FabricClock
12400 + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->mp.FabricClock
12401 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0);
12402
12403 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
12404 if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024
12405 / mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= out->informative.qos.max_non_urgent_latency_us) {
12406 out->informative.misc.ROBUrgencyAvoidance = true;
12407 } else {
12408 out->informative.misc.ROBUrgencyAvoidance = false;
12409 }
12410 } else {
12411 out->informative.misc.ROBUrgencyAvoidance = true;
12412 }
12413 }
12414