1 /*
2 * Copyright 2020 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25
26 #include "dc.h"
27 #include "../display_mode_lib.h"
28 #include "display_mode_vba_30.h"
29 #include "../dml_inline_defs.h"
30
31
32 /*
33 * NOTE:
34 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
35 *
36 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
37 * ways. Unless there is something clearly wrong with it the code should
38 * remain as-is as it provides us with a guarantee from HW that it is correct.
39 */
40
41
42 typedef struct {
43 double DPPCLK;
44 double DISPCLK;
45 double PixelClock;
46 double DCFCLKDeepSleep;
47 unsigned int DPPPerPlane;
48 bool ScalerEnabled;
49 enum scan_direction_class SourceScan;
50 unsigned int BlockWidth256BytesY;
51 unsigned int BlockHeight256BytesY;
52 unsigned int BlockWidth256BytesC;
53 unsigned int BlockHeight256BytesC;
54 unsigned int InterlaceEnable;
55 unsigned int NumberOfCursors;
56 unsigned int VBlank;
57 unsigned int HTotal;
58 unsigned int DCCEnable;
59 bool ODMCombineEnabled;
60 } Pipe;
61
/*
 * Bits-per-pixel marker values.
 * NOTE(review): judging by the names, 0 marks "no valid bpp" and 0xffffffff
 * marks a pipe that is blended into another one -- confirm against the users.
 */
#define BPP_INVALID			0
#define BPP_BLENDED_PIPE		0xffffffff

/*
 * DCN 3.0 width limits.  The same 5184 / 4096 figures are quoted as the
 * slice-width bounds in the dscceComputeDelay() input notes.
 */
#define DCN30_MAX_DSC_IMAGE_WIDTH	5184
#define DCN30_MAX_FMT_420_BUFFER_WIDTH	4096
66
67 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
68 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
69 struct display_mode_lib *mode_lib);
70 static unsigned int dscceComputeDelay(
71 unsigned int bpc,
72 double BPP,
73 unsigned int sliceWidth,
74 unsigned int numSlices,
75 enum output_format_class pixelFormat,
76 enum output_encoder_class Output);
77 static unsigned int dscComputeDelay(
78 enum output_format_class pixelFormat,
79 enum output_encoder_class Output);
80 // Super monster function with some 45 argument
81 static bool CalculatePrefetchSchedule(
82 struct display_mode_lib *mode_lib,
83 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
84 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
85 Pipe *myPipe,
86 unsigned int DSCDelay,
87 double DPPCLKDelaySubtotalPlusCNVCFormater,
88 double DPPCLKDelaySCL,
89 double DPPCLKDelaySCLLBOnly,
90 double DPPCLKDelayCNVCCursor,
91 double DISPCLKDelaySubtotal,
92 unsigned int DPP_RECOUT_WIDTH,
93 enum output_format_class OutputFormat,
94 unsigned int MaxInterDCNTileRepeaters,
95 unsigned int VStartup,
96 unsigned int MaxVStartup,
97 unsigned int GPUVMPageTableLevels,
98 bool GPUVMEnable,
99 bool HostVMEnable,
100 unsigned int HostVMMaxNonCachedPageTableLevels,
101 double HostVMMinPageSize,
102 bool DynamicMetadataEnable,
103 bool DynamicMetadataVMEnabled,
104 int DynamicMetadataLinesBeforeActiveRequired,
105 unsigned int DynamicMetadataTransmittedBytes,
106 double UrgentLatency,
107 double UrgentExtraLatency,
108 double TCalc,
109 unsigned int PDEAndMetaPTEBytesFrame,
110 unsigned int MetaRowByte,
111 unsigned int PixelPTEBytesPerRow,
112 double PrefetchSourceLinesY,
113 unsigned int SwathWidthY,
114 int BytePerPixelY,
115 double VInitPreFillY,
116 unsigned int MaxNumSwathY,
117 double PrefetchSourceLinesC,
118 unsigned int SwathWidthC,
119 int BytePerPixelC,
120 double VInitPreFillC,
121 unsigned int MaxNumSwathC,
122 long swath_width_luma_ub,
123 long swath_width_chroma_ub,
124 unsigned int SwathHeightY,
125 unsigned int SwathHeightC,
126 double TWait,
127 bool ProgressiveToInterlaceUnitInOPP,
128 double *DSTXAfterScaler,
129 double *DSTYAfterScaler,
130 double *DestinationLinesForPrefetch,
131 double *PrefetchBandwidth,
132 double *DestinationLinesToRequestVMInVBlank,
133 double *DestinationLinesToRequestRowInVBlank,
134 double *VRatioPrefetchY,
135 double *VRatioPrefetchC,
136 double *RequiredPrefetchPixDataBWLuma,
137 double *RequiredPrefetchPixDataBWChroma,
138 bool *NotEnoughTimeForDynamicMetadata,
139 double *Tno_bw,
140 double *prefetch_vmrow_bw,
141 double *Tdmdl_vm,
142 double *Tdmdl,
143 unsigned int *VUpdateOffsetPix,
144 double *VUpdateWidthPix,
145 double *VReadyOffsetPix);
146 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
147 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
148 static void CalculateDCCConfiguration(
149 bool DCCEnabled,
150 bool DCCProgrammingAssumesScanDirectionUnknown,
151 enum source_format_class SourcePixelFormat,
152 unsigned int ViewportWidthLuma,
153 unsigned int ViewportWidthChroma,
154 unsigned int ViewportHeightLuma,
155 unsigned int ViewportHeightChroma,
156 double DETBufferSize,
157 unsigned int RequestHeight256ByteLuma,
158 unsigned int RequestHeight256ByteChroma,
159 enum dm_swizzle_mode TilingFormat,
160 unsigned int BytePerPixelY,
161 unsigned int BytePerPixelC,
162 double BytePerPixelDETY,
163 double BytePerPixelDETC,
164 enum scan_direction_class ScanOrientation,
165 unsigned int *MaxUncompressedBlockLuma,
166 unsigned int *MaxUncompressedBlockChroma,
167 unsigned int *MaxCompressedBlockLuma,
168 unsigned int *MaxCompressedBlockChroma,
169 unsigned int *IndependentBlockLuma,
170 unsigned int *IndependentBlockChroma);
171 static double CalculatePrefetchSourceLines(
172 struct display_mode_lib *mode_lib,
173 double VRatio,
174 double vtaps,
175 bool Interlace,
176 bool ProgressiveToInterlaceUnitInOPP,
177 unsigned int SwathHeight,
178 unsigned int ViewportYStart,
179 double *VInitPreFill,
180 unsigned int *MaxNumSwath);
181 static unsigned int CalculateVMAndRowBytes(
182 struct display_mode_lib *mode_lib,
183 bool DCCEnable,
184 unsigned int BlockHeight256Bytes,
185 unsigned int BlockWidth256Bytes,
186 enum source_format_class SourcePixelFormat,
187 unsigned int SurfaceTiling,
188 unsigned int BytePerPixel,
189 enum scan_direction_class ScanDirection,
190 unsigned int SwathWidth,
191 unsigned int ViewportHeight,
192 bool GPUVMEnable,
193 bool HostVMEnable,
194 unsigned int HostVMMaxNonCachedPageTableLevels,
195 unsigned int GPUVMMinPageSize,
196 unsigned int HostVMMinPageSize,
197 unsigned int PTEBufferSizeInRequests,
198 unsigned int Pitch,
199 unsigned int DCCMetaPitch,
200 unsigned int *MacroTileWidth,
201 unsigned int *MetaRowByte,
202 unsigned int *PixelPTEBytesPerRow,
203 bool *PTEBufferSizeNotExceeded,
204 unsigned int *dpte_row_width_ub,
205 unsigned int *dpte_row_height,
206 unsigned int *MetaRequestWidth,
207 unsigned int *MetaRequestHeight,
208 unsigned int *meta_row_width,
209 unsigned int *meta_row_height,
210 unsigned int *vm_group_bytes,
211 unsigned int *dpte_group_bytes,
212 unsigned int *PixelPTEReqWidth,
213 unsigned int *PixelPTEReqHeight,
214 unsigned int *PTERequestSize,
215 unsigned int *DPDE0BytesFrame,
216 unsigned int *MetaPTEBytesFrame);
217 static double CalculateTWait(
218 unsigned int PrefetchMode,
219 double DRAMClockChangeLatency,
220 double UrgentLatency,
221 double SREnterPlusExitTime);
222 static void CalculateRowBandwidth(
223 bool GPUVMEnable,
224 enum source_format_class SourcePixelFormat,
225 double VRatio,
226 double VRatioChroma,
227 bool DCCEnable,
228 double LineTime,
229 unsigned int MetaRowByteLuma,
230 unsigned int MetaRowByteChroma,
231 unsigned int meta_row_height_luma,
232 unsigned int meta_row_height_chroma,
233 unsigned int PixelPTEBytesPerRowLuma,
234 unsigned int PixelPTEBytesPerRowChroma,
235 unsigned int dpte_row_height_luma,
236 unsigned int dpte_row_height_chroma,
237 double *meta_row_bw,
238 double *dpte_row_bw);
239 static void CalculateFlipSchedule(
240 struct display_mode_lib *mode_lib,
241 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
242 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
243 double UrgentExtraLatency,
244 double UrgentLatency,
245 unsigned int GPUVMMaxPageTableLevels,
246 bool HostVMEnable,
247 unsigned int HostVMMaxNonCachedPageTableLevels,
248 bool GPUVMEnable,
249 double HostVMMinPageSize,
250 double PDEAndMetaPTEBytesPerFrame,
251 double MetaRowBytes,
252 double DPTEBytesPerRow,
253 double BandwidthAvailableForImmediateFlip,
254 unsigned int TotImmediateFlipBytes,
255 enum source_format_class SourcePixelFormat,
256 double LineTime,
257 double VRatio,
258 double VRatioChroma,
259 double Tno_bw,
260 bool DCCEnable,
261 unsigned int dpte_row_height,
262 unsigned int meta_row_height,
263 unsigned int dpte_row_height_chroma,
264 unsigned int meta_row_height_chroma,
265 double *DestinationLinesToRequestVMInImmediateFlip,
266 double *DestinationLinesToRequestRowInImmediateFlip,
267 double *final_flip_bw,
268 bool *ImmediateFlipSupportedForPipe);
269 static double CalculateWriteBackDelay(
270 enum source_format_class WritebackPixelFormat,
271 double WritebackHRatio,
272 double WritebackVRatio,
273 unsigned int WritebackVTaps,
274 long WritebackDestinationWidth,
275 long WritebackDestinationHeight,
276 long WritebackSourceHeight,
277 unsigned int HTotal);
278 static void CalculateDynamicMetadataParameters(
279 int MaxInterDCNTileRepeaters,
280 double DPPCLK,
281 double DISPCLK,
282 double DCFClkDeepSleep,
283 double PixelClock,
284 long HTotal,
285 long VBlank,
286 long DynamicMetadataTransmittedBytes,
287 long DynamicMetadataLinesBeforeActiveRequired,
288 int InterlaceEnable,
289 bool ProgressiveToInterlaceUnitInOPP,
290 double *Tsetup,
291 double *Tdmbf,
292 double *Tdmec,
293 double *Tdmsks);
294 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
295 struct display_mode_lib *mode_lib,
296 unsigned int PrefetchMode,
297 unsigned int NumberOfActivePlanes,
298 unsigned int MaxLineBufferLines,
299 unsigned int LineBufferSize,
300 unsigned int DPPOutputBufferPixels,
301 unsigned int DETBufferSizeInKByte,
302 unsigned int WritebackInterfaceBufferSize,
303 double DCFCLK,
304 double ReturnBW,
305 bool GPUVMEnable,
306 unsigned int dpte_group_bytes[],
307 unsigned int MetaChunkSize,
308 double UrgentLatency,
309 double ExtraLatency,
310 double WritebackLatency,
311 double WritebackChunkSize,
312 double SOCCLK,
313 double DRAMClockChangeLatency,
314 double SRExitTime,
315 double SREnterPlusExitTime,
316 double DCFCLKDeepSleep,
317 unsigned int DPPPerPlane[],
318 bool DCCEnable[],
319 double DPPCLK[],
320 unsigned int DETBufferSizeY[],
321 unsigned int DETBufferSizeC[],
322 unsigned int SwathHeightY[],
323 unsigned int SwathHeightC[],
324 unsigned int LBBitPerPixel[],
325 double SwathWidthY[],
326 double SwathWidthC[],
327 double HRatio[],
328 double HRatioChroma[],
329 unsigned int vtaps[],
330 unsigned int VTAPsChroma[],
331 double VRatio[],
332 double VRatioChroma[],
333 unsigned int HTotal[],
334 double PixelClock[],
335 unsigned int BlendingAndTiming[],
336 double BytePerPixelDETY[],
337 double BytePerPixelDETC[],
338 double DSTXAfterScaler[],
339 double DSTYAfterScaler[],
340 bool WritebackEnable[],
341 enum source_format_class WritebackPixelFormat[],
342 double WritebackDestinationWidth[],
343 double WritebackDestinationHeight[],
344 double WritebackSourceHeight[],
345 enum clock_change_support *DRAMClockChangeSupport,
346 double *UrgentWatermark,
347 double *WritebackUrgentWatermark,
348 double *DRAMClockChangeWatermark,
349 double *WritebackDRAMClockChangeWatermark,
350 double *StutterExitWatermark,
351 double *StutterEnterPlusExitWatermark,
352 double *MinActiveDRAMClockChangeLatencySupported);
353 static void CalculateDCFCLKDeepSleep(
354 struct display_mode_lib *mode_lib,
355 unsigned int NumberOfActivePlanes,
356 int BytePerPixelY[],
357 int BytePerPixelC[],
358 double VRatio[],
359 double VRatioChroma[],
360 double SwathWidthY[],
361 double SwathWidthC[],
362 unsigned int DPPPerPlane[],
363 double HRatio[],
364 double HRatioChroma[],
365 double PixelClock[],
366 double PSCL_THROUGHPUT[],
367 double PSCL_THROUGHPUT_CHROMA[],
368 double DPPCLK[],
369 double ReadBandwidthLuma[],
370 double ReadBandwidthChroma[],
371 int ReturnBusWidth,
372 double *DCFCLKDeepSleep);
373 static void CalculateUrgentBurstFactor(
374 long swath_width_luma_ub,
375 long swath_width_chroma_ub,
376 unsigned int DETBufferSizeInKByte,
377 unsigned int SwathHeightY,
378 unsigned int SwathHeightC,
379 double LineTime,
380 double UrgentLatency,
381 double CursorBufferSize,
382 unsigned int CursorWidth,
383 unsigned int CursorBPP,
384 double VRatio,
385 double VRatioC,
386 double BytePerPixelInDETY,
387 double BytePerPixelInDETC,
388 double DETBufferSizeY,
389 double DETBufferSizeC,
390 double *UrgentBurstFactorCursor,
391 double *UrgentBurstFactorLuma,
392 double *UrgentBurstFactorChroma,
393 bool *NotEnoughUrgentLatencyHiding);
394
395 static void UseMinimumDCFCLK(
396 struct display_mode_lib *mode_lib,
397 struct vba_vars_st *v,
398 int MaxPrefetchMode,
399 int ReorderingBytes);
400
401 static void CalculatePixelDeliveryTimes(
402 unsigned int NumberOfActivePlanes,
403 double VRatio[],
404 double VRatioChroma[],
405 double VRatioPrefetchY[],
406 double VRatioPrefetchC[],
407 unsigned int swath_width_luma_ub[],
408 unsigned int swath_width_chroma_ub[],
409 unsigned int DPPPerPlane[],
410 double HRatio[],
411 double HRatioChroma[],
412 double PixelClock[],
413 double PSCL_THROUGHPUT[],
414 double PSCL_THROUGHPUT_CHROMA[],
415 double DPPCLK[],
416 int BytePerPixelC[],
417 enum scan_direction_class SourceScan[],
418 unsigned int NumberOfCursors[],
419 unsigned int CursorWidth[][2],
420 unsigned int CursorBPP[][2],
421 unsigned int BlockWidth256BytesY[],
422 unsigned int BlockHeight256BytesY[],
423 unsigned int BlockWidth256BytesC[],
424 unsigned int BlockHeight256BytesC[],
425 double DisplayPipeLineDeliveryTimeLuma[],
426 double DisplayPipeLineDeliveryTimeChroma[],
427 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
428 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
429 double DisplayPipeRequestDeliveryTimeLuma[],
430 double DisplayPipeRequestDeliveryTimeChroma[],
431 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
432 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
433 double CursorRequestDeliveryTime[],
434 double CursorRequestDeliveryTimePrefetch[]);
435
436 static void CalculateMetaAndPTETimes(
437 int NumberOfActivePlanes,
438 bool GPUVMEnable,
439 int MetaChunkSize,
440 int MinMetaChunkSizeBytes,
441 int HTotal[],
442 double VRatio[],
443 double VRatioChroma[],
444 double DestinationLinesToRequestRowInVBlank[],
445 double DestinationLinesToRequestRowInImmediateFlip[],
446 bool DCCEnable[],
447 double PixelClock[],
448 int BytePerPixelY[],
449 int BytePerPixelC[],
450 enum scan_direction_class SourceScan[],
451 int dpte_row_height[],
452 int dpte_row_height_chroma[],
453 int meta_row_width[],
454 int meta_row_width_chroma[],
455 int meta_row_height[],
456 int meta_row_height_chroma[],
457 int meta_req_width[],
458 int meta_req_width_chroma[],
459 int meta_req_height[],
460 int meta_req_height_chroma[],
461 int dpte_group_bytes[],
462 int PTERequestSizeY[],
463 int PTERequestSizeC[],
464 int PixelPTEReqWidthY[],
465 int PixelPTEReqHeightY[],
466 int PixelPTEReqWidthC[],
467 int PixelPTEReqHeightC[],
468 int dpte_row_width_luma_ub[],
469 int dpte_row_width_chroma_ub[],
470 double DST_Y_PER_PTE_ROW_NOM_L[],
471 double DST_Y_PER_PTE_ROW_NOM_C[],
472 double DST_Y_PER_META_ROW_NOM_L[],
473 double DST_Y_PER_META_ROW_NOM_C[],
474 double TimePerMetaChunkNominal[],
475 double TimePerChromaMetaChunkNominal[],
476 double TimePerMetaChunkVBlank[],
477 double TimePerChromaMetaChunkVBlank[],
478 double TimePerMetaChunkFlip[],
479 double TimePerChromaMetaChunkFlip[],
480 double time_per_pte_group_nom_luma[],
481 double time_per_pte_group_vblank_luma[],
482 double time_per_pte_group_flip_luma[],
483 double time_per_pte_group_nom_chroma[],
484 double time_per_pte_group_vblank_chroma[],
485 double time_per_pte_group_flip_chroma[]);
486
487 static void CalculateVMGroupAndRequestTimes(
488 unsigned int NumberOfActivePlanes,
489 bool GPUVMEnable,
490 unsigned int GPUVMMaxPageTableLevels,
491 unsigned int HTotal[],
492 int BytePerPixelC[],
493 double DestinationLinesToRequestVMInVBlank[],
494 double DestinationLinesToRequestVMInImmediateFlip[],
495 bool DCCEnable[],
496 double PixelClock[],
497 int dpte_row_width_luma_ub[],
498 int dpte_row_width_chroma_ub[],
499 int vm_group_bytes[],
500 unsigned int dpde0_bytes_per_frame_ub_l[],
501 unsigned int dpde0_bytes_per_frame_ub_c[],
502 int meta_pte_bytes_per_frame_ub_l[],
503 int meta_pte_bytes_per_frame_ub_c[],
504 double TimePerVMGroupVBlank[],
505 double TimePerVMGroupFlip[],
506 double TimePerVMRequestVBlank[],
507 double TimePerVMRequestFlip[]);
508
509 static void CalculateStutterEfficiency(
510 int NumberOfActivePlanes,
511 long ROBBufferSizeInKByte,
512 double TotalDataReadBandwidth,
513 double DCFCLK,
514 double ReturnBW,
515 double SRExitTime,
516 bool SynchronizedVBlank,
517 int DPPPerPlane[],
518 unsigned int DETBufferSizeY[],
519 int BytePerPixelY[],
520 double BytePerPixelDETY[],
521 double SwathWidthY[],
522 int SwathHeightY[],
523 int SwathHeightC[],
524 double DCCRateLuma[],
525 double DCCRateChroma[],
526 int HTotal[],
527 int VTotal[],
528 double PixelClock[],
529 double VRatio[],
530 enum scan_direction_class SourceScan[],
531 int BlockHeight256BytesY[],
532 int BlockWidth256BytesY[],
533 int BlockHeight256BytesC[],
534 int BlockWidth256BytesC[],
535 int DCCYMaxUncompressedBlock[],
536 int DCCCMaxUncompressedBlock[],
537 int VActive[],
538 bool DCCEnable[],
539 bool WritebackEnable[],
540 double ReadBandwidthPlaneLuma[],
541 double ReadBandwidthPlaneChroma[],
542 double meta_row_bw[],
543 double dpte_row_bw[],
544 double *StutterEfficiencyNotIncludingVBlank,
545 double *StutterEfficiency,
546 double *StutterPeriodOut);
547
548 static void CalculateSwathAndDETConfiguration(
549 bool ForceSingleDPP,
550 int NumberOfActivePlanes,
551 unsigned int DETBufferSizeInKByte,
552 double MaximumSwathWidthLuma[],
553 double MaximumSwathWidthChroma[],
554 enum scan_direction_class SourceScan[],
555 enum source_format_class SourcePixelFormat[],
556 enum dm_swizzle_mode SurfaceTiling[],
557 int ViewportWidth[],
558 int ViewportHeight[],
559 int SurfaceWidthY[],
560 int SurfaceWidthC[],
561 int SurfaceHeightY[],
562 int SurfaceHeightC[],
563 int Read256BytesBlockHeightY[],
564 int Read256BytesBlockHeightC[],
565 int Read256BytesBlockWidthY[],
566 int Read256BytesBlockWidthC[],
567 enum odm_combine_mode ODMCombineEnabled[],
568 int BlendingAndTiming[],
569 int BytePerPixY[],
570 int BytePerPixC[],
571 double BytePerPixDETY[],
572 double BytePerPixDETC[],
573 int HActive[],
574 double HRatio[],
575 double HRatioChroma[],
576 int DPPPerPlane[],
577 int swath_width_luma_ub[],
578 int swath_width_chroma_ub[],
579 double SwathWidth[],
580 double SwathWidthChroma[],
581 int SwathHeightY[],
582 int SwathHeightC[],
583 unsigned int DETBufferSizeY[],
584 unsigned int DETBufferSizeC[],
585 bool ViewportSizeSupportPerPlane[],
586 bool *ViewportSizeSupport);
587 static void CalculateSwathWidth(
588 bool ForceSingleDPP,
589 int NumberOfActivePlanes,
590 enum source_format_class SourcePixelFormat[],
591 enum scan_direction_class SourceScan[],
592 unsigned int ViewportWidth[],
593 unsigned int ViewportHeight[],
594 unsigned int SurfaceWidthY[],
595 unsigned int SurfaceWidthC[],
596 unsigned int SurfaceHeightY[],
597 unsigned int SurfaceHeightC[],
598 enum odm_combine_mode ODMCombineEnabled[],
599 int BytePerPixY[],
600 int BytePerPixC[],
601 int Read256BytesBlockHeightY[],
602 int Read256BytesBlockHeightC[],
603 int Read256BytesBlockWidthY[],
604 int Read256BytesBlockWidthC[],
605 int BlendingAndTiming[],
606 unsigned int HActive[],
607 double HRatio[],
608 int DPPPerPlane[],
609 double SwathWidthSingleDPPY[],
610 double SwathWidthSingleDPPC[],
611 double SwathWidthY[],
612 double SwathWidthC[],
613 int MaximumSwathHeightY[],
614 int MaximumSwathHeightC[],
615 unsigned int swath_width_luma_ub[],
616 unsigned int swath_width_chroma_ub[]);
617 static double CalculateExtraLatency(
618 long RoundTripPingLatencyCycles,
619 long ReorderingBytes,
620 double DCFCLK,
621 int TotalNumberOfActiveDPP,
622 int PixelChunkSizeInKByte,
623 int TotalNumberOfDCCActiveDPP,
624 int MetaChunkSize,
625 double ReturnBW,
626 bool GPUVMEnable,
627 bool HostVMEnable,
628 int NumberOfActivePlanes,
629 int NumberOfDPP[],
630 int dpte_group_bytes[],
631 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
632 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
633 double HostVMMinPageSize,
634 int HostVMMaxNonCachedPageTableLevels);
635 static double CalculateExtraLatencyBytes(
636 long ReorderingBytes,
637 int TotalNumberOfActiveDPP,
638 int PixelChunkSizeInKByte,
639 int TotalNumberOfDCCActiveDPP,
640 int MetaChunkSize,
641 bool GPUVMEnable,
642 bool HostVMEnable,
643 int NumberOfActivePlanes,
644 int NumberOfDPP[],
645 int dpte_group_bytes[],
646 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
647 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
648 double HostVMMinPageSize,
649 int HostVMMaxNonCachedPageTableLevels);
650 static double CalculateUrgentLatency(
651 double UrgentLatencyPixelDataOnly,
652 double UrgentLatencyPixelMixedWithVMData,
653 double UrgentLatencyVMDataOnly,
654 bool DoUrgentLatencyAdjustment,
655 double UrgentLatencyAdjustmentFabricClockComponent,
656 double UrgentLatencyAdjustmentFabricClockReference,
657 double FabricClockSingle);
658
/*
 * dml30_recalculate - run the full recalculation sequence on @mode_lib.
 *
 * The same mode_lib pointer is handed, unchanged, to four stages that run
 * strictly in the order written below:
 *   1. ModeSupportAndSystemConfiguration
 *   2. PixelClockAdjustmentForProgressiveToInterlaceUnit
 *   3. DisplayPipeConfiguration
 *   4. DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
 *
 * Nothing is returned; any results are left wherever the stages store them.
 */
void dml30_recalculate(struct display_mode_lib *mode_lib)
{
	/* stage 1 */
	ModeSupportAndSystemConfiguration(mode_lib);

	/* stage 2 */
	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);

	/* stage 3 */
	DisplayPipeConfiguration(mode_lib);

	/* stage 4 */
	DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
666
dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)667 static unsigned int dscceComputeDelay(
668 unsigned int bpc,
669 double BPP,
670 unsigned int sliceWidth,
671 unsigned int numSlices,
672 enum output_format_class pixelFormat,
673 enum output_encoder_class Output)
674 {
675 // valid bpc = source bits per component in the set of {8, 10, 12}
676 // valid bpp = increments of 1/16 of a bit
677 // min = 6/7/8 in N420/N422/444, respectively
678 // max = such that compression is 1:1
679 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
680 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
681 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
682
683 // fixed value
684 unsigned int rcModelSize = 8192;
685
686 // N422/N420 operate at 2 pixels per clock
687 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L,
688 Delay, pixels;
689
690 if (pixelFormat == dm_420)
691 pixelsPerClock = 2;
692 // #all other modes operate at 1 pixel per clock
693 else if (pixelFormat == dm_444)
694 pixelsPerClock = 1;
695 else if (pixelFormat == dm_n422)
696 pixelsPerClock = 2;
697 else
698 pixelsPerClock = 1;
699
700 //initial transmit delay as per PPS
701 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
702
703 //compute ssm delay
704 if (bpc == 8)
705 D = 81;
706 else if (bpc == 10)
707 D = 89;
708 else
709 D = 113;
710
711 //divide by pixel per cycle to compute slice width as seen by DSC
712 w = sliceWidth / pixelsPerClock;
713
714 //422 mode has an additional cycle of delay
715 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
716 s = 0;
717 else
718 s = 1;
719
720 //main calculation for the dscce
721 ix = initalXmitDelay + 45;
722 wx = (w + 2) / 3;
723 P = 3 * wx - w;
724 l0 = ix / w;
725 a = ix + P * l0;
726 ax = (a + 2) / 3 + D + 6 + 1;
727 L = (ax + wx - 1) / wx;
728 if ((ix % w) == 0 && P != 0)
729 lstall = 1;
730 else
731 lstall = 0;
732 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
733
734 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
735 pixels = Delay * 3 * pixelsPerClock;
736 return pixels;
737 }
738
dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)739 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
740 {
741 unsigned int Delay = 0;
742
743 if (pixelFormat == dm_420) {
744 // sfr
745 Delay = Delay + 2;
746 // dsccif
747 Delay = Delay + 0;
748 // dscc - input deserializer
749 Delay = Delay + 3;
750 // dscc gets pixels every other cycle
751 Delay = Delay + 2;
752 // dscc - input cdc fifo
753 Delay = Delay + 12;
754 // dscc gets pixels every other cycle
755 Delay = Delay + 13;
756 // dscc - cdc uncertainty
757 Delay = Delay + 2;
758 // dscc - output cdc fifo
759 Delay = Delay + 7;
760 // dscc gets pixels every other cycle
761 Delay = Delay + 3;
762 // dscc - cdc uncertainty
763 Delay = Delay + 2;
764 // dscc - output serializer
765 Delay = Delay + 1;
766 // sft
767 Delay = Delay + 1;
768 } else if (pixelFormat == dm_n422) {
769 // sfr
770 Delay = Delay + 2;
771 // dsccif
772 Delay = Delay + 1;
773 // dscc - input deserializer
774 Delay = Delay + 5;
775 // dscc - input cdc fifo
776 Delay = Delay + 25;
777 // dscc - cdc uncertainty
778 Delay = Delay + 2;
779 // dscc - output cdc fifo
780 Delay = Delay + 10;
781 // dscc - cdc uncertainty
782 Delay = Delay + 2;
783 // dscc - output serializer
784 Delay = Delay + 1;
785 // sft
786 Delay = Delay + 1;
787 } else {
788 // sfr
789 Delay = Delay + 2;
790 // dsccif
791 Delay = Delay + 0;
792 // dscc - input deserializer
793 Delay = Delay + 3;
794 // dscc - input cdc fifo
795 Delay = Delay + 12;
796 // dscc - cdc uncertainty
797 Delay = Delay + 2;
798 // dscc - output cdc fifo
799 Delay = Delay + 7;
800 // dscc - output serializer
801 Delay = Delay + 1;
802 // dscc - cdc uncertainty
803 Delay = Delay + 2;
804 // sft
805 Delay = Delay + 1;
806 }
807
808 return Delay;
809 }
810
CalculatePrefetchSchedule(struct display_mode_lib * mode_lib,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,Pipe * myPipe,unsigned int DSCDelay,double DPPCLKDelaySubtotalPlusCNVCFormater,double DPPCLKDelaySCL,double DPPCLKDelaySCLLBOnly,double DPPCLKDelayCNVCCursor,double DISPCLKDelaySubtotal,unsigned int DPP_RECOUT_WIDTH,enum output_format_class OutputFormat,unsigned int MaxInterDCNTileRepeaters,unsigned int VStartup,unsigned int MaxVStartup,unsigned int GPUVMPageTableLevels,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,double HostVMMinPageSize,bool DynamicMetadataEnable,bool DynamicMetadataVMEnabled,int DynamicMetadataLinesBeforeActiveRequired,unsigned int DynamicMetadataTransmittedBytes,double UrgentLatency,double UrgentExtraLatency,double TCalc,unsigned int PDEAndMetaPTEBytesFrame,unsigned int MetaRowByte,unsigned int PixelPTEBytesPerRow,double PrefetchSourceLinesY,unsigned int SwathWidthY,int BytePerPixelY,double VInitPreFillY,unsigned int MaxNumSwathY,double PrefetchSourceLinesC,unsigned int SwathWidthC,int BytePerPixelC,double VInitPreFillC,unsigned int MaxNumSwathC,long swath_width_luma_ub,long swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double TWait,bool ProgressiveToInterlaceUnitInOPP,double * DSTXAfterScaler,double * DSTYAfterScaler,double * DestinationLinesForPrefetch,double * PrefetchBandwidth,double * DestinationLinesToRequestVMInVBlank,double * DestinationLinesToRequestRowInVBlank,double * VRatioPrefetchY,double * VRatioPrefetchC,double * RequiredPrefetchPixDataBWLuma,double * RequiredPrefetchPixDataBWChroma,bool * NotEnoughTimeForDynamicMetadata,double * Tno_bw,double * prefetch_vmrow_bw,double * Tdmdl_vm,double * Tdmdl,unsigned int * VUpdateOffsetPix,double * VUpdateWidthPix,double * VReadyOffsetPix)811 static bool CalculatePrefetchSchedule(
812 struct display_mode_lib *mode_lib,
813 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
814 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
815 Pipe *myPipe,
816 unsigned int DSCDelay,
817 double DPPCLKDelaySubtotalPlusCNVCFormater,
818 double DPPCLKDelaySCL,
819 double DPPCLKDelaySCLLBOnly,
820 double DPPCLKDelayCNVCCursor,
821 double DISPCLKDelaySubtotal,
822 unsigned int DPP_RECOUT_WIDTH,
823 enum output_format_class OutputFormat,
824 unsigned int MaxInterDCNTileRepeaters,
825 unsigned int VStartup,
826 unsigned int MaxVStartup,
827 unsigned int GPUVMPageTableLevels,
828 bool GPUVMEnable,
829 bool HostVMEnable,
830 unsigned int HostVMMaxNonCachedPageTableLevels,
831 double HostVMMinPageSize,
832 bool DynamicMetadataEnable,
833 bool DynamicMetadataVMEnabled,
834 int DynamicMetadataLinesBeforeActiveRequired,
835 unsigned int DynamicMetadataTransmittedBytes,
836 double UrgentLatency,
837 double UrgentExtraLatency,
838 double TCalc,
839 unsigned int PDEAndMetaPTEBytesFrame,
840 unsigned int MetaRowByte,
841 unsigned int PixelPTEBytesPerRow,
842 double PrefetchSourceLinesY,
843 unsigned int SwathWidthY,
844 int BytePerPixelY,
845 double VInitPreFillY,
846 unsigned int MaxNumSwathY,
847 double PrefetchSourceLinesC,
848 unsigned int SwathWidthC,
849 int BytePerPixelC,
850 double VInitPreFillC,
851 unsigned int MaxNumSwathC,
852 long swath_width_luma_ub,
853 long swath_width_chroma_ub,
854 unsigned int SwathHeightY,
855 unsigned int SwathHeightC,
856 double TWait,
857 bool ProgressiveToInterlaceUnitInOPP,
858 double *DSTXAfterScaler,
859 double *DSTYAfterScaler,
860 double *DestinationLinesForPrefetch,
861 double *PrefetchBandwidth,
862 double *DestinationLinesToRequestVMInVBlank,
863 double *DestinationLinesToRequestRowInVBlank,
864 double *VRatioPrefetchY,
865 double *VRatioPrefetchC,
866 double *RequiredPrefetchPixDataBWLuma,
867 double *RequiredPrefetchPixDataBWChroma,
868 bool *NotEnoughTimeForDynamicMetadata,
869 double *Tno_bw,
870 double *prefetch_vmrow_bw,
871 double *Tdmdl_vm,
872 double *Tdmdl,
873 unsigned int *VUpdateOffsetPix,
874 double *VUpdateWidthPix,
875 double *VReadyOffsetPix)
876 {
877 bool MyError = false;
878 unsigned int DPPCycles = 0, DISPCLKCycles = 0;
879 double DSTTotalPixelsAfterScaler = 0;
880 double LineTime = 0, Tsetup = 0;
881 double dst_y_prefetch_equ = 0;
882 double Tsw_oto = 0;
883 double prefetch_bw_oto = 0;
884 double Tvm_oto = 0;
885 double Tr0_oto = 0;
886 double Tvm_oto_lines = 0;
887 double Tr0_oto_lines = 0;
888 double dst_y_prefetch_oto = 0;
889 double TimeForFetchingMetaPTE = 0;
890 double TimeForFetchingRowInVBlank = 0;
891 double LinesToRequestPrefetchPixelData = 0;
892 double HostVMInefficiencyFactor = 0;
893 unsigned int HostVMDynamicLevelsTrips = 0;
894 double trip_to_mem = 0;
895 double Tvm_trips = 0;
896 double Tr0_trips = 0;
897 double Tvm_trips_rounded = 0;
898 double Tr0_trips_rounded = 0;
899 double Lsw_oto = 0;
900 double Tpre_rounded = 0;
901 double prefetch_bw_equ = 0;
902 double Tvm_equ = 0;
903 double Tr0_equ = 0;
904 double Tdmbf = 0;
905 double Tdmec = 0;
906 double Tdmsks = 0;
907
908 if (GPUVMEnable == true && HostVMEnable == true) {
909 HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
910 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
911 } else {
912 HostVMInefficiencyFactor = 1;
913 HostVMDynamicLevelsTrips = 0;
914 }
915
916 CalculateDynamicMetadataParameters(
917 MaxInterDCNTileRepeaters,
918 myPipe->DPPCLK,
919 myPipe->DISPCLK,
920 myPipe->DCFCLKDeepSleep,
921 myPipe->PixelClock,
922 myPipe->HTotal,
923 myPipe->VBlank,
924 DynamicMetadataTransmittedBytes,
925 DynamicMetadataLinesBeforeActiveRequired,
926 myPipe->InterlaceEnable,
927 ProgressiveToInterlaceUnitInOPP,
928 &Tsetup,
929 &Tdmbf,
930 &Tdmec,
931 &Tdmsks);
932
933 LineTime = myPipe->HTotal / myPipe->PixelClock;
934 trip_to_mem = UrgentLatency;
935 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
936
937 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
938 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
939 } else {
940 *Tdmdl = TWait + UrgentExtraLatency;
941 }
942
943 if (DynamicMetadataEnable == true) {
944 if (VStartup * LineTime < Tsetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
945 *NotEnoughTimeForDynamicMetadata = true;
946 } else {
947 *NotEnoughTimeForDynamicMetadata = false;
948 dml_print("DML: Not Enough Time for Dynamic Meta!\n");
949 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
950 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
951 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
952 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
953 }
954 } else {
955 *NotEnoughTimeForDynamicMetadata = false;
956 }
957
958 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
959
960 if (myPipe->ScalerEnabled)
961 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
962 else
963 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
964
965 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
966
967 DISPCLKCycles = DISPCLKDelaySubtotal;
968
969 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
970 return true;
971
972 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK
973 + DSCDelay;
974
975 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineEnabled)?18:0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
976
977 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && ProgressiveToInterlaceUnitInOPP))
978 *DSTYAfterScaler = 1;
979 else
980 *DSTYAfterScaler = 0;
981
982 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
983 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
984 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
985
986 MyError = false;
987
988
989 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
990 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
991 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
992
993 if (GPUVMEnable) {
994 if (GPUVMPageTableLevels >= 3) {
995 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
996 } else
997 *Tno_bw = 0;
998 } else if (!myPipe->DCCEnable)
999 *Tno_bw = LineTime;
1000 else
1001 *Tno_bw = LineTime / 4;
1002
1003 dst_y_prefetch_equ = VStartup - (Tsetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime
1004 - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1005
1006 Lsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC);
1007 Tsw_oto = Lsw_oto * LineTime;
1008
1009 prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC) / Tsw_oto;
1010
1011 if (GPUVMEnable == true) {
1012 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
1013 Tvm_trips,
1014 LineTime / 4.0);
1015 } else
1016 Tvm_oto = LineTime / 4.0;
1017
1018 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1019 Tr0_oto = dml_max3(
1020 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
1021 LineTime - Tvm_oto, LineTime / 4);
1022 } else
1023 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1024
1025 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1026 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1027 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1028
1029 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1030 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1031
1032 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1033 dml_print("DML: dst_y_prefetch_equ: %f\n", dst_y_prefetch_equ);
1034
1035 dml_print("DML: LineTime: %f\n", LineTime);
1036 dml_print("DML: VStartup: %d\n", VStartup);
1037 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1038 dml_print("DML: Tsetup: %fus - time from vstartup to vready\n", Tsetup);
1039 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1040 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1041 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1042 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1043 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1044 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm);
1045 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
1046 dml_print("DML: dst_x_after_scl: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler);
1047 dml_print("DML: dst_y_after_scl: %d lines - number of lines of pipeline and buffer delay after scaler \n", (int)*DSTYAfterScaler);
1048
1049 *PrefetchBandwidth = 0;
1050 *DestinationLinesToRequestVMInVBlank = 0;
1051 *DestinationLinesToRequestRowInVBlank = 0;
1052 *VRatioPrefetchY = 0;
1053 *VRatioPrefetchC = 0;
1054 *RequiredPrefetchPixDataBWLuma = 0;
1055 if (dst_y_prefetch_equ > 1) {
1056 double PrefetchBandwidth1 = 0;
1057 double PrefetchBandwidth2 = 0;
1058 double PrefetchBandwidth3 = 0;
1059 double PrefetchBandwidth4 = 0;
1060
1061 if (Tpre_rounded - *Tno_bw > 0)
1062 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
1063 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1064 + PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY
1065 + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC)
1066 / (Tpre_rounded - *Tno_bw);
1067 else
1068 PrefetchBandwidth1 = 0;
1069
1070 if (VStartup == MaxVStartup && (PrefetchBandwidth1 > 4 * prefetch_bw_oto) && (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - *Tno_bw) > 0) {
1071 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - *Tno_bw);
1072 }
1073
1074 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1075 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame *
1076 HostVMInefficiencyFactor + PrefetchSourceLinesY *
1077 swath_width_luma_ub * BytePerPixelY +
1078 PrefetchSourceLinesC * swath_width_chroma_ub *
1079 BytePerPixelC) /
1080 (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1081 else
1082 PrefetchBandwidth2 = 0;
1083
1084 if (Tpre_rounded - Tvm_trips_rounded > 0)
1085 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow *
1086 HostVMInefficiencyFactor + PrefetchSourceLinesY *
1087 swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC *
1088 swath_width_chroma_ub * BytePerPixelC) / (Tpre_rounded -
1089 Tvm_trips_rounded);
1090 else
1091 PrefetchBandwidth3 = 0;
1092
1093 if (VStartup == MaxVStartup && (PrefetchBandwidth3 > 4 * prefetch_bw_oto) && Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1094 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - Tvm_trips_rounded);
1095 }
1096
1097 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1098 PrefetchBandwidth4 = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC)
1099 / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1100 else
1101 PrefetchBandwidth4 = 0;
1102
1103 {
1104 bool Case1OK;
1105 bool Case2OK;
1106 bool Case3OK;
1107
1108 if (PrefetchBandwidth1 > 0) {
1109 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
1110 >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1111 Case1OK = true;
1112 } else {
1113 Case1OK = false;
1114 }
1115 } else {
1116 Case1OK = false;
1117 }
1118
1119 if (PrefetchBandwidth2 > 0) {
1120 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
1121 >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1122 Case2OK = true;
1123 } else {
1124 Case2OK = false;
1125 }
1126 } else {
1127 Case2OK = false;
1128 }
1129
1130 if (PrefetchBandwidth3 > 0) {
1131 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3
1132 < Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1133 Case3OK = true;
1134 } else {
1135 Case3OK = false;
1136 }
1137 } else {
1138 Case3OK = false;
1139 }
1140
1141 if (Case1OK) {
1142 prefetch_bw_equ = PrefetchBandwidth1;
1143 } else if (Case2OK) {
1144 prefetch_bw_equ = PrefetchBandwidth2;
1145 } else if (Case3OK) {
1146 prefetch_bw_equ = PrefetchBandwidth3;
1147 } else {
1148 prefetch_bw_equ = PrefetchBandwidth4;
1149 }
1150
1151 dml_print("DML: prefetch_bw_equ: %f\n", prefetch_bw_equ);
1152
1153 if (prefetch_bw_equ > 0) {
1154 if (GPUVMEnable) {
1155 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1156 } else {
1157 Tvm_equ = LineTime / 4;
1158 }
1159
1160 if ((GPUVMEnable || myPipe->DCCEnable)) {
1161 Tr0_equ = dml_max4(
1162 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1163 Tr0_trips,
1164 (LineTime - Tvm_equ) / 2,
1165 LineTime / 4);
1166 } else {
1167 Tr0_equ = (LineTime - Tvm_equ) / 2;
1168 }
1169 } else {
1170 Tvm_equ = 0;
1171 Tr0_equ = 0;
1172 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1173 }
1174 }
1175
1176 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1177 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1178 TimeForFetchingMetaPTE = Tvm_oto;
1179 TimeForFetchingRowInVBlank = Tr0_oto;
1180 *PrefetchBandwidth = prefetch_bw_oto;
1181 } else {
1182 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1183 TimeForFetchingMetaPTE = Tvm_equ;
1184 TimeForFetchingRowInVBlank = Tr0_equ;
1185 *PrefetchBandwidth = prefetch_bw_equ;
1186 }
1187
1188 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1189
1190 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1191
1192
1193 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank
1194 - 2 * *DestinationLinesToRequestRowInVBlank;
1195
1196 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1197
1198 *VRatioPrefetchY = (double) PrefetchSourceLinesY
1199 / LinesToRequestPrefetchPixelData;
1200 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1201 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1202 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1203 *VRatioPrefetchY = dml_max((double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1204 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1205 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1206 } else {
1207 MyError = true;
1208 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1209 *VRatioPrefetchY = 0;
1210 }
1211 }
1212
1213 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1214 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1215
1216 if ((SwathHeightC > 4)) {
1217 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1218 *VRatioPrefetchC = dml_max(*VRatioPrefetchC,
1219 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1220 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1221 } else {
1222 MyError = true;
1223 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1224 *VRatioPrefetchC = 0;
1225 }
1226 }
1227
1228 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * BytePerPixelY * swath_width_luma_ub / LineTime;
1229 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * BytePerPixelC * swath_width_chroma_ub / LineTime;
1230 } else {
1231 MyError = true;
1232 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1233 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1234 *VRatioPrefetchY = 0;
1235 *VRatioPrefetchC = 0;
1236 *RequiredPrefetchPixDataBWLuma = 0;
1237 *RequiredPrefetchPixDataBWChroma = 0;
1238 }
1239
1240 dml_print("DML: Tpre: %fus - sum of tim to request meta pte, 2 x data pte + meta data, swaths\n", (double)LinesToRequestPrefetchPixelData * LineTime + 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1241 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1242 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1243 dml_print("DML: Tr1: %fus - time to fetch second row of data pagetables and second row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1244 dml_print("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)LinesToRequestPrefetchPixelData * LineTime);
1245 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
1246 dml_print("DML: Tvstartup - Tsetup - Tcalc - Twait - Tpre - To > 0\n");
1247 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank - (*DSTYAfterScaler + ((*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - Tsetup);
1248 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1249
1250 } else {
1251 MyError = true;
1252 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1253 }
1254
1255 {
1256 double prefetch_vm_bw = 0;
1257 double prefetch_row_bw = 0;
1258
1259 if (PDEAndMetaPTEBytesFrame == 0) {
1260 prefetch_vm_bw = 0;
1261 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1262 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1263 } else {
1264 prefetch_vm_bw = 0;
1265 MyError = true;
1266 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1267 }
1268 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1269 prefetch_row_bw = 0;
1270 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1271 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1272 } else {
1273 prefetch_row_bw = 0;
1274 MyError = true;
1275 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1276 }
1277
1278 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1279 }
1280
1281 if (MyError) {
1282 *PrefetchBandwidth = 0;
1283 TimeForFetchingMetaPTE = 0;
1284 TimeForFetchingRowInVBlank = 0;
1285 *DestinationLinesToRequestVMInVBlank = 0;
1286 *DestinationLinesToRequestRowInVBlank = 0;
1287 *DestinationLinesForPrefetch = 0;
1288 LinesToRequestPrefetchPixelData = 0;
1289 *VRatioPrefetchY = 0;
1290 *VRatioPrefetchC = 0;
1291 *RequiredPrefetchPixDataBWLuma = 0;
1292 *RequiredPrefetchPixDataBWChroma = 0;
1293 }
1294
1295 return MyError;
1296 }
1297
/*
 * Snap Clock onto the grid of values (4 * VCOSpeed) / N.
 *
 * N is dml_floor((4 * VCOSpeed) / Clock, 1); dml_floor() is defined outside
 * this block and presumably rounds down to a whole number (confirm against
 * dml_inline_defs.h). With a rounded-down divider the result is expected to
 * be at or above Clock.
 *
 * NOTE(review): nothing here guards against Clock == 0 or a divider of 0;
 * callers are assumed to pass sane clocks.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double vco_times_four = VCOSpeed * 4;
	const double divider = dml_floor(vco_times_four / Clock, 1);

	return vco_times_four / divider;
}
1302
/*
 * Snap Clock onto the grid of values (4 * VCOSpeed) / N.
 *
 * N is dml_ceil((4 * VCOSpeed) / Clock, 1); dml_ceil() is defined outside
 * this block and presumably rounds up to a whole number (confirm against
 * dml_inline_defs.h). With a rounded-up divider the result is expected to
 * be at or below Clock.
 *
 * NOTE(review): nothing here guards against Clock == 0; callers are assumed
 * to pass sane clocks.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1);
	const double vco_times_four = VCOSpeed * 4;

	return vco_times_four / divider;
}
1307
CalculateDCCConfiguration(bool DCCEnabled,bool DCCProgrammingAssumesScanDirectionUnknown,enum source_format_class SourcePixelFormat,unsigned int SurfaceWidthLuma,unsigned int SurfaceWidthChroma,unsigned int SurfaceHeightLuma,unsigned int SurfaceHeightChroma,double DETBufferSize,unsigned int RequestHeight256ByteLuma,unsigned int RequestHeight256ByteChroma,enum dm_swizzle_mode TilingFormat,unsigned int BytePerPixelY,unsigned int BytePerPixelC,double BytePerPixelDETY,double BytePerPixelDETC,enum scan_direction_class ScanOrientation,unsigned int * MaxUncompressedBlockLuma,unsigned int * MaxUncompressedBlockChroma,unsigned int * MaxCompressedBlockLuma,unsigned int * MaxCompressedBlockChroma,unsigned int * IndependentBlockLuma,unsigned int * IndependentBlockChroma)1308 static void CalculateDCCConfiguration(
1309 bool DCCEnabled,
1310 bool DCCProgrammingAssumesScanDirectionUnknown,
1311 enum source_format_class SourcePixelFormat,
1312 unsigned int SurfaceWidthLuma,
1313 unsigned int SurfaceWidthChroma,
1314 unsigned int SurfaceHeightLuma,
1315 unsigned int SurfaceHeightChroma,
1316 double DETBufferSize,
1317 unsigned int RequestHeight256ByteLuma,
1318 unsigned int RequestHeight256ByteChroma,
1319 enum dm_swizzle_mode TilingFormat,
1320 unsigned int BytePerPixelY,
1321 unsigned int BytePerPixelC,
1322 double BytePerPixelDETY,
1323 double BytePerPixelDETC,
1324 enum scan_direction_class ScanOrientation,
1325 unsigned int *MaxUncompressedBlockLuma,
1326 unsigned int *MaxUncompressedBlockChroma,
1327 unsigned int *MaxCompressedBlockLuma,
1328 unsigned int *MaxCompressedBlockChroma,
1329 unsigned int *IndependentBlockLuma,
1330 unsigned int *IndependentBlockChroma)
1331 {
1332 int yuv420 = 0;
1333 int horz_div_l = 0;
1334 int horz_div_c = 0;
1335 int vert_div_l = 0;
1336 int vert_div_c = 0;
1337
1338 int req128_horz_wc_l = 0;
1339 int req128_horz_wc_c = 0;
1340 int req128_vert_wc_l = 0;
1341 int req128_vert_wc_c = 0;
1342 int segment_order_horz_contiguous_luma = 0;
1343 int segment_order_horz_contiguous_chroma = 0;
1344 int segment_order_vert_contiguous_luma = 0;
1345 int segment_order_vert_contiguous_chroma = 0;
1346
1347 long full_swath_bytes_horz_wc_l = 0;
1348 long full_swath_bytes_horz_wc_c = 0;
1349 long full_swath_bytes_vert_wc_l = 0;
1350 long full_swath_bytes_vert_wc_c = 0;
1351
1352 long swath_buf_size = 0;
1353 double detile_buf_vp_horz_limit = 0;
1354 double detile_buf_vp_vert_limit = 0;
1355
1356 long MAS_vp_horz_limit = 0;
1357 long MAS_vp_vert_limit = 0;
1358 long max_vp_horz_width = 0;
1359 long max_vp_vert_height = 0;
1360 long eff_surf_width_l = 0;
1361 long eff_surf_width_c = 0;
1362 long eff_surf_height_l = 0;
1363 long eff_surf_height_c = 0;
1364
1365 typedef enum {
1366 REQ_256Bytes,
1367 REQ_128BytesNonContiguous,
1368 REQ_128BytesContiguous,
1369 REQ_NA
1370 } RequestType;
1371
1372 RequestType RequestLuma;
1373 RequestType RequestChroma;
1374
1375 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
1376 horz_div_l = 1;
1377 horz_div_c = 1;
1378 vert_div_l = 1;
1379 vert_div_c = 1;
1380
1381 if (BytePerPixelY == 1)
1382 vert_div_l = 0;
1383 if (BytePerPixelC == 1)
1384 vert_div_c = 0;
1385 if (BytePerPixelY == 8
1386 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t
1387 || TilingFormat == dm_sw_64kb_s_x))
1388 horz_div_l = 0;
1389 if (BytePerPixelC == 8
1390 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t
1391 || TilingFormat == dm_sw_64kb_s_x))
1392 horz_div_c = 0;
1393
1394 if (BytePerPixelC == 0) {
1395 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1396 detile_buf_vp_horz_limit = (double) swath_buf_size
1397 / ((double) RequestHeight256ByteLuma * BytePerPixelY
1398 / (1 + horz_div_l));
1399 detile_buf_vp_vert_limit = (double) swath_buf_size
1400 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1401 } else {
1402 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1403 detile_buf_vp_horz_limit = (double) swath_buf_size
1404 / ((double) RequestHeight256ByteLuma * BytePerPixelY
1405 / (1 + horz_div_l)
1406 + (double) RequestHeight256ByteChroma
1407 * BytePerPixelC / (1 + horz_div_c)
1408 / (1 + yuv420));
1409 detile_buf_vp_vert_limit = (double) swath_buf_size
1410 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)
1411 + 256.0 / RequestHeight256ByteChroma
1412 / (1 + vert_div_c) / (1 + yuv420));
1413 }
1414
1415 if (SourcePixelFormat == dm_420_10) {
1416 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1417 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
1418 }
1419
1420 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1421 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
1422
1423 MAS_vp_horz_limit = 5760;
1424 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1425 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1426 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
1427 eff_surf_width_l =
1428 (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1429 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1430 eff_surf_height_l = (
1431 SurfaceHeightLuma > max_vp_vert_height ?
1432 max_vp_vert_height : SurfaceHeightLuma);
1433 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
1434
1435 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1436 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1437 if (BytePerPixelC > 0) {
1438 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma
1439 * BytePerPixelC;
1440 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1441 } else {
1442 full_swath_bytes_horz_wc_c = 0;
1443 full_swath_bytes_vert_wc_c = 0;
1444 }
1445
1446 if (SourcePixelFormat == dm_420_10) {
1447 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1448 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1449 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1450 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
1451 }
1452
1453 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1454 req128_horz_wc_l = 0;
1455 req128_horz_wc_c = 0;
1456 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c
1457 && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c
1458 <= DETBufferSize) {
1459 req128_horz_wc_l = 0;
1460 req128_horz_wc_c = 1;
1461 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c
1462 && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c
1463 <= DETBufferSize) {
1464 req128_horz_wc_l = 1;
1465 req128_horz_wc_c = 0;
1466 } else {
1467 req128_horz_wc_l = 1;
1468 req128_horz_wc_c = 1;
1469 }
1470
1471 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1472 req128_vert_wc_l = 0;
1473 req128_vert_wc_c = 0;
1474 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c
1475 && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c
1476 <= DETBufferSize) {
1477 req128_vert_wc_l = 0;
1478 req128_vert_wc_c = 1;
1479 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c
1480 && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c
1481 <= DETBufferSize) {
1482 req128_vert_wc_l = 1;
1483 req128_vert_wc_c = 0;
1484 } else {
1485 req128_vert_wc_l = 1;
1486 req128_vert_wc_c = 1;
1487 }
1488
1489 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1490 segment_order_horz_contiguous_luma = 0;
1491 } else {
1492 segment_order_horz_contiguous_luma = 1;
1493 }
1494 if ((BytePerPixelY == 8
1495 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x
1496 || TilingFormat == dm_sw_64kb_d_t
1497 || TilingFormat == dm_sw_64kb_r_x))
1498 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1499 segment_order_vert_contiguous_luma = 0;
1500 } else {
1501 segment_order_vert_contiguous_luma = 1;
1502 }
1503 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1504 segment_order_horz_contiguous_chroma = 0;
1505 } else {
1506 segment_order_horz_contiguous_chroma = 1;
1507 }
1508 if ((BytePerPixelC == 8
1509 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x
1510 || TilingFormat == dm_sw_64kb_d_t
1511 || TilingFormat == dm_sw_64kb_r_x))
1512 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1513 segment_order_vert_contiguous_chroma = 0;
1514 } else {
1515 segment_order_vert_contiguous_chroma = 1;
1516 }
1517
1518 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1519 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1520 RequestLuma = REQ_256Bytes;
1521 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0)
1522 || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1523 RequestLuma = REQ_128BytesNonContiguous;
1524 } else {
1525 RequestLuma = REQ_128BytesContiguous;
1526 }
1527 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1528 RequestChroma = REQ_256Bytes;
1529 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0)
1530 || (req128_vert_wc_c == 1
1531 && segment_order_vert_contiguous_chroma == 0)) {
1532 RequestChroma = REQ_128BytesNonContiguous;
1533 } else {
1534 RequestChroma = REQ_128BytesContiguous;
1535 }
1536 } else if (ScanOrientation != dm_vert) {
1537 if (req128_horz_wc_l == 0) {
1538 RequestLuma = REQ_256Bytes;
1539 } else if (segment_order_horz_contiguous_luma == 0) {
1540 RequestLuma = REQ_128BytesNonContiguous;
1541 } else {
1542 RequestLuma = REQ_128BytesContiguous;
1543 }
1544 if (req128_horz_wc_c == 0) {
1545 RequestChroma = REQ_256Bytes;
1546 } else if (segment_order_horz_contiguous_chroma == 0) {
1547 RequestChroma = REQ_128BytesNonContiguous;
1548 } else {
1549 RequestChroma = REQ_128BytesContiguous;
1550 }
1551 } else {
1552 if (req128_vert_wc_l == 0) {
1553 RequestLuma = REQ_256Bytes;
1554 } else if (segment_order_vert_contiguous_luma == 0) {
1555 RequestLuma = REQ_128BytesNonContiguous;
1556 } else {
1557 RequestLuma = REQ_128BytesContiguous;
1558 }
1559 if (req128_vert_wc_c == 0) {
1560 RequestChroma = REQ_256Bytes;
1561 } else if (segment_order_vert_contiguous_chroma == 0) {
1562 RequestChroma = REQ_128BytesNonContiguous;
1563 } else {
1564 RequestChroma = REQ_128BytesContiguous;
1565 }
1566 }
1567
1568 if (RequestLuma == REQ_256Bytes) {
1569 *MaxUncompressedBlockLuma = 256;
1570 *MaxCompressedBlockLuma = 256;
1571 *IndependentBlockLuma = 0;
1572 } else if (RequestLuma == REQ_128BytesContiguous) {
1573 *MaxUncompressedBlockLuma = 256;
1574 *MaxCompressedBlockLuma = 128;
1575 *IndependentBlockLuma = 128;
1576 } else {
1577 *MaxUncompressedBlockLuma = 256;
1578 *MaxCompressedBlockLuma = 64;
1579 *IndependentBlockLuma = 64;
1580 }
1581
1582 if (RequestChroma == REQ_256Bytes) {
1583 *MaxUncompressedBlockChroma = 256;
1584 *MaxCompressedBlockChroma = 256;
1585 *IndependentBlockChroma = 0;
1586 } else if (RequestChroma == REQ_128BytesContiguous) {
1587 *MaxUncompressedBlockChroma = 256;
1588 *MaxCompressedBlockChroma = 128;
1589 *IndependentBlockChroma = 128;
1590 } else {
1591 *MaxUncompressedBlockChroma = 256;
1592 *MaxCompressedBlockChroma = 64;
1593 *IndependentBlockChroma = 64;
1594 }
1595
1596 if (DCCEnabled != true || BytePerPixelC == 0) {
1597 *MaxUncompressedBlockChroma = 0;
1598 *MaxCompressedBlockChroma = 0;
1599 *IndependentBlockChroma = 0;
1600 }
1601
1602 if (DCCEnabled != true) {
1603 *MaxUncompressedBlockLuma = 0;
1604 *MaxCompressedBlockLuma = 0;
1605 *IndependentBlockLuma = 0;
1606 }
1607 }
1608
1609
CalculatePrefetchSourceLines(struct display_mode_lib * mode_lib,double VRatio,double vtaps,bool Interlace,bool ProgressiveToInterlaceUnitInOPP,unsigned int SwathHeight,unsigned int ViewportYStart,double * VInitPreFill,unsigned int * MaxNumSwath)1610 static double CalculatePrefetchSourceLines(
1611 struct display_mode_lib *mode_lib,
1612 double VRatio,
1613 double vtaps,
1614 bool Interlace,
1615 bool ProgressiveToInterlaceUnitInOPP,
1616 unsigned int SwathHeight,
1617 unsigned int ViewportYStart,
1618 double *VInitPreFill,
1619 unsigned int *MaxNumSwath)
1620 {
1621 unsigned int MaxPartialSwath = 0;
1622
1623 if (ProgressiveToInterlaceUnitInOPP)
1624 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1625 else
1626 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
1627
1628 if (!mode_lib->vba.IgnoreViewportPositioning) {
1629
1630 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1631
1632 if (*VInitPreFill > 1.0)
1633 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1634 else
1635 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2)
1636 % SwathHeight;
1637 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
1638
1639 } else {
1640
1641 if (ViewportYStart != 0)
1642 dml_print(
1643 "WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1644
1645 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1646
1647 if (*VInitPreFill > 1.0)
1648 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1649 else
1650 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1)
1651 % SwathHeight;
1652 }
1653
1654 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1655 }
1656
CalculateVMAndRowBytes(struct display_mode_lib * mode_lib,bool DCCEnable,unsigned int BlockHeight256Bytes,unsigned int BlockWidth256Bytes,enum source_format_class SourcePixelFormat,unsigned int SurfaceTiling,unsigned int BytePerPixel,enum scan_direction_class ScanDirection,unsigned int SwathWidth,unsigned int ViewportHeight,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,unsigned int GPUVMMinPageSize,unsigned int HostVMMinPageSize,unsigned int PTEBufferSizeInRequests,unsigned int Pitch,unsigned int DCCMetaPitch,unsigned int * MacroTileWidth,unsigned int * MetaRowByte,unsigned int * PixelPTEBytesPerRow,bool * PTEBufferSizeNotExceeded,unsigned int * dpte_row_width_ub,unsigned int * dpte_row_height,unsigned int * MetaRequestWidth,unsigned int * MetaRequestHeight,unsigned int * meta_row_width,unsigned int * meta_row_height,unsigned int * vm_group_bytes,unsigned int * dpte_group_bytes,unsigned int * PixelPTEReqWidth,unsigned int * PixelPTEReqHeight,unsigned int * PTERequestSize,unsigned int * DPDE0BytesFrame,unsigned int * MetaPTEBytesFrame)1657 static unsigned int CalculateVMAndRowBytes(
1658 struct display_mode_lib *mode_lib,
1659 bool DCCEnable,
1660 unsigned int BlockHeight256Bytes,
1661 unsigned int BlockWidth256Bytes,
1662 enum source_format_class SourcePixelFormat,
1663 unsigned int SurfaceTiling,
1664 unsigned int BytePerPixel,
1665 enum scan_direction_class ScanDirection,
1666 unsigned int SwathWidth,
1667 unsigned int ViewportHeight,
1668 bool GPUVMEnable,
1669 bool HostVMEnable,
1670 unsigned int HostVMMaxNonCachedPageTableLevels,
1671 unsigned int GPUVMMinPageSize,
1672 unsigned int HostVMMinPageSize,
1673 unsigned int PTEBufferSizeInRequests,
1674 unsigned int Pitch,
1675 unsigned int DCCMetaPitch,
1676 unsigned int *MacroTileWidth,
1677 unsigned int *MetaRowByte,
1678 unsigned int *PixelPTEBytesPerRow,
1679 bool *PTEBufferSizeNotExceeded,
1680 unsigned int *dpte_row_width_ub,
1681 unsigned int *dpte_row_height,
1682 unsigned int *MetaRequestWidth,
1683 unsigned int *MetaRequestHeight,
1684 unsigned int *meta_row_width,
1685 unsigned int *meta_row_height,
1686 unsigned int *vm_group_bytes,
1687 unsigned int *dpte_group_bytes,
1688 unsigned int *PixelPTEReqWidth,
1689 unsigned int *PixelPTEReqHeight,
1690 unsigned int *PTERequestSize,
1691 unsigned int *DPDE0BytesFrame,
1692 unsigned int *MetaPTEBytesFrame)
1693 {
1694 unsigned int MPDEBytesFrame = 0;
1695 unsigned int DCCMetaSurfaceBytes = 0;
1696 unsigned int MacroTileSizeBytes = 0;
1697 unsigned int MacroTileHeight = 0;
1698 unsigned int ExtraDPDEBytesFrame = 0;
1699 unsigned int PDEAndMetaPTEBytesFrame = 0;
1700 unsigned int PixelPTEReqHeightPTEs = 0;
1701 unsigned int HostVMDynamicLevels = 0;
1702
1703 double FractionOfPTEReturnDrop;
1704
1705 if (GPUVMEnable == true && HostVMEnable == true) {
1706 if (HostVMMinPageSize < 2048) {
1707 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1708 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1709 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1710 } else {
1711 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1712 }
1713 }
1714
1715 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1716 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1717 if (ScanDirection != dm_vert) {
1718 *meta_row_height = *MetaRequestHeight;
1719 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth)
1720 + *MetaRequestWidth;
1721 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1722 } else {
1723 *meta_row_height = *MetaRequestWidth;
1724 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight)
1725 + *MetaRequestHeight;
1726 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1727 }
1728 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes)
1729 + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1730 if (GPUVMEnable == true) {
1731 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1732 MPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 1);
1733 } else {
1734 *MetaPTEBytesFrame = 0;
1735 MPDEBytesFrame = 0;
1736 }
1737
1738 if (DCCEnable != true) {
1739 *MetaPTEBytesFrame = 0;
1740 MPDEBytesFrame = 0;
1741 *MetaRowByte = 0;
1742 }
1743
1744 if (SurfaceTiling == dm_sw_linear) {
1745 MacroTileSizeBytes = 256;
1746 MacroTileHeight = BlockHeight256Bytes;
1747 } else {
1748 MacroTileSizeBytes = 65536;
1749 MacroTileHeight = 16 * BlockHeight256Bytes;
1750 }
1751 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1752
1753 if (GPUVMEnable == true && mode_lib->vba.GPUVMMaxPageTableLevels > 1) {
1754 if (ScanDirection != dm_vert) {
1755 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1756 } else {
1757 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1758 }
1759 ExtraDPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 2);
1760 } else {
1761 *DPDE0BytesFrame = 0;
1762 ExtraDPDEBytesFrame = 0;
1763 }
1764
1765 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame
1766 + ExtraDPDEBytesFrame;
1767
1768 if (HostVMEnable == true) {
1769 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1770 }
1771
1772 if (SurfaceTiling == dm_sw_linear) {
1773 PixelPTEReqHeightPTEs = 1;
1774 *PixelPTEReqHeight = 1;
1775 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1776 *PTERequestSize = 64;
1777 FractionOfPTEReturnDrop = 0;
1778 } else if (MacroTileSizeBytes == 4096) {
1779 PixelPTEReqHeightPTEs = 1;
1780 *PixelPTEReqHeight = MacroTileHeight;
1781 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1782 *PTERequestSize = 64;
1783 if (ScanDirection != dm_vert)
1784 FractionOfPTEReturnDrop = 0;
1785 else
1786 FractionOfPTEReturnDrop = 7.0 / 8;
1787 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1788 PixelPTEReqHeightPTEs = 16;
1789 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1790 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1791 *PTERequestSize = 128;
1792 FractionOfPTEReturnDrop = 0;
1793 } else {
1794 PixelPTEReqHeightPTEs = 1;
1795 *PixelPTEReqHeight = MacroTileHeight;
1796 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1797 *PTERequestSize = 64;
1798 FractionOfPTEReturnDrop = 0;
1799 }
1800
1801 if (SurfaceTiling == dm_sw_linear) {
1802 if (PTEBufferSizeInRequests == 0)
1803 *dpte_row_height = 1;
1804 else
1805 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
1806 *dpte_row_width_ub = (dml_ceil(((double) SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1807 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1808 } else if (ScanDirection != dm_vert) {
1809 *dpte_row_height = *PixelPTEReqHeight;
1810 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1811 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1812 } else {
1813 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1814 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1815 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1816 }
1817 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop)
1818 <= 64 * PTEBufferSizeInRequests) {
1819 *PTEBufferSizeNotExceeded = true;
1820 } else {
1821 *PTEBufferSizeNotExceeded = false;
1822 }
1823
1824 if (GPUVMEnable != true) {
1825 *PixelPTEBytesPerRow = 0;
1826 *PTEBufferSizeNotExceeded = true;
1827 }
1828 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
1829
1830 if (HostVMEnable == true) {
1831 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
1832 }
1833
1834 if (HostVMEnable == true) {
1835 *vm_group_bytes = 512;
1836 *dpte_group_bytes = 512;
1837 } else if (GPUVMEnable == true) {
1838 *vm_group_bytes = 2048;
1839 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
1840 *dpte_group_bytes = 512;
1841 } else {
1842 *dpte_group_bytes = 2048;
1843 }
1844 } else {
1845 *vm_group_bytes = 0;
1846 *dpte_group_bytes = 0;
1847 }
1848
1849 return PDEAndMetaPTEBytesFrame;
1850 }
1851
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib * mode_lib)1852 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
1853 struct display_mode_lib *mode_lib)
1854 {
1855 struct vba_vars_st *v = &mode_lib->vba;
1856 unsigned int j, k;
1857 long ReorderBytes = 0;
1858 unsigned int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
1859 double MaxTotalRDBandwidth = 0;
1860 double MaxTotalRDBandwidthNoUrgentBurst = 0;
1861 bool DestinationLineTimesForPrefetchLessThan2 = false;
1862 bool VRatioPrefetchMoreThan4 = false;
1863 double TWait;
1864
1865 v->WritebackDISPCLK = 0.0;
1866 v->DISPCLKWithRamping = 0;
1867 v->DISPCLKWithoutRamping = 0;
1868 v->GlobalDPPCLK = 0.0;
1869 /* DAL custom code: need to update ReturnBW in case min dcfclk is overriden */
1870 v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] = dml_min3(
1871 v->ReturnBusWidth * v->DCFCLK,
1872 v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth,
1873 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
1874 if (v->HostVMEnable != true) {
1875 v->ReturnBW = v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100;
1876 } else {
1877 v->ReturnBW = v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100;
1878 }
1879 /* End DAL custom code */
1880
1881 // DISPCLK and DPPCLK Calculation
1882 //
1883 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1884 if (v->WritebackEnable[k]) {
1885 v->WritebackDISPCLK = dml_max(v->WritebackDISPCLK,
1886 dml30_CalculateWriteBackDISPCLK(
1887 v->WritebackPixelFormat[k],
1888 v->PixelClock[k],
1889 v->WritebackHRatio[k],
1890 v->WritebackVRatio[k],
1891 v->WritebackHTaps[k],
1892 v->WritebackVTaps[k],
1893 v->WritebackSourceWidth[k],
1894 v->WritebackDestinationWidth[k],
1895 v->HTotal[k],
1896 v->WritebackLineBufferSize));
1897 }
1898 }
1899
1900 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1901 if (v->HRatio[k] > 1) {
1902 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
1903 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
1904 } else {
1905 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
1906 v->MaxDCHUBToPSCLThroughput,
1907 v->MaxPSCLToLBThroughput);
1908 }
1909
1910 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
1911 * dml_max(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
1912 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
1913
1914 if ((v->htaps[k] > 6 || v->vtaps[k] > 6)
1915 && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
1916 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
1917 }
1918
1919 if ((v->SourcePixelFormat[k] != dm_420_8
1920 && v->SourcePixelFormat[k] != dm_420_10
1921 && v->SourcePixelFormat[k] != dm_420_12
1922 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
1923 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
1924 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
1925 } else {
1926 if (v->HRatioChroma[k] > 1) {
1927 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
1928 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
1929 } else {
1930 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
1931 v->MaxDCHUBToPSCLThroughput,
1932 v->MaxPSCLToLBThroughput);
1933 }
1934 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
1935 * dml_max3(v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
1936 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k], 1.0);
1937
1938 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6)
1939 && v->DPPCLKUsingSingleDPPChroma
1940 < 2 * v->PixelClock[k]) {
1941 v->DPPCLKUsingSingleDPPChroma = 2
1942 * v->PixelClock[k];
1943 }
1944
1945 v->DPPCLKUsingSingleDPP[k] = dml_max(
1946 v->DPPCLKUsingSingleDPPLuma,
1947 v->DPPCLKUsingSingleDPPChroma);
1948 }
1949 }
1950
1951 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1952 if (v->BlendingAndTiming[k] != k)
1953 continue;
1954 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
1955 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
1956 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1957 * (1 + v->DISPCLKRampingMargin / 100));
1958 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
1959 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
1960 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
1961 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
1962 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1963 * (1 + v->DISPCLKRampingMargin / 100));
1964 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
1965 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
1966 } else {
1967 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
1968 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1969 * (1 + v->DISPCLKRampingMargin / 100));
1970 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
1971 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
1972 }
1973 }
1974
1975 v->DISPCLKWithRamping = dml_max(
1976 v->DISPCLKWithRamping,
1977 v->WritebackDISPCLK);
1978 v->DISPCLKWithoutRamping = dml_max(
1979 v->DISPCLKWithoutRamping,
1980 v->WritebackDISPCLK);
1981
1982 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
1983 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
1984 v->DISPCLKWithRamping,
1985 v->DISPCLKDPPCLKVCOSpeed);
1986 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
1987 v->DISPCLKWithoutRamping,
1988 v->DISPCLKDPPCLKVCOSpeed);
1989 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
1990 v->soc.clock_limits[mode_lib->soc.num_states - 1].dispclk_mhz,
1991 v->DISPCLKDPPCLKVCOSpeed);
1992 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity
1993 > v->MaxDispclkRoundedToDFSGranularity) {
1994 v->DISPCLK_calculated =
1995 v->DISPCLKWithoutRampingRoundedToDFSGranularity;
1996 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity
1997 > v->MaxDispclkRoundedToDFSGranularity) {
1998 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
1999 } else {
2000 v->DISPCLK_calculated =
2001 v->DISPCLKWithRampingRoundedToDFSGranularity;
2002 }
2003 v->DISPCLK = v->DISPCLK_calculated;
2004 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2005
2006 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2007 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k]
2008 / v->DPPPerPlane[k]
2009 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2010 v->GlobalDPPCLK = dml_max(
2011 v->GlobalDPPCLK,
2012 v->DPPCLK_calculated[k]);
2013 }
2014 v->GlobalDPPCLK = RoundToDFSGranularityUp(
2015 v->GlobalDPPCLK,
2016 v->DISPCLKDPPCLKVCOSpeed);
2017 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2018 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255
2019 * dml_ceil(
2020 v->DPPCLK_calculated[k] * 255.0
2021 / v->GlobalDPPCLK,
2022 1);
2023 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2024 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2025 }
2026
2027 // Urgent and B P-State/DRAM Clock Change Watermark
2028 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2029 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2030
2031 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2032 dml30_CalculateBytePerPixelAnd256BBlockSizes(
2033 v->SourcePixelFormat[k],
2034 v->SurfaceTiling[k],
2035 &v->BytePerPixelY[k],
2036 &v->BytePerPixelC[k],
2037 &v->BytePerPixelDETY[k],
2038 &v->BytePerPixelDETC[k],
2039 &v->BlockHeight256BytesY[k],
2040 &v->BlockHeight256BytesC[k],
2041 &v->BlockWidth256BytesY[k],
2042 &v->BlockWidth256BytesC[k]);
2043 }
2044
2045 CalculateSwathWidth(
2046 false,
2047 v->NumberOfActivePlanes,
2048 v->SourcePixelFormat,
2049 v->SourceScan,
2050 v->ViewportWidth,
2051 v->ViewportHeight,
2052 v->SurfaceWidthY,
2053 v->SurfaceWidthC,
2054 v->SurfaceHeightY,
2055 v->SurfaceHeightC,
2056 v->ODMCombineEnabled,
2057 v->BytePerPixelY,
2058 v->BytePerPixelC,
2059 v->BlockHeight256BytesY,
2060 v->BlockHeight256BytesC,
2061 v->BlockWidth256BytesY,
2062 v->BlockWidth256BytesC,
2063 v->BlendingAndTiming,
2064 v->HActive,
2065 v->HRatio,
2066 v->DPPPerPlane,
2067 v->SwathWidthSingleDPPY,
2068 v->SwathWidthSingleDPPC,
2069 v->SwathWidthY,
2070 v->SwathWidthC,
2071 v->dummyinteger3,
2072 v->dummyinteger4,
2073 v->swath_width_luma_ub,
2074 v->swath_width_chroma_ub);
2075
2076
2077 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2078 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2079 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioChroma[k];
2080 DTRACE("read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2081 }
2082
2083
2084 // DCFCLK Deep Sleep
2085 CalculateDCFCLKDeepSleep(
2086 mode_lib,
2087 v->NumberOfActivePlanes,
2088 v->BytePerPixelY,
2089 v->BytePerPixelC,
2090 v->VRatio,
2091 v->VRatioChroma,
2092 v->SwathWidthY,
2093 v->SwathWidthC,
2094 v->DPPPerPlane,
2095 v->HRatio,
2096 v->HRatioChroma,
2097 v->PixelClock,
2098 v->PSCL_THROUGHPUT_LUMA,
2099 v->PSCL_THROUGHPUT_CHROMA,
2100 v->DPPCLK,
2101 v->ReadBandwidthPlaneLuma,
2102 v->ReadBandwidthPlaneChroma,
2103 v->ReturnBusWidth,
2104 &v->DCFCLKDeepSleep);
2105
2106 // DSCCLK
2107 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2108 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2109 v->DSCCLK_calculated[k] = 0.0;
2110 } else {
2111 if (v->OutputFormat[k] == dm_420)
2112 v->DSCFormatFactor = 2;
2113 else if (v->OutputFormat[k] == dm_444)
2114 v->DSCFormatFactor = 1;
2115 else if (v->OutputFormat[k] == dm_n422)
2116 v->DSCFormatFactor = 2;
2117 else
2118 v->DSCFormatFactor = 1;
2119 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2120 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12
2121 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2122 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2123 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6
2124 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2125 else
2126 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3
2127 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2128 }
2129 }
2130
2131 // DSC Delay
2132 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2133 double BPP = v->OutputBppPerState[k][v->VoltageLevel];
2134
2135 if (v->DSCEnabled[k] && BPP != 0) {
2136 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2137 v->DSCDelay[k] = dscceComputeDelay(v->DSCInputBitPerComponent[k],
2138 BPP,
2139 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2140 v->NumberOfDSCSlices[k],
2141 v->OutputFormat[k],
2142 v->Output[k])
2143 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2144 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2145 v->DSCDelay[k] = 2 * dscceComputeDelay(v->DSCInputBitPerComponent[k],
2146 BPP,
2147 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2148 v->NumberOfDSCSlices[k] / 2.0,
2149 v->OutputFormat[k],
2150 v->Output[k])
2151 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2152 } else {
2153 v->DSCDelay[k] = 4 * dscceComputeDelay(v->DSCInputBitPerComponent[k],
2154 BPP,
2155 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2156 v->NumberOfDSCSlices[k] / 4.0,
2157 v->OutputFormat[k],
2158 v->Output[k])
2159 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2160 }
2161 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2162 } else {
2163 v->DSCDelay[k] = 0;
2164 }
2165 }
2166
2167 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2168 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2169 if (j != k && v->BlendingAndTiming[k] == j
2170 && v->DSCEnabled[j])
2171 v->DSCDelay[k] = v->DSCDelay[j];
2172
2173 // Prefetch
2174 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2175 unsigned int PDEAndMetaPTEBytesFrameY = 0;
2176 unsigned int PixelPTEBytesPerRowY = 0;
2177 unsigned int MetaRowByteY = 0;
2178 unsigned int MetaRowByteC = 0;
2179 unsigned int PDEAndMetaPTEBytesFrameC = 0;
2180 unsigned int PixelPTEBytesPerRowC = 0;
2181 bool PTEBufferSizeNotExceededY = 0;
2182 bool PTEBufferSizeNotExceededC = 0;
2183
2184
2185 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2186 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2187 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2188 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2189 } else {
2190 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2191 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2192
2193 }
2194 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2195 mode_lib,
2196 v->DCCEnable[k],
2197 v->BlockHeight256BytesC[k],
2198 v->BlockWidth256BytesC[k],
2199 v->SourcePixelFormat[k],
2200 v->SurfaceTiling[k],
2201 v->BytePerPixelC[k],
2202 v->SourceScan[k],
2203 v->SwathWidthC[k],
2204 v->ViewportHeightChroma[k],
2205 v->GPUVMEnable,
2206 v->HostVMEnable,
2207 v->HostVMMaxNonCachedPageTableLevels,
2208 v->GPUVMMinPageSize,
2209 v->HostVMMinPageSize,
2210 v->PTEBufferSizeInRequestsForChroma,
2211 v->PitchC[k],
2212 v->DCCMetaPitchC[k],
2213 &v->MacroTileWidthC[k],
2214 &MetaRowByteC,
2215 &PixelPTEBytesPerRowC,
2216 &PTEBufferSizeNotExceededC,
2217 &v->dpte_row_width_chroma_ub[k],
2218 &v->dpte_row_height_chroma[k],
2219 &v->meta_req_width_chroma[k],
2220 &v->meta_req_height_chroma[k],
2221 &v->meta_row_width_chroma[k],
2222 &v->meta_row_height_chroma[k],
2223 &v->dummyinteger1,
2224 &v->dummyinteger2,
2225 &v->PixelPTEReqWidthC[k],
2226 &v->PixelPTEReqHeightC[k],
2227 &v->PTERequestSizeC[k],
2228 &v->dpde0_bytes_per_frame_ub_c[k],
2229 &v->meta_pte_bytes_per_frame_ub_c[k]);
2230
2231 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2232 mode_lib,
2233 v->VRatioChroma[k],
2234 v->VTAPsChroma[k],
2235 v->Interlace[k],
2236 v->ProgressiveToInterlaceUnitInOPP,
2237 v->SwathHeightC[k],
2238 v->ViewportYStartC[k],
2239 &v->VInitPreFillC[k],
2240 &v->MaxNumSwathC[k]);
2241 } else {
2242 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2243 v->PTEBufferSizeInRequestsForChroma = 0;
2244 PixelPTEBytesPerRowC = 0;
2245 PDEAndMetaPTEBytesFrameC = 0;
2246 MetaRowByteC = 0;
2247 v->MaxNumSwathC[k] = 0;
2248 v->PrefetchSourceLinesC[k] = 0;
2249 }
2250
2251 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2252 mode_lib,
2253 v->DCCEnable[k],
2254 v->BlockHeight256BytesY[k],
2255 v->BlockWidth256BytesY[k],
2256 v->SourcePixelFormat[k],
2257 v->SurfaceTiling[k],
2258 v->BytePerPixelY[k],
2259 v->SourceScan[k],
2260 v->SwathWidthY[k],
2261 v->ViewportHeight[k],
2262 v->GPUVMEnable,
2263 v->HostVMEnable,
2264 v->HostVMMaxNonCachedPageTableLevels,
2265 v->GPUVMMinPageSize,
2266 v->HostVMMinPageSize,
2267 v->PTEBufferSizeInRequestsForLuma,
2268 v->PitchY[k],
2269 v->DCCMetaPitchY[k],
2270 &v->MacroTileWidthY[k],
2271 &MetaRowByteY,
2272 &PixelPTEBytesPerRowY,
2273 &PTEBufferSizeNotExceededY,
2274 &v->dpte_row_width_luma_ub[k],
2275 &v->dpte_row_height[k],
2276 &v->meta_req_width[k],
2277 &v->meta_req_height[k],
2278 &v->meta_row_width[k],
2279 &v->meta_row_height[k],
2280 &v->vm_group_bytes[k],
2281 &v->dpte_group_bytes[k],
2282 &v->PixelPTEReqWidthY[k],
2283 &v->PixelPTEReqHeightY[k],
2284 &v->PTERequestSizeY[k],
2285 &v->dpde0_bytes_per_frame_ub_l[k],
2286 &v->meta_pte_bytes_per_frame_ub_l[k]);
2287
2288 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2289 mode_lib,
2290 v->VRatio[k],
2291 v->vtaps[k],
2292 v->Interlace[k],
2293 v->ProgressiveToInterlaceUnitInOPP,
2294 v->SwathHeightY[k],
2295 v->ViewportYStartY[k],
2296 &v->VInitPreFillY[k],
2297 &v->MaxNumSwathY[k]);
2298 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2299 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY
2300 + PDEAndMetaPTEBytesFrameC;
2301 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2302
2303 CalculateRowBandwidth(
2304 v->GPUVMEnable,
2305 v->SourcePixelFormat[k],
2306 v->VRatio[k],
2307 v->VRatioChroma[k],
2308 v->DCCEnable[k],
2309 v->HTotal[k] / v->PixelClock[k],
2310 MetaRowByteY,
2311 MetaRowByteC,
2312 v->meta_row_height[k],
2313 v->meta_row_height_chroma[k],
2314 PixelPTEBytesPerRowY,
2315 PixelPTEBytesPerRowC,
2316 v->dpte_row_height[k],
2317 v->dpte_row_height_chroma[k],
2318 &v->meta_row_bw[k],
2319 &v->dpte_row_bw[k]);
2320 }
2321
2322 v->TotalDCCActiveDPP = 0;
2323 v->TotalActiveDPP = 0;
2324 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2325 v->TotalActiveDPP = v->TotalActiveDPP
2326 + v->DPPPerPlane[k];
2327 if (v->DCCEnable[k])
2328 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP
2329 + v->DPPPerPlane[k];
2330 }
2331
2332
2333 ReorderBytes = v->NumberOfChannels * dml_max3(
2334 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2335 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2336 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2337
2338 v->UrgentExtraLatency = CalculateExtraLatency(
2339 v->RoundTripPingLatencyCycles,
2340 ReorderBytes,
2341 v->DCFCLK,
2342 v->TotalActiveDPP,
2343 v->PixelChunkSizeInKByte,
2344 v->TotalDCCActiveDPP,
2345 v->MetaChunkSize,
2346 v->ReturnBW,
2347 v->GPUVMEnable,
2348 v->HostVMEnable,
2349 v->NumberOfActivePlanes,
2350 v->DPPPerPlane,
2351 v->dpte_group_bytes,
2352 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2353 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2354 v->HostVMMinPageSize,
2355 v->HostVMMaxNonCachedPageTableLevels);
2356
2357 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2358
2359 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2360 if (v->BlendingAndTiming[k] == k) {
2361 if (v->WritebackEnable[k] == true) {
2362 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency +
2363 CalculateWriteBackDelay(v->WritebackPixelFormat[k],
2364 v->WritebackHRatio[k],
2365 v->WritebackVRatio[k],
2366 v->WritebackVTaps[k],
2367 v->WritebackDestinationWidth[k],
2368 v->WritebackDestinationHeight[k],
2369 v->WritebackSourceHeight[k],
2370 v->HTotal[k]) / v->DISPCLK;
2371 } else
2372 v->WritebackDelay[v->VoltageLevel][k] = 0;
2373 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2374 if (v->BlendingAndTiming[j] == k
2375 && v->WritebackEnable[j] == true) {
2376 v->WritebackDelay[v->VoltageLevel][k] = dml_max(v->WritebackDelay[v->VoltageLevel][k],
2377 v->WritebackLatency + CalculateWriteBackDelay(
2378 v->WritebackPixelFormat[j],
2379 v->WritebackHRatio[j],
2380 v->WritebackVRatio[j],
2381 v->WritebackVTaps[j],
2382 v->WritebackDestinationWidth[j],
2383 v->WritebackDestinationHeight[j],
2384 v->WritebackSourceHeight[j],
2385 v->HTotal[k]) / v->DISPCLK);
2386 }
2387 }
2388 }
2389 }
2390
2391 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2392 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2393 if (v->BlendingAndTiming[k] == j)
2394 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2395
2396 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2397 v->MaxVStartupLines[k] = v->VTotal[k] - v->VActive[k] - dml_max(1.0, dml_ceil((double) v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1));
2398 }
2399
2400 v->MaximumMaxVStartupLines = 0;
2401 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2402 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2403
2404 if (v->DRAMClockChangeLatencyOverride > 0.0) {
2405 v->FinalDRAMClockChangeLatency = v->DRAMClockChangeLatencyOverride;
2406 } else {
2407 v->FinalDRAMClockChangeLatency = v->DRAMClockChangeLatency;
2408 }
2409 v->UrgentLatency = CalculateUrgentLatency(v->UrgentLatencyPixelDataOnly, v->UrgentLatencyPixelMixedWithVMData, v->UrgentLatencyVMDataOnly, v->DoUrgentLatencyAdjustment, v->UrgentLatencyAdjustmentFabricClockComponent, v->UrgentLatencyAdjustmentFabricClockReference, v->FabricClock);
2410
2411
2412 v->FractionOfUrgentBandwidth = 0.0;
2413 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2414
2415 v->VStartupLines = 13;
2416
2417 do {
2418 MaxTotalRDBandwidth = 0;
2419 MaxTotalRDBandwidthNoUrgentBurst = 0;
2420 DestinationLineTimesForPrefetchLessThan2 = false;
2421 VRatioPrefetchMoreThan4 = false;
2422 TWait = CalculateTWait(
2423 PrefetchMode,
2424 v->FinalDRAMClockChangeLatency,
2425 v->UrgentLatency,
2426 v->SREnterPlusExitTime);
2427
2428 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2429 Pipe myPipe = { 0 };
2430
2431 myPipe.DPPCLK = v->DPPCLK[k];
2432 myPipe.DISPCLK = v->DISPCLK;
2433 myPipe.PixelClock = v->PixelClock[k];
2434 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2435 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2436 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2437 myPipe.SourceScan = v->SourceScan[k];
2438 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2439 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2440 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2441 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2442 myPipe.InterlaceEnable = v->Interlace[k];
2443 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2444 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2445 myPipe.HTotal = v->HTotal[k];
2446 myPipe.DCCEnable = v->DCCEnable[k];
2447 myPipe.ODMCombineEnabled = !!v->ODMCombineEnabled[k];
2448
2449 v->ErrorResult[k] = CalculatePrefetchSchedule(
2450 mode_lib,
2451 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2452 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2453 &myPipe,
2454 v->DSCDelay[k],
2455 v->DPPCLKDelaySubtotal
2456 + v->DPPCLKDelayCNVCFormater,
2457 v->DPPCLKDelaySCL,
2458 v->DPPCLKDelaySCLLBOnly,
2459 v->DPPCLKDelayCNVCCursor,
2460 v->DISPCLKDelaySubtotal,
2461 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2462 v->OutputFormat[k],
2463 v->MaxInterDCNTileRepeaters,
2464 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2465 v->MaxVStartupLines[k],
2466 v->GPUVMMaxPageTableLevels,
2467 v->GPUVMEnable,
2468 v->HostVMEnable,
2469 v->HostVMMaxNonCachedPageTableLevels,
2470 v->HostVMMinPageSize,
2471 v->DynamicMetadataEnable[k],
2472 v->DynamicMetadataVMEnabled,
2473 v->DynamicMetadataLinesBeforeActiveRequired[k],
2474 v->DynamicMetadataTransmittedBytes[k],
2475 v->UrgentLatency,
2476 v->UrgentExtraLatency,
2477 v->TCalc,
2478 v->PDEAndMetaPTEBytesFrame[k],
2479 v->MetaRowByte[k],
2480 v->PixelPTEBytesPerRow[k],
2481 v->PrefetchSourceLinesY[k],
2482 v->SwathWidthY[k],
2483 v->BytePerPixelY[k],
2484 v->VInitPreFillY[k],
2485 v->MaxNumSwathY[k],
2486 v->PrefetchSourceLinesC[k],
2487 v->SwathWidthC[k],
2488 v->BytePerPixelC[k],
2489 v->VInitPreFillC[k],
2490 v->MaxNumSwathC[k],
2491 v->swath_width_luma_ub[k],
2492 v->swath_width_chroma_ub[k],
2493 v->SwathHeightY[k],
2494 v->SwathHeightC[k],
2495 TWait,
2496 v->ProgressiveToInterlaceUnitInOPP,
2497 &v->DSTXAfterScaler[k],
2498 &v->DSTYAfterScaler[k],
2499 &v->DestinationLinesForPrefetch[k],
2500 &v->PrefetchBandwidth[k],
2501 &v->DestinationLinesToRequestVMInVBlank[k],
2502 &v->DestinationLinesToRequestRowInVBlank[k],
2503 &v->VRatioPrefetchY[k],
2504 &v->VRatioPrefetchC[k],
2505 &v->RequiredPrefetchPixDataBWLuma[k],
2506 &v->RequiredPrefetchPixDataBWChroma[k],
2507 &v->NotEnoughTimeForDynamicMetadata[k],
2508 &v->Tno_bw[k],
2509 &v->prefetch_vmrow_bw[k],
2510 &v->Tdmdl_vm[k],
2511 &v->Tdmdl[k],
2512 &v->VUpdateOffsetPix[k],
2513 &v->VUpdateWidthPix[k],
2514 &v->VReadyOffsetPix[k]);
2515 if (v->BlendingAndTiming[k] == k) {
2516 double TotalRepeaterDelayTime = v->MaxInterDCNTileRepeaters * (2 / v->DPPCLK[k] + 3 / v->DISPCLK);
2517 v->VUpdateWidthPix[k] = (14 / v->DCFCLKDeepSleep + 12 / v->DPPCLK[k] + TotalRepeaterDelayTime) * v->PixelClock[k];
2518 v->VReadyOffsetPix[k] = dml_max(150.0 / v->DPPCLK[k], TotalRepeaterDelayTime + 20 / v->DCFCLKDeepSleep + 10 / v->DPPCLK[k]) * v->PixelClock[k];
2519 v->VUpdateOffsetPix[k] = dml_ceil(v->HTotal[k] / 4.0, 1);
2520 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2521 } else {
2522 int x = v->BlendingAndTiming[k];
2523 double TotalRepeaterDelayTime = v->MaxInterDCNTileRepeaters * (2 / v->DPPCLK[k] + 3 / v->DISPCLK);
2524 v->VUpdateWidthPix[k] = (14 / v->DCFCLKDeepSleep + 12 / v->DPPCLK[k] + TotalRepeaterDelayTime) * v->PixelClock[x];
2525 v->VReadyOffsetPix[k] = dml_max(150.0 / v->DPPCLK[k], TotalRepeaterDelayTime + 20 / v->DCFCLKDeepSleep + 10 / v->DPPCLK[k]) * v->PixelClock[x];
2526 v->VUpdateOffsetPix[k] = dml_ceil(v->HTotal[x] / 4.0, 1);
2527 if (!v->MaxVStartupLines[x])
2528 v->MaxVStartupLines[x] = v->MaxVStartupLines[k];
2529 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[x]);
2530 }
2531 }
2532
2533 v->NotEnoughUrgentLatencyHiding[0][0] = false;
2534 v->NotEnoughUrgentLatencyHidingPre = false;
2535
2536 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2537 v->cursor_bw[k] = v->NumberOfCursors[k]
2538 * v->CursorWidth[k][0] * v->CursorBPP[k][0]
2539 / 8.0
2540 / (v->HTotal[k] / v->PixelClock[k])
2541 * v->VRatio[k];
2542 v->cursor_bw_pre[k] = v->NumberOfCursors[k]
2543 * v->CursorWidth[k][0] * v->CursorBPP[k][0]
2544 / 8.0
2545 / (v->HTotal[k] / v->PixelClock[k])
2546 * v->VRatioPrefetchY[k];
2547
2548 CalculateUrgentBurstFactor(
2549 v->swath_width_luma_ub[k],
2550 v->swath_width_chroma_ub[k],
2551 v->DETBufferSizeInKByte[0],
2552 v->SwathHeightY[k],
2553 v->SwathHeightC[k],
2554 v->HTotal[k] / v->PixelClock[k],
2555 v->UrgentLatency,
2556 v->CursorBufferSize,
2557 v->CursorWidth[k][0],
2558 v->CursorBPP[k][0],
2559 v->VRatio[k],
2560 v->VRatioChroma[k],
2561 v->BytePerPixelDETY[k],
2562 v->BytePerPixelDETC[k],
2563 v->DETBufferSizeY[k],
2564 v->DETBufferSizeC[k],
2565 &v->UrgentBurstFactorCursor[k],
2566 &v->UrgentBurstFactorLuma[k],
2567 &v->UrgentBurstFactorChroma[k],
2568 &v->NoUrgentLatencyHiding[k]);
2569
2570 CalculateUrgentBurstFactor(
2571 v->swath_width_luma_ub[k],
2572 v->swath_width_chroma_ub[k],
2573 v->DETBufferSizeInKByte[0],
2574 v->SwathHeightY[k],
2575 v->SwathHeightC[k],
2576 v->HTotal[k] / v->PixelClock[k],
2577 v->UrgentLatency,
2578 v->CursorBufferSize,
2579 v->CursorWidth[k][0],
2580 v->CursorBPP[k][0],
2581 v->VRatioPrefetchY[k],
2582 v->VRatioPrefetchC[k],
2583 v->BytePerPixelDETY[k],
2584 v->BytePerPixelDETC[k],
2585 v->DETBufferSizeY[k],
2586 v->DETBufferSizeC[k],
2587 &v->UrgentBurstFactorCursorPre[k],
2588 &v->UrgentBurstFactorLumaPre[k],
2589 &v->UrgentBurstFactorChromaPre[k],
2590 &v->NoUrgentLatencyHidingPre[k]);
2591
2592 MaxTotalRDBandwidth = MaxTotalRDBandwidth +
2593 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2594 v->ReadBandwidthPlaneLuma[k] *
2595 v->UrgentBurstFactorLuma[k] +
2596 v->ReadBandwidthPlaneChroma[k] *
2597 v->UrgentBurstFactorChroma[k] +
2598 v->cursor_bw[k] *
2599 v->UrgentBurstFactorCursor[k] +
2600 v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2601 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2602 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) + v->cursor_bw_pre[k] *
2603 v->UrgentBurstFactorCursorPre[k]);
2604
2605 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst +
2606 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2607 v->ReadBandwidthPlaneLuma[k] +
2608 v->ReadBandwidthPlaneChroma[k] +
2609 v->cursor_bw[k] +
2610 v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2611 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2612
2613 if (v->DestinationLinesForPrefetch[k] < 2)
2614 DestinationLineTimesForPrefetchLessThan2 = true;
2615 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2616 VRatioPrefetchMoreThan4 = true;
2617 if (v->NoUrgentLatencyHiding[k] == true)
2618 v->NotEnoughUrgentLatencyHiding[0][0] = true;
2619
2620 if (v->NoUrgentLatencyHidingPre[k] == true)
2621 v->NotEnoughUrgentLatencyHidingPre = true;
2622 }
2623 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2624
2625
2626 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NotEnoughUrgentLatencyHiding[0][0] == 0
2627 && v->NotEnoughUrgentLatencyHidingPre == 0 && !VRatioPrefetchMoreThan4
2628 && !DestinationLineTimesForPrefetchLessThan2)
2629 v->PrefetchModeSupported = true;
2630 else {
2631 v->PrefetchModeSupported = false;
2632 dml_print("DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n");
2633 dml_print("DML: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", MaxTotalRDBandwidth, v->ReturnBW);
2634 dml_print("DML: VRatioPrefetch %s more than 4\n", (VRatioPrefetchMoreThan4) ? "is" : "is not");
2635 dml_print("DML: DestinationLines for Prefetch %s less than 2\n", (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2636 }
2637
2638 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2639 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2640 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2641 v->BandwidthAvailableForImmediateFlip =
2642 v->BandwidthAvailableForImmediateFlip
2643 - dml_max(
2644 v->ReadBandwidthPlaneLuma[k] * v->UrgentBurstFactorLuma[k]
2645 + v->ReadBandwidthPlaneChroma[k] * v->UrgentBurstFactorChroma[k]
2646 + v->cursor_bw[k] * v->UrgentBurstFactorCursor[k],
2647 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2648 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) +
2649 v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
2650 }
2651
2652 v->TotImmediateFlipBytes = 0;
2653 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2654 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2655 }
2656 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2657 CalculateFlipSchedule(
2658 mode_lib,
2659 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2660 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2661 v->UrgentExtraLatency,
2662 v->UrgentLatency,
2663 v->GPUVMMaxPageTableLevels,
2664 v->HostVMEnable,
2665 v->HostVMMaxNonCachedPageTableLevels,
2666 v->GPUVMEnable,
2667 v->HostVMMinPageSize,
2668 v->PDEAndMetaPTEBytesFrame[k],
2669 v->MetaRowByte[k],
2670 v->PixelPTEBytesPerRow[k],
2671 v->BandwidthAvailableForImmediateFlip,
2672 v->TotImmediateFlipBytes,
2673 v->SourcePixelFormat[k],
2674 v->HTotal[k] / v->PixelClock[k],
2675 v->VRatio[k],
2676 v->VRatioChroma[k],
2677 v->Tno_bw[k],
2678 v->DCCEnable[k],
2679 v->dpte_row_height[k],
2680 v->meta_row_height[k],
2681 v->dpte_row_height_chroma[k],
2682 v->meta_row_height_chroma[k],
2683 &v->DestinationLinesToRequestVMInImmediateFlip[k],
2684 &v->DestinationLinesToRequestRowInImmediateFlip[k],
2685 &v->final_flip_bw[k],
2686 &v->ImmediateFlipSupportedForPipe[k]);
2687 }
2688 v->total_dcn_read_bw_with_flip = 0.0;
2689 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2690 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2691 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip + dml_max3(
2692 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2693 v->DPPPerPlane[k] * v->final_flip_bw[k] +
2694 v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k] +
2695 v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k] +
2696 v->cursor_bw[k] * v->UrgentBurstFactorCursor[k],
2697 v->DPPPerPlane[k] * (v->final_flip_bw[k] +
2698 v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2699 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) +
2700 v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
2701 v->total_dcn_read_bw_with_flip_no_urgent_burst =
2702 v->total_dcn_read_bw_with_flip_no_urgent_burst +
2703 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2704 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2705 v->DPPPerPlane[k] * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2706
2707 }
2708 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2709
2710 v->ImmediateFlipSupported = true;
2711 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2712 v->ImmediateFlipSupported = false;
2713 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2714 }
2715 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2716 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2717 v->ImmediateFlipSupported = false;
2718 }
2719 }
2720 } else {
2721 v->ImmediateFlipSupported = false;
2722 }
2723
2724 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2725 if (v->ErrorResult[k] || v->NotEnoughTimeForDynamicMetadata[k]) {
2726 v->PrefetchModeSupported = false;
2727 dml_print("DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. Results are NOT valid\n");
2728 }
2729 }
2730
2731 v->VStartupLines = v->VStartupLines + 1;
2732 v->PrefetchModeSupported = (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport &&
2733 !v->HostVMEnable && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2734 v->ImmediateFlipSupported)) ? true : false;
2735 } while (!v->PrefetchModeSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2736 ASSERT(v->PrefetchModeSupported);
2737
2738 //Watermarks and NB P-State/DRAM Clock Change Support
2739 {
2740 enum clock_change_support DRAMClockChangeSupport = 0; // dummy
2741 CalculateWatermarksAndDRAMSpeedChangeSupport(
2742 mode_lib,
2743 PrefetchMode,
2744 v->NumberOfActivePlanes,
2745 v->MaxLineBufferLines,
2746 v->LineBufferSize,
2747 v->DPPOutputBufferPixels,
2748 v->DETBufferSizeInKByte[0],
2749 v->WritebackInterfaceBufferSize,
2750 v->DCFCLK,
2751 v->ReturnBW,
2752 v->GPUVMEnable,
2753 v->dpte_group_bytes,
2754 v->MetaChunkSize,
2755 v->UrgentLatency,
2756 v->UrgentExtraLatency,
2757 v->WritebackLatency,
2758 v->WritebackChunkSize,
2759 v->SOCCLK,
2760 v->FinalDRAMClockChangeLatency,
2761 v->SRExitTime,
2762 v->SREnterPlusExitTime,
2763 v->DCFCLKDeepSleep,
2764 v->DPPPerPlane,
2765 v->DCCEnable,
2766 v->DPPCLK,
2767 v->DETBufferSizeY,
2768 v->DETBufferSizeC,
2769 v->SwathHeightY,
2770 v->SwathHeightC,
2771 v->LBBitPerPixel,
2772 v->SwathWidthY,
2773 v->SwathWidthC,
2774 v->HRatio,
2775 v->HRatioChroma,
2776 v->vtaps,
2777 v->VTAPsChroma,
2778 v->VRatio,
2779 v->VRatioChroma,
2780 v->HTotal,
2781 v->PixelClock,
2782 v->BlendingAndTiming,
2783 v->BytePerPixelDETY,
2784 v->BytePerPixelDETC,
2785 v->DSTXAfterScaler,
2786 v->DSTYAfterScaler,
2787 v->WritebackEnable,
2788 v->WritebackPixelFormat,
2789 v->WritebackDestinationWidth,
2790 v->WritebackDestinationHeight,
2791 v->WritebackSourceHeight,
2792 &DRAMClockChangeSupport,
2793 &v->UrgentWatermark,
2794 &v->WritebackUrgentWatermark,
2795 &v->DRAMClockChangeWatermark,
2796 &v->WritebackDRAMClockChangeWatermark,
2797 &v->StutterExitWatermark,
2798 &v->StutterEnterPlusExitWatermark,
2799 &v->MinActiveDRAMClockChangeLatencySupported);
2800
2801 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2802 if (v->WritebackEnable[k] == true) {
2803 if (v->BlendingAndTiming[k] == k) {
2804 v->ThisVStartup = v->VStartup[k];
2805 } else {
2806 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2807 if (v->BlendingAndTiming[k] == j) {
2808 v->ThisVStartup = v->VStartup[j];
2809 }
2810 }
2811 }
2812 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(0,
2813 v->ThisVStartup * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
2814 } else {
2815 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
2816 }
2817 }
2818
2819 }
2820
2821
2822 //Display Pipeline Delivery Time in Prefetch, Groups
2823 CalculatePixelDeliveryTimes(
2824 v->NumberOfActivePlanes,
2825 v->VRatio,
2826 v->VRatioChroma,
2827 v->VRatioPrefetchY,
2828 v->VRatioPrefetchC,
2829 v->swath_width_luma_ub,
2830 v->swath_width_chroma_ub,
2831 v->DPPPerPlane,
2832 v->HRatio,
2833 v->HRatioChroma,
2834 v->PixelClock,
2835 v->PSCL_THROUGHPUT_LUMA,
2836 v->PSCL_THROUGHPUT_CHROMA,
2837 v->DPPCLK,
2838 v->BytePerPixelC,
2839 v->SourceScan,
2840 v->NumberOfCursors,
2841 v->CursorWidth,
2842 v->CursorBPP,
2843 v->BlockWidth256BytesY,
2844 v->BlockHeight256BytesY,
2845 v->BlockWidth256BytesC,
2846 v->BlockHeight256BytesC,
2847 v->DisplayPipeLineDeliveryTimeLuma,
2848 v->DisplayPipeLineDeliveryTimeChroma,
2849 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
2850 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
2851 v->DisplayPipeRequestDeliveryTimeLuma,
2852 v->DisplayPipeRequestDeliveryTimeChroma,
2853 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
2854 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
2855 v->CursorRequestDeliveryTime,
2856 v->CursorRequestDeliveryTimePrefetch);
2857
2858 CalculateMetaAndPTETimes(
2859 v->NumberOfActivePlanes,
2860 v->GPUVMEnable,
2861 v->MetaChunkSize,
2862 v->MinMetaChunkSizeBytes,
2863 v->HTotal,
2864 v->VRatio,
2865 v->VRatioChroma,
2866 v->DestinationLinesToRequestRowInVBlank,
2867 v->DestinationLinesToRequestRowInImmediateFlip,
2868 v->DCCEnable,
2869 v->PixelClock,
2870 v->BytePerPixelY,
2871 v->BytePerPixelC,
2872 v->SourceScan,
2873 v->dpte_row_height,
2874 v->dpte_row_height_chroma,
2875 v->meta_row_width,
2876 v->meta_row_width_chroma,
2877 v->meta_row_height,
2878 v->meta_row_height_chroma,
2879 v->meta_req_width,
2880 v->meta_req_width_chroma,
2881 v->meta_req_height,
2882 v->meta_req_height_chroma,
2883 v->dpte_group_bytes,
2884 v->PTERequestSizeY,
2885 v->PTERequestSizeC,
2886 v->PixelPTEReqWidthY,
2887 v->PixelPTEReqHeightY,
2888 v->PixelPTEReqWidthC,
2889 v->PixelPTEReqHeightC,
2890 v->dpte_row_width_luma_ub,
2891 v->dpte_row_width_chroma_ub,
2892 v->DST_Y_PER_PTE_ROW_NOM_L,
2893 v->DST_Y_PER_PTE_ROW_NOM_C,
2894 v->DST_Y_PER_META_ROW_NOM_L,
2895 v->DST_Y_PER_META_ROW_NOM_C,
2896 v->TimePerMetaChunkNominal,
2897 v->TimePerChromaMetaChunkNominal,
2898 v->TimePerMetaChunkVBlank,
2899 v->TimePerChromaMetaChunkVBlank,
2900 v->TimePerMetaChunkFlip,
2901 v->TimePerChromaMetaChunkFlip,
2902 v->time_per_pte_group_nom_luma,
2903 v->time_per_pte_group_vblank_luma,
2904 v->time_per_pte_group_flip_luma,
2905 v->time_per_pte_group_nom_chroma,
2906 v->time_per_pte_group_vblank_chroma,
2907 v->time_per_pte_group_flip_chroma);
2908
2909 CalculateVMGroupAndRequestTimes(
2910 v->NumberOfActivePlanes,
2911 v->GPUVMEnable,
2912 v->GPUVMMaxPageTableLevels,
2913 v->HTotal,
2914 v->BytePerPixelC,
2915 v->DestinationLinesToRequestVMInVBlank,
2916 v->DestinationLinesToRequestVMInImmediateFlip,
2917 v->DCCEnable,
2918 v->PixelClock,
2919 v->dpte_row_width_luma_ub,
2920 v->dpte_row_width_chroma_ub,
2921 v->vm_group_bytes,
2922 v->dpde0_bytes_per_frame_ub_l,
2923 v->dpde0_bytes_per_frame_ub_c,
2924 v->meta_pte_bytes_per_frame_ub_l,
2925 v->meta_pte_bytes_per_frame_ub_c,
2926 v->TimePerVMGroupVBlank,
2927 v->TimePerVMGroupFlip,
2928 v->TimePerVMRequestVBlank,
2929 v->TimePerVMRequestFlip);
2930
2931
2932 // Min TTUVBlank
2933 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2934 if (PrefetchMode == 0) {
2935 v->AllowDRAMClockChangeDuringVBlank[k] = true;
2936 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
2937 v->MinTTUVBlank[k] = dml_max(
2938 v->DRAMClockChangeWatermark,
2939 dml_max(
2940 v->StutterEnterPlusExitWatermark,
2941 v->UrgentWatermark));
2942 } else if (PrefetchMode == 1) {
2943 v->AllowDRAMClockChangeDuringVBlank[k] = false;
2944 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
2945 v->MinTTUVBlank[k] = dml_max(
2946 v->StutterEnterPlusExitWatermark,
2947 v->UrgentWatermark);
2948 } else {
2949 v->AllowDRAMClockChangeDuringVBlank[k] = false;
2950 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
2951 v->MinTTUVBlank[k] = v->UrgentWatermark;
2952 }
2953 if (!v->DynamicMetadataEnable[k])
2954 v->MinTTUVBlank[k] = v->TCalc
2955 + v->MinTTUVBlank[k];
2956 }
2957
2958 // DCC Configuration
2959 v->ActiveDPPs = 0;
2960 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2961 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
2962 v->SourcePixelFormat[k],
2963 v->SurfaceWidthY[k],
2964 v->SurfaceWidthC[k],
2965 v->SurfaceHeightY[k],
2966 v->SurfaceHeightC[k],
2967 v->DETBufferSizeInKByte[0] * 1024,
2968 v->BlockHeight256BytesY[k],
2969 v->BlockHeight256BytesC[k],
2970 v->SurfaceTiling[k],
2971 v->BytePerPixelY[k],
2972 v->BytePerPixelC[k],
2973 v->BytePerPixelDETY[k],
2974 v->BytePerPixelDETC[k],
2975 v->SourceScan[k],
2976 &v->DCCYMaxUncompressedBlock[k],
2977 &v->DCCCMaxUncompressedBlock[k],
2978 &v->DCCYMaxCompressedBlock[k],
2979 &v->DCCCMaxCompressedBlock[k],
2980 &v->DCCYIndependentBlock[k],
2981 &v->DCCCIndependentBlock[k]);
2982 }
2983
2984 {
2985 //Maximum Bandwidth Used
2986 v->TotalDataReadBandwidth = 0;
2987 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2988 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth
2989 + v->ReadBandwidthPlaneLuma[k]
2990 + v->ReadBandwidthPlaneChroma[k];
2991 }
2992 }
2993
2994 // VStartup Margin
2995 v->VStartupMargin = 0;
2996 v->FirstMainPlane = true;
2997 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2998 if (v->BlendingAndTiming[k] == k) {
2999 double margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k]
3000 / v->PixelClock[k];
3001 if (v->FirstMainPlane == true) {
3002 v->VStartupMargin = margin;
3003 v->FirstMainPlane = false;
3004 } else {
3005 v->VStartupMargin = dml_min(v->VStartupMargin, margin);
3006 }
3007 }
3008 }
3009
3010 // Stutter Efficiency
3011 CalculateStutterEfficiency(
3012 v->NumberOfActivePlanes,
3013 v->ROBBufferSizeInKByte,
3014 v->TotalDataReadBandwidth,
3015 v->DCFCLK,
3016 v->ReturnBW,
3017 v->SRExitTime,
3018 v->SynchronizedVBlank,
3019 v->DPPPerPlane,
3020 v->DETBufferSizeY,
3021 v->BytePerPixelY,
3022 v->BytePerPixelDETY,
3023 v->SwathWidthY,
3024 v->SwathHeightY,
3025 v->SwathHeightC,
3026 v->DCCRateLuma,
3027 v->DCCRateChroma,
3028 v->HTotal,
3029 v->VTotal,
3030 v->PixelClock,
3031 v->VRatio,
3032 v->SourceScan,
3033 v->BlockHeight256BytesY,
3034 v->BlockWidth256BytesY,
3035 v->BlockHeight256BytesC,
3036 v->BlockWidth256BytesC,
3037 v->DCCYMaxUncompressedBlock,
3038 v->DCCCMaxUncompressedBlock,
3039 v->VActive,
3040 v->DCCEnable,
3041 v->WritebackEnable,
3042 v->ReadBandwidthPlaneLuma,
3043 v->ReadBandwidthPlaneChroma,
3044 v->meta_row_bw,
3045 v->dpte_row_bw,
3046 &v->StutterEfficiencyNotIncludingVBlank,
3047 &v->StutterEfficiency,
3048 &v->StutterPeriod);
3049 }
3050
DisplayPipeConfiguration(struct display_mode_lib * mode_lib)3051 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3052 {
3053 // Display Pipe Configuration
3054 double BytePerPixDETY[DC__NUM_DPP__MAX] = { 0 };
3055 double BytePerPixDETC[DC__NUM_DPP__MAX] = { 0 };
3056 int BytePerPixY[DC__NUM_DPP__MAX] = { 0 };
3057 int BytePerPixC[DC__NUM_DPP__MAX] = { 0 };
3058 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX] = { 0 };
3059 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX] = { 0 };
3060 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX] = { 0 };
3061 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX] = { 0 };
3062 double dummy1[DC__NUM_DPP__MAX] = { 0 };
3063 double dummy2[DC__NUM_DPP__MAX] = { 0 };
3064 double dummy3[DC__NUM_DPP__MAX] = { 0 };
3065 double dummy4[DC__NUM_DPP__MAX] = { 0 };
3066 int dummy5[DC__NUM_DPP__MAX] = { 0 };
3067 int dummy6[DC__NUM_DPP__MAX] = { 0 };
3068 bool dummy7[DC__NUM_DPP__MAX] = { 0 };
3069 bool dummysinglestring = 0;
3070 unsigned int k;
3071
3072 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
3073
3074 dml30_CalculateBytePerPixelAnd256BBlockSizes(
3075 mode_lib->vba.SourcePixelFormat[k],
3076 mode_lib->vba.SurfaceTiling[k],
3077 &BytePerPixY[k],
3078 &BytePerPixC[k],
3079 &BytePerPixDETY[k],
3080 &BytePerPixDETC[k],
3081 &Read256BytesBlockHeightY[k],
3082 &Read256BytesBlockHeightC[k],
3083 &Read256BytesBlockWidthY[k],
3084 &Read256BytesBlockWidthC[k]);
3085 }
3086 CalculateSwathAndDETConfiguration(
3087 false,
3088 mode_lib->vba.NumberOfActivePlanes,
3089 mode_lib->vba.DETBufferSizeInKByte[0],
3090 dummy1,
3091 dummy2,
3092 mode_lib->vba.SourceScan,
3093 mode_lib->vba.SourcePixelFormat,
3094 mode_lib->vba.SurfaceTiling,
3095 mode_lib->vba.ViewportWidth,
3096 mode_lib->vba.ViewportHeight,
3097 mode_lib->vba.SurfaceWidthY,
3098 mode_lib->vba.SurfaceWidthC,
3099 mode_lib->vba.SurfaceHeightY,
3100 mode_lib->vba.SurfaceHeightC,
3101 Read256BytesBlockHeightY,
3102 Read256BytesBlockHeightC,
3103 Read256BytesBlockWidthY,
3104 Read256BytesBlockWidthC,
3105 mode_lib->vba.ODMCombineEnabled,
3106 mode_lib->vba.BlendingAndTiming,
3107 BytePerPixY,
3108 BytePerPixC,
3109 BytePerPixDETY,
3110 BytePerPixDETC,
3111 mode_lib->vba.HActive,
3112 mode_lib->vba.HRatio,
3113 mode_lib->vba.HRatioChroma,
3114 mode_lib->vba.DPPPerPlane,
3115 dummy5,
3116 dummy6,
3117 dummy3,
3118 dummy4,
3119 mode_lib->vba.SwathHeightY,
3120 mode_lib->vba.SwathHeightC,
3121 mode_lib->vba.DETBufferSizeY,
3122 mode_lib->vba.DETBufferSizeC,
3123 dummy7,
3124 &dummysinglestring);
3125 }
3126
dml30_CalculateBytePerPixelAnd256BBlockSizes(enum source_format_class SourcePixelFormat,enum dm_swizzle_mode SurfaceTiling,unsigned int * BytePerPixelY,unsigned int * BytePerPixelC,double * BytePerPixelDETY,double * BytePerPixelDETC,unsigned int * BlockHeight256BytesY,unsigned int * BlockHeight256BytesC,unsigned int * BlockWidth256BytesY,unsigned int * BlockWidth256BytesC)3127 void dml30_CalculateBytePerPixelAnd256BBlockSizes(
3128 enum source_format_class SourcePixelFormat,
3129 enum dm_swizzle_mode SurfaceTiling,
3130 unsigned int *BytePerPixelY,
3131 unsigned int *BytePerPixelC,
3132 double *BytePerPixelDETY,
3133 double *BytePerPixelDETC,
3134 unsigned int *BlockHeight256BytesY,
3135 unsigned int *BlockHeight256BytesC,
3136 unsigned int *BlockWidth256BytesY,
3137 unsigned int *BlockWidth256BytesC)
3138 {
3139 if (SourcePixelFormat == dm_444_64) {
3140 *BytePerPixelDETY = 8;
3141 *BytePerPixelDETC = 0;
3142 *BytePerPixelY = 8;
3143 *BytePerPixelC = 0;
3144 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3145 *BytePerPixelDETY = 4;
3146 *BytePerPixelDETC = 0;
3147 *BytePerPixelY = 4;
3148 *BytePerPixelC = 0;
3149 } else if (SourcePixelFormat == dm_444_16) {
3150 *BytePerPixelDETY = 2;
3151 *BytePerPixelDETC = 0;
3152 *BytePerPixelY = 2;
3153 *BytePerPixelC = 0;
3154 } else if (SourcePixelFormat == dm_444_8) {
3155 *BytePerPixelDETY = 1;
3156 *BytePerPixelDETC = 0;
3157 *BytePerPixelY = 1;
3158 *BytePerPixelC = 0;
3159 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3160 *BytePerPixelDETY = 4;
3161 *BytePerPixelDETC = 1;
3162 *BytePerPixelY = 4;
3163 *BytePerPixelC = 1;
3164 } else if (SourcePixelFormat == dm_420_8) {
3165 *BytePerPixelDETY = 1;
3166 *BytePerPixelDETC = 2;
3167 *BytePerPixelY = 1;
3168 *BytePerPixelC = 2;
3169 } else if (SourcePixelFormat == dm_420_12) {
3170 *BytePerPixelDETY = 2;
3171 *BytePerPixelDETC = 4;
3172 *BytePerPixelY = 2;
3173 *BytePerPixelC = 4;
3174 } else {
3175 *BytePerPixelDETY = 4.0 / 3;
3176 *BytePerPixelDETC = 8.0 / 3;
3177 *BytePerPixelY = 2;
3178 *BytePerPixelC = 4;
3179 }
3180
3181 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
3182 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8
3183 || SourcePixelFormat == dm_mono_16 || SourcePixelFormat == dm_mono_8
3184 || SourcePixelFormat == dm_rgbe)) {
3185 if (SurfaceTiling == dm_sw_linear) {
3186 *BlockHeight256BytesY = 1;
3187 } else if (SourcePixelFormat == dm_444_64) {
3188 *BlockHeight256BytesY = 4;
3189 } else if (SourcePixelFormat == dm_444_8) {
3190 *BlockHeight256BytesY = 16;
3191 } else {
3192 *BlockHeight256BytesY = 8;
3193 }
3194 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3195 *BlockHeight256BytesC = 0;
3196 *BlockWidth256BytesC = 0;
3197 } else {
3198 if (SurfaceTiling == dm_sw_linear) {
3199 *BlockHeight256BytesY = 1;
3200 *BlockHeight256BytesC = 1;
3201 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3202 *BlockHeight256BytesY = 8;
3203 *BlockHeight256BytesC = 16;
3204 } else if (SourcePixelFormat == dm_420_8) {
3205 *BlockHeight256BytesY = 16;
3206 *BlockHeight256BytesC = 8;
3207 } else {
3208 *BlockHeight256BytesY = 8;
3209 *BlockHeight256BytesC = 8;
3210 }
3211 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3212 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
3213 }
3214 }
3215
/*
 * Selects the wait time for a prefetch mode:
 *   PrefetchMode 0: max(DRAMClockChangeLatency + UrgentLatency,
 *                       SREnterPlusExitTime, UrgentLatency)
 *   PrefetchMode 1: max(SREnterPlusExitTime, UrgentLatency)
 *   any other mode: UrgentLatency
 */
static double CalculateTWait(
		unsigned int PrefetchMode,
		double DRAMClockChangeLatency,
		double UrgentLatency,
		double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(
				DRAMClockChangeLatency + UrgentLatency,
				dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
3231
/*
 * Returns the largest of three DISPCLK candidates computed from the writeback
 * configuration:
 *   - one driven by the horizontal tap count and horizontal ratio,
 *   - one driven by the vertical tap count, destination width and HTotal,
 *   - one driven by the vertical tap count, destination width, line buffer
 *     size and source width.
 *
 * WritebackPixelFormat and WritebackVRatio are part of the signature but are
 * not read by this function.
 */
double dml30_CalculateWriteBackDISPCLK(
		enum source_format_class WritebackPixelFormat,
		double PixelClock,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackHTaps,
		unsigned int WritebackVTaps,
		long WritebackSourceWidth,
		long WritebackDestinationWidth,
		unsigned int HTotal,
		unsigned int WritebackLineBufferSize)
{
	const double ClkFromHTaps =
			PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
	const double ClkFromVTaps =
			PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
	const double ClkFromLineBuffer =
			PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;

	return dml_max3(ClkFromHTaps, ClkFromVTaps, ClkFromLineBuffer);
}
3251
/*
 * Computes the writeback delay from the destination/source geometry.
 *
 * An unclamped "last output lines" figure is derived from the destination
 * height, source height, vertical ratio and vertical taps. When that figure
 * is negative the delay is 0; otherwise the delay is
 *   lines * line_length + (HTotal - WritebackDestinationWidth) + 80.
 *
 * WritebackPixelFormat and WritebackHRatio are accepted but not read.
 */
static double CalculateWriteBackDelay(
		enum source_format_class WritebackPixelFormat,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackVTaps,
		long WritebackDestinationWidth,
		long WritebackDestinationHeight,
		long WritebackSourceHeight,
		unsigned int HTotal)
{
	const double VInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
	const double LineLength = dml_max((double) WritebackDestinationWidth,
			dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
	const double LastOutputLines = WritebackDestinationHeight - 1
			- dml_ceil((WritebackSourceHeight - VInit) / WritebackVRatio, 1);

	if (LastOutputLines < 0)
		return 0;

	return LastOutputLines * LineLength + (HTotal - WritebackDestinationWidth) + 80;
}
3277
3278
/*
 * Produces four dynamic-metadata timing values:
 *   *Tsetup = (VUpdate offset + VUpdate width + VReady offset, all in pixels)
 *             / PixelClock
 *   *Tdmbf  = DynamicMetadataTransmittedBytes / 4 / DISPCLK
 *   *Tdmec  = HTotal / PixelClock
 *   *Tdmsks = half of VBlank * HTotal / PixelClock when no
 *             "lines before active" count is given, otherwise
 *             DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
 *             halved again when InterlaceEnable == 1 and
 *             ProgressiveToInterlaceUnitInOPP is false.
 */
static void CalculateDynamicMetadataParameters(int MaxInterDCNTileRepeaters, double DPPCLK, double DISPCLK,
		double DCFClkDeepSleep, double PixelClock, long HTotal, long VBlank, long DynamicMetadataTransmittedBytes,
		long DynamicMetadataLinesBeforeActiveRequired, int InterlaceEnable, bool ProgressiveToInterlaceUnitInOPP,
		double *Tsetup, double *Tdmbf, double *Tdmec, double *Tdmsks)
{
	const double RepeaterDelay = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	const double UpdateWidthPix = (14 / DCFClkDeepSleep + 12 / DPPCLK + RepeaterDelay) * PixelClock;
	const double ReadyOffsetPix = dml_max(150.0 / DPPCLK, RepeaterDelay + 20 / DCFClkDeepSleep + 10 / DPPCLK) * PixelClock;
	const double UpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
	double TdmsksValue;

	*Tsetup = (UpdateOffsetPix + UpdateWidthPix + ReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		TdmsksValue = VBlank * HTotal / PixelClock / 2.0;
	else
		TdmsksValue = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && !ProgressiveToInterlaceUnitInOPP)
		TdmsksValue = TdmsksValue / 2;

	*Tdmsks = TdmsksValue;
}
3304
CalculateRowBandwidth(bool GPUVMEnable,enum source_format_class SourcePixelFormat,double VRatio,double VRatioChroma,bool DCCEnable,double LineTime,unsigned int MetaRowByteLuma,unsigned int MetaRowByteChroma,unsigned int meta_row_height_luma,unsigned int meta_row_height_chroma,unsigned int PixelPTEBytesPerRowLuma,unsigned int PixelPTEBytesPerRowChroma,unsigned int dpte_row_height_luma,unsigned int dpte_row_height_chroma,double * meta_row_bw,double * dpte_row_bw)3305 static void CalculateRowBandwidth(
3306 bool GPUVMEnable,
3307 enum source_format_class SourcePixelFormat,
3308 double VRatio,
3309 double VRatioChroma,
3310 bool DCCEnable,
3311 double LineTime,
3312 unsigned int MetaRowByteLuma,
3313 unsigned int MetaRowByteChroma,
3314 unsigned int meta_row_height_luma,
3315 unsigned int meta_row_height_chroma,
3316 unsigned int PixelPTEBytesPerRowLuma,
3317 unsigned int PixelPTEBytesPerRowChroma,
3318 unsigned int dpte_row_height_luma,
3319 unsigned int dpte_row_height_chroma,
3320 double *meta_row_bw,
3321 double *dpte_row_bw)
3322 {
3323 if (DCCEnable != true) {
3324 *meta_row_bw = 0;
3325 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3326 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime)
3327 + VRatioChroma * MetaRowByteChroma
3328 / (meta_row_height_chroma * LineTime);
3329 } else {
3330 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3331 }
3332
3333 if (GPUVMEnable != true) {
3334 *dpte_row_bw = 0;
3335 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3336 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3337 + VRatioChroma * PixelPTEBytesPerRowChroma
3338 / (dpte_row_height_chroma * LineTime);
3339 } else {
3340 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
3341 }
3342 }
3343
CalculateFlipSchedule(struct display_mode_lib * mode_lib,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,double UrgentExtraLatency,double UrgentLatency,unsigned int GPUVMMaxPageTableLevels,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,bool GPUVMEnable,double HostVMMinPageSize,double PDEAndMetaPTEBytesPerFrame,double MetaRowBytes,double DPTEBytesPerRow,double BandwidthAvailableForImmediateFlip,unsigned int TotImmediateFlipBytes,enum source_format_class SourcePixelFormat,double LineTime,double VRatio,double VRatioChroma,double Tno_bw,bool DCCEnable,unsigned int dpte_row_height,unsigned int meta_row_height,unsigned int dpte_row_height_chroma,unsigned int meta_row_height_chroma,double * DestinationLinesToRequestVMInImmediateFlip,double * DestinationLinesToRequestRowInImmediateFlip,double * final_flip_bw,bool * ImmediateFlipSupportedForPipe)3344 static void CalculateFlipSchedule(
3345 struct display_mode_lib *mode_lib,
3346 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
3347 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
3348 double UrgentExtraLatency,
3349 double UrgentLatency,
3350 unsigned int GPUVMMaxPageTableLevels,
3351 bool HostVMEnable,
3352 unsigned int HostVMMaxNonCachedPageTableLevels,
3353 bool GPUVMEnable,
3354 double HostVMMinPageSize,
3355 double PDEAndMetaPTEBytesPerFrame,
3356 double MetaRowBytes,
3357 double DPTEBytesPerRow,
3358 double BandwidthAvailableForImmediateFlip,
3359 unsigned int TotImmediateFlipBytes,
3360 enum source_format_class SourcePixelFormat,
3361 double LineTime,
3362 double VRatio,
3363 double VRatioChroma,
3364 double Tno_bw,
3365 bool DCCEnable,
3366 unsigned int dpte_row_height,
3367 unsigned int meta_row_height,
3368 unsigned int dpte_row_height_chroma,
3369 unsigned int meta_row_height_chroma,
3370 double *DestinationLinesToRequestVMInImmediateFlip,
3371 double *DestinationLinesToRequestRowInImmediateFlip,
3372 double *final_flip_bw,
3373 bool *ImmediateFlipSupportedForPipe)
3374 {
3375 double min_row_time = 0.0;
3376 unsigned int HostVMDynamicLevelsTrips = 0;
3377 double TimeForFetchingMetaPTEImmediateFlip = 0;
3378 double TimeForFetchingRowInVBlankImmediateFlip = 0;
3379 double ImmediateFlipBW = 0;
3380 double HostVMInefficiencyFactor = 0;
3381
3382 if (GPUVMEnable == true && HostVMEnable == true) {
3383 HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
3384 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
3385 } else {
3386 HostVMInefficiencyFactor = 1;
3387 HostVMDynamicLevelsTrips = 0;
3388 }
3389
3390 if (GPUVMEnable == true || DCCEnable == true) {
3391 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
3392 }
3393
3394 if (GPUVMEnable == true) {
3395 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3396 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), LineTime / 4.0);
3397 } else {
3398 TimeForFetchingMetaPTEImmediateFlip = 0;
3399 }
3400
3401 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
3402 if ((GPUVMEnable == true || DCCEnable == true)) {
3403 TimeForFetchingRowInVBlankImmediateFlip = dml_max3((MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3404 UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4);
3405 } else {
3406 TimeForFetchingRowInVBlankImmediateFlip = 0;
3407 }
3408
3409 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
3410
3411 if (GPUVMEnable == true) {
3412 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
3413 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
3414 } else if ((GPUVMEnable == true || DCCEnable == true)) {
3415 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
3416 } else {
3417 *final_flip_bw = 0;
3418 }
3419
3420
3421 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
3422 if (GPUVMEnable == true && DCCEnable != true) {
3423 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
3424 } else if (GPUVMEnable != true && DCCEnable == true) {
3425 min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
3426 } else {
3427 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio,
3428 dpte_row_height_chroma * LineTime / VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
3429 }
3430 } else {
3431 if (GPUVMEnable == true && DCCEnable != true) {
3432 min_row_time = dpte_row_height * LineTime / VRatio;
3433 } else if (GPUVMEnable != true && DCCEnable == true) {
3434 min_row_time = meta_row_height * LineTime / VRatio;
3435 } else {
3436 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
3437 }
3438 }
3439
3440 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
3441 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3442 *ImmediateFlipSupportedForPipe = false;
3443 } else {
3444 *ImmediateFlipSupportedForPipe = true;
3445 }
3446 }
3447
TruncToValidBPP(double LinkBitRate,int Lanes,long HTotal,long HActive,double PixelClock,double DesiredBPP,bool DSCEnable,enum output_encoder_class Output,enum output_format_class Format,unsigned int DSCInputBitPerComponent,int DSCSlices,int AudioRate,int AudioLayout,enum odm_combine_mode ODMCombine)3448 static double TruncToValidBPP(
3449 double LinkBitRate,
3450 int Lanes,
3451 long HTotal,
3452 long HActive,
3453 double PixelClock,
3454 double DesiredBPP,
3455 bool DSCEnable,
3456 enum output_encoder_class Output,
3457 enum output_format_class Format,
3458 unsigned int DSCInputBitPerComponent,
3459 int DSCSlices,
3460 int AudioRate,
3461 int AudioLayout,
3462 enum odm_combine_mode ODMCombine)
3463 {
3464 double MaxLinkBPP = 0;
3465 int MinDSCBPP = 0;
3466 double MaxDSCBPP = 0;
3467 int NonDSCBPP0 = 0;
3468 int NonDSCBPP1 = 0;
3469 int NonDSCBPP2 = 0;
3470
3471 if (Format == dm_420) {
3472 NonDSCBPP0 = 12;
3473 NonDSCBPP1 = 15;
3474 NonDSCBPP2 = 18;
3475 MinDSCBPP = 6;
3476 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
3477 } else if (Format == dm_444) {
3478 NonDSCBPP0 = 24;
3479 NonDSCBPP1 = 30;
3480 NonDSCBPP2 = 36;
3481 MinDSCBPP = 8;
3482 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3483 } else {
3484 NonDSCBPP0 = 16;
3485 NonDSCBPP1 = 20;
3486 NonDSCBPP2 = 24;
3487
3488 if (Format == dm_n422) {
3489 MinDSCBPP = 7;
3490 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3491 } else {
3492 MinDSCBPP = 8;
3493 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3494 }
3495 }
3496
3497 if (DSCEnable && Output == dm_dp) {
3498 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3499 } else {
3500 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
3501 }
3502
3503 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3504 MaxLinkBPP = 16;
3505 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
3506 MaxLinkBPP = 32;
3507 }
3508
3509
3510 if (DesiredBPP == 0) {
3511 if (DSCEnable) {
3512 if (MaxLinkBPP < MinDSCBPP) {
3513 return BPP_INVALID;
3514 } else if (MaxLinkBPP >= MaxDSCBPP) {
3515 return MaxDSCBPP;
3516 } else {
3517 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3518 }
3519 } else {
3520 if (MaxLinkBPP >= NonDSCBPP2) {
3521 return NonDSCBPP2;
3522 } else if (MaxLinkBPP >= NonDSCBPP1) {
3523 return NonDSCBPP1;
3524 } else if (MaxLinkBPP >= NonDSCBPP0) {
3525 return NonDSCBPP0;
3526 } else {
3527 return BPP_INVALID;
3528 }
3529 }
3530 } else {
3531 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0 || DesiredBPP == 18)) ||
3532 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3533 return BPP_INVALID;
3534 } else {
3535 return DesiredBPP;
3536 }
3537 }
3538 }
3539
dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib * mode_lib)3540 void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3541 {
3542 struct vba_vars_st *v = &mode_lib->vba;
3543 int MinPrefetchMode, MaxPrefetchMode;
3544 int i, start_state;
3545 unsigned int j, k, m;
3546 bool EnoughWritebackUnits = true;
3547 bool WritebackModeSupport = true;
3548 bool ViewportExceedsSurface = false;
3549 double MaxTotalVActiveRDBandwidth = 0;
3550 long ReorderingBytes = 0;
3551 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX] = { 0 };
3552
3553 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3554
3555 if (mode_lib->validate_max_state)
3556 start_state = v->soc.num_states - 1;
3557 else
3558 start_state = 0;
3559
3560 CalculateMinAndMaxPrefetchMode(
3561 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3562 &MinPrefetchMode, &MaxPrefetchMode);
3563
3564 /*Scale Ratio, taps Support Check*/
3565
3566 v->ScaleRatioAndTapsSupport = true;
3567 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3568 if (v->ScalerEnabled[k] == false
3569 && ((v->SourcePixelFormat[k] != dm_444_64
3570 && v->SourcePixelFormat[k] != dm_444_32
3571 && v->SourcePixelFormat[k] != dm_444_16
3572 && v->SourcePixelFormat[k] != dm_mono_16
3573 && v->SourcePixelFormat[k] != dm_mono_8
3574 && v->SourcePixelFormat[k] != dm_rgbe
3575 && v->SourcePixelFormat[k] != dm_rgbe_alpha)
3576 || v->HRatio[k] != 1.0
3577 || v->htaps[k] != 1.0
3578 || v->VRatio[k] != 1.0
3579 || v->vtaps[k] != 1.0)) {
3580 v->ScaleRatioAndTapsSupport = false;
3581 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0
3582 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3583 || (v->htaps[k] > 1.0
3584 && (v->htaps[k] % 2) == 1)
3585 || v->HRatio[k] > v->MaxHSCLRatio
3586 || v->VRatio[k] > v->MaxVSCLRatio
3587 || v->HRatio[k] > v->htaps[k]
3588 || v->VRatio[k] > v->vtaps[k]
3589 || (v->SourcePixelFormat[k] != dm_444_64
3590 && v->SourcePixelFormat[k] != dm_444_32
3591 && v->SourcePixelFormat[k] != dm_444_16
3592 && v->SourcePixelFormat[k] != dm_mono_16
3593 && v->SourcePixelFormat[k] != dm_mono_8
3594 && v->SourcePixelFormat[k] != dm_rgbe
3595 && (v->VTAPsChroma[k] < 1
3596 || v->VTAPsChroma[k] > 8
3597 || v->HTAPsChroma[k] < 1
3598 || v->HTAPsChroma[k] > 8
3599 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3600 || v->HRatioChroma[k] > v->MaxHSCLRatio
3601 || v->VRatioChroma[k] > v->MaxVSCLRatio
3602 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3603 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3604 v->ScaleRatioAndTapsSupport = false;
3605 }
3606 }
3607 /*Source Format, Pixel Format and Scan Support Check*/
3608
3609 v->SourceFormatPixelAndScanSupport = true;
3610 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3611 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
3612 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t || v->SurfaceTiling[k] == dm_sw_64kb_d_x)
3613 && !(v->SourcePixelFormat[k] == dm_444_64))) {
3614 v->SourceFormatPixelAndScanSupport = false;
3615 }
3616 }
3617 /*Bandwidth Support Check*/
3618
3619 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3620 dml30_CalculateBytePerPixelAnd256BBlockSizes(
3621 v->SourcePixelFormat[k],
3622 v->SurfaceTiling[k],
3623 &v->BytePerPixelY[k],
3624 &v->BytePerPixelC[k],
3625 &v->BytePerPixelInDETY[k],
3626 &v->BytePerPixelInDETC[k],
3627 &v->Read256BlockHeightY[k],
3628 &v->Read256BlockHeightC[k],
3629 &v->Read256BlockWidthY[k],
3630 &v->Read256BlockWidthC[k]);
3631 }
3632 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3633 if (v->SourceScan[k] != dm_vert) {
3634 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3635 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3636 } else {
3637 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3638 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3639 }
3640 }
3641 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3642 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0) / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3643 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0) / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3644 }
3645 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3646 if (v->WritebackEnable[k] == true
3647 && v->WritebackPixelFormat[k] == dm_444_64) {
3648 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k]
3649 * v->WritebackDestinationHeight[k]
3650 / (v->WritebackSourceHeight[k]
3651 * v->HTotal[k]
3652 / v->PixelClock[k]) * 8.0;
3653 } else if (v->WritebackEnable[k] == true) {
3654 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k]
3655 * v->WritebackDestinationHeight[k]
3656 / (v->WritebackSourceHeight[k]
3657 * v->HTotal[k]
3658 / v->PixelClock[k]) * 4.0;
3659 } else {
3660 v->WriteBandwidth[k] = 0.0;
3661 }
3662 }
3663
3664 /*Writeback Latency support check*/
3665
3666 v->WritebackLatencySupport = true;
3667 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3668 if (v->WritebackEnable[k] == true) {
3669 if (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave ||
3670 v->WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave) {
3671 if (v->WriteBandwidth[k]
3672 > 2.0 * v->WritebackInterfaceBufferSize * 1024
3673 / v->WritebackLatency) {
3674 v->WritebackLatencySupport = false;
3675 }
3676 } else {
3677 if (v->WriteBandwidth[k]
3678 > v->WritebackInterfaceBufferSize * 1024
3679 / v->WritebackLatency) {
3680 v->WritebackLatencySupport = false;
3681 }
3682 }
3683 }
3684 }
3685
3686 /*Writeback Mode Support Check*/
3687
3688 v->TotalNumberOfActiveWriteback = 0;
3689 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3690 if (v->WritebackEnable[k] == true) {
3691 v->TotalNumberOfActiveWriteback =
3692 v->TotalNumberOfActiveWriteback + 1;
3693 }
3694 }
3695
3696 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
3697 EnoughWritebackUnits = false;
3698 }
3699 if (!v->WritebackSupportInterleaveAndUsingWholeBufferForASingleStream
3700 && (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave
3701 || v->WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave)) {
3702
3703 WritebackModeSupport = false;
3704 }
3705 if (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave && v->TotalNumberOfActiveWriteback > 1) {
3706 WritebackModeSupport = false;
3707 }
3708
3709 /*Writeback Scale Ratio and Taps Support Check*/
3710
3711 v->WritebackScaleRatioAndTapsSupport = true;
3712 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3713 if (v->WritebackEnable[k] == true) {
3714 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio
3715 || v->WritebackVRatio[k]
3716 > v->WritebackMaxVSCLRatio
3717 || v->WritebackHRatio[k]
3718 < v->WritebackMinHSCLRatio
3719 || v->WritebackVRatio[k]
3720 < v->WritebackMinVSCLRatio
3721 || v->WritebackHTaps[k]
3722 > v->WritebackMaxHSCLTaps
3723 || v->WritebackVTaps[k]
3724 > v->WritebackMaxVSCLTaps
3725 || v->WritebackHRatio[k]
3726 > v->WritebackHTaps[k]
3727 || v->WritebackVRatio[k]
3728 > v->WritebackVTaps[k]
3729 || (v->WritebackHTaps[k] > 2.0
3730 && ((v->WritebackHTaps[k] % 2)
3731 == 1))) {
3732 v->WritebackScaleRatioAndTapsSupport = false;
3733 }
3734 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
3735 v->WritebackScaleRatioAndTapsSupport = false;
3736 }
3737 }
3738 }
3739 /*Maximum DISPCLK/DPPCLK Support check*/
3740
3741 v->WritebackRequiredDISPCLK = 0.0;
3742 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3743 if (v->WritebackEnable[k] == true) {
3744 v->WritebackRequiredDISPCLK = dml_max(v->WritebackRequiredDISPCLK,
3745 dml30_CalculateWriteBackDISPCLK(
3746 v->WritebackPixelFormat[k],
3747 v->PixelClock[k],
3748 v->WritebackHRatio[k],
3749 v->WritebackVRatio[k],
3750 v->WritebackHTaps[k],
3751 v->WritebackVTaps[k],
3752 v->WritebackSourceWidth[k],
3753 v->WritebackDestinationWidth[k],
3754 v->HTotal[k],
3755 v->WritebackLineBufferSize));
3756 }
3757 }
3758 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3759 if (v->HRatio[k] > 1.0) {
3760 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
3761 } else {
3762 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3763 }
3764 if (v->BytePerPixelC[k] == 0.0) {
3765 v->PSCL_FACTOR_CHROMA[k] = 0.0;
3766 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
3767 * dml_max3(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 1.0);
3768 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3769 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3770 }
3771 } else {
3772 if (v->HRatioChroma[k] > 1.0) {
3773 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
3774 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
3775 } else {
3776 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3777 }
3778 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] * dml_max5(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
3779 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
3780 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
3781 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
3782 1.0);
3783 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
3784 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3785 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3786 }
3787 }
3788 }
3789 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3790 int MaximumSwathWidthSupportLuma = 0;
3791 int MaximumSwathWidthSupportChroma = 0;
3792
3793 if (v->SurfaceTiling[k] == dm_sw_linear) {
3794 MaximumSwathWidthSupportLuma = 8192.0;
3795 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
3796 MaximumSwathWidthSupportLuma = 2880.0;
3797 } else {
3798 MaximumSwathWidthSupportLuma = 5760.0;
3799 }
3800
3801 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
3802 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
3803 } else {
3804 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
3805 }
3806 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
3807 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
3808 if (v->BytePerPixelC[k] == 0.0) {
3809 v->MaximumSwathWidthInLineBufferChroma = 0;
3810 } else {
3811 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
3812 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
3813 }
3814 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
3815 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
3816 }
3817
3818 CalculateSwathAndDETConfiguration(
3819 true,
3820 v->NumberOfActivePlanes,
3821 v->DETBufferSizeInKByte[0],
3822 v->MaximumSwathWidthLuma,
3823 v->MaximumSwathWidthChroma,
3824 v->SourceScan,
3825 v->SourcePixelFormat,
3826 v->SurfaceTiling,
3827 v->ViewportWidth,
3828 v->ViewportHeight,
3829 v->SurfaceWidthY,
3830 v->SurfaceWidthC,
3831 v->SurfaceHeightY,
3832 v->SurfaceHeightC,
3833 v->Read256BlockHeightY,
3834 v->Read256BlockHeightC,
3835 v->Read256BlockWidthY,
3836 v->Read256BlockWidthC,
3837 v->odm_combine_dummy,
3838 v->BlendingAndTiming,
3839 v->BytePerPixelY,
3840 v->BytePerPixelC,
3841 v->BytePerPixelInDETY,
3842 v->BytePerPixelInDETC,
3843 v->HActive,
3844 v->HRatio,
3845 v->HRatioChroma,
3846 v->DPPPerPlane,
3847 v->swath_width_luma_ub,
3848 v->swath_width_chroma_ub,
3849 v->SwathWidthY,
3850 v->SwathWidthC,
3851 v->SwathHeightY,
3852 v->SwathHeightC,
3853 v->DETBufferSizeY,
3854 v->DETBufferSizeC,
3855 v->SingleDPPViewportSizeSupportPerPlane,
3856 &v->ViewportSizeSupport[0][0]);
3857
3858 for (i = start_state; i < v->soc.num_states; i++) {
3859 for (j = 0; j < 2; j++) {
3860 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
3861 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
3862 v->RequiredDISPCLK[i][j] = 0.0;
3863 v->DISPCLK_DPPCLK_Support[i][j] = true;
3864 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3865 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3866 * (1.0 + v->DISPCLKRampingMargin / 100.0);
3867 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3868 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3869 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3870 }
3871 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3872 * (1 + v->DISPCLKRampingMargin / 100.0);
3873 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3874 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3875 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3876 }
3877 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3878 * (1 + v->DISPCLKRampingMargin / 100.0);
3879 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3880 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3881 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3882 }
3883
3884 if (v->ODMCombinePolicy == dm_odm_combine_policy_none) {
3885 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
3886 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
3887 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
3888 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3889 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3890 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
3891 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
3892 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
3893 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
3894 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
3895 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3896 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3897 } else {
3898 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
3899 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
3900 }
3901 if (v->DSCEnabled[k] && v->HActive[k] > DCN30_MAX_DSC_IMAGE_WIDTH
3902 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
3903 if (v->HActive[k] / 2 > DCN30_MAX_DSC_IMAGE_WIDTH) {
3904 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
3905 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
3906 } else {
3907 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3908 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3909 }
3910 }
3911 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN30_MAX_FMT_420_BUFFER_WIDTH
3912 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
3913 if (v->HActive[k] / 2 > DCN30_MAX_FMT_420_BUFFER_WIDTH) {
3914 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
3915 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
3916 } else {
3917 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3918 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3919 }
3920 }
3921 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
3922 v->MPCCombine[i][j][k] = false;
3923 v->NoOfDPP[i][j][k] = 4;
3924 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
3925 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
3926 v->MPCCombine[i][j][k] = false;
3927 v->NoOfDPP[i][j][k] = 2;
3928 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
3929 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
3930 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= v->MaxDppclkRoundedDownToDFSGranularity
3931 && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
3932 v->MPCCombine[i][j][k] = false;
3933 v->NoOfDPP[i][j][k] = 1;
3934 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3935 } else {
3936 v->MPCCombine[i][j][k] = true;
3937 v->NoOfDPP[i][j][k] = 2;
3938 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
3939 }
3940 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
3941 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3942 > v->MaxDppclkRoundedDownToDFSGranularity) || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
3943 v->DISPCLK_DPPCLK_Support[i][j] = false;
3944 }
3945 }
3946 v->TotalNumberOfActiveDPP[i][j] = 0;
3947 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
3948 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3949 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
3950 if (v->NoOfDPP[i][j][k] == 1)
3951 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
3952 }
3953 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never) {
3954 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
3955 double BWOfNonSplitPlaneOfMaximumBandwidth = 0;
3956 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
3957 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
3958 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
3959 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3960 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
3961 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
3962 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
3963 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
3964 }
3965 }
3966 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
3967 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
3968 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
3969 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
3970 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
3971 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
3972 }
3973 }
3974 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
3975 v->RequiredDISPCLK[i][j] = 0.0;
3976 v->DISPCLK_DPPCLK_Support[i][j] = true;
3977 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3978 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
3979 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
3980 v->MPCCombine[i][j][k] = true;
3981 v->NoOfDPP[i][j][k] = 2;
3982 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
3983 } else {
3984 v->MPCCombine[i][j][k] = false;
3985 v->NoOfDPP[i][j][k] = 1;
3986 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3987 }
3988 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
3989 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3990 * (1.0 + v->DISPCLKRampingMargin / 100.0);
3991 } else {
3992 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3993 }
3994 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
3995 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3996 > v->MaxDppclkRoundedDownToDFSGranularity) || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
3997 v->DISPCLK_DPPCLK_Support[i][j] = false;
3998 }
3999 }
4000 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4001 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4002 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4003 }
4004 }
4005 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4006 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4007 v->DISPCLK_DPPCLK_Support[i][j] = false;
4008 }
4009 }
4010 }
4011
4012 /*Total Available Pipes Support Check*/
4013
4014 for (i = start_state; i < v->soc.num_states; i++) {
4015 for (j = 0; j < 2; j++) {
4016 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4017 v->TotalAvailablePipesSupport[i][j] = true;
4018 } else {
4019 v->TotalAvailablePipesSupport[i][j] = false;
4020 }
4021 }
4022 }
4023 /*Display IO and DSC Support Check*/
4024
4025 v->NonsupportedDSCInputBPC = false;
4026 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4027 if (!(v->DSCInputBitPerComponent[k] == 12.0
4028 || v->DSCInputBitPerComponent[k] == 10.0
4029 || v->DSCInputBitPerComponent[k] == 8.0)) {
4030 v->NonsupportedDSCInputBPC = true;
4031 }
4032 }
4033
4034 /*Number Of DSC Slices*/
4035 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4036 if (v->BlendingAndTiming[k] == k) {
4037 if (v->PixelClockBackEnd[k] > 3200) {
4038 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4039 } else if (v->PixelClockBackEnd[k] > 1360) {
4040 v->NumberOfDSCSlices[k] = 8;
4041 } else if (v->PixelClockBackEnd[k] > 680) {
4042 v->NumberOfDSCSlices[k] = 4;
4043 } else if (v->PixelClockBackEnd[k] > 340) {
4044 v->NumberOfDSCSlices[k] = 2;
4045 } else {
4046 v->NumberOfDSCSlices[k] = 1;
4047 }
4048 } else {
4049 v->NumberOfDSCSlices[k] = 0;
4050 }
4051 }
4052
4053 for (i = start_state; i < v->soc.num_states; i++) {
4054 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4055 v->RequiresDSC[i][k] = false;
4056 v->RequiresFEC[i][k] = false;
4057 if (v->BlendingAndTiming[k] == k) {
4058 if (v->Output[k] == dm_hdmi) {
4059 v->RequiresDSC[i][k] = false;
4060 v->RequiresFEC[i][k] = false;
4061 v->OutputBppPerState[i][k] = TruncToValidBPP(
4062 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4063 3,
4064 v->HTotal[k],
4065 v->HActive[k],
4066 v->PixelClockBackEnd[k],
4067 v->ForcedOutputLinkBPP[k],
4068 false,
4069 v->Output[k],
4070 v->OutputFormat[k],
4071 v->DSCInputBitPerComponent[k],
4072 v->NumberOfDSCSlices[k],
4073 v->AudioSampleRate[k],
4074 v->AudioSampleLayout[k],
4075 v->ODMCombineEnablePerState[i][k]);
4076 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) {
4077 if (v->DSCEnable[k] == true) {
4078 v->RequiresDSC[i][k] = true;
4079 v->LinkDSCEnable = true;
4080 if (v->Output[k] == dm_dp) {
4081 v->RequiresFEC[i][k] = true;
4082 } else {
4083 v->RequiresFEC[i][k] = false;
4084 }
4085 } else {
4086 v->RequiresDSC[i][k] = false;
4087 v->LinkDSCEnable = false;
4088 v->RequiresFEC[i][k] = false;
4089 }
4090
4091 v->Outbpp = BPP_INVALID;
4092 if (v->PHYCLKPerState[i] >= 270.0) {
4093 v->Outbpp = TruncToValidBPP(
4094 (1.0 - v->Downspreading / 100.0) * 2700,
4095 v->OutputLinkDPLanes[k],
4096 v->HTotal[k],
4097 v->HActive[k],
4098 v->PixelClockBackEnd[k],
4099 v->ForcedOutputLinkBPP[k],
4100 v->LinkDSCEnable,
4101 v->Output[k],
4102 v->OutputFormat[k],
4103 v->DSCInputBitPerComponent[k],
4104 v->NumberOfDSCSlices[k],
4105 v->AudioSampleRate[k],
4106 v->AudioSampleLayout[k],
4107 v->ODMCombineEnablePerState[i][k]);
4108 v->OutputBppPerState[i][k] = v->Outbpp;
4109 // TODO: Need some other way to handle this nonsense
4110 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4111 }
4112 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4113 v->Outbpp = TruncToValidBPP(
4114 (1.0 - v->Downspreading / 100.0) * 5400,
4115 v->OutputLinkDPLanes[k],
4116 v->HTotal[k],
4117 v->HActive[k],
4118 v->PixelClockBackEnd[k],
4119 v->ForcedOutputLinkBPP[k],
4120 v->LinkDSCEnable,
4121 v->Output[k],
4122 v->OutputFormat[k],
4123 v->DSCInputBitPerComponent[k],
4124 v->NumberOfDSCSlices[k],
4125 v->AudioSampleRate[k],
4126 v->AudioSampleLayout[k],
4127 v->ODMCombineEnablePerState[i][k]);
4128 v->OutputBppPerState[i][k] = v->Outbpp;
4129 // TODO: Need some other way to handle this nonsense
4130 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4131 }
4132 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4133 v->Outbpp = TruncToValidBPP(
4134 (1.0 - v->Downspreading / 100.0) * 8100,
4135 v->OutputLinkDPLanes[k],
4136 v->HTotal[k],
4137 v->HActive[k],
4138 v->PixelClockBackEnd[k],
4139 v->ForcedOutputLinkBPP[k],
4140 v->LinkDSCEnable,
4141 v->Output[k],
4142 v->OutputFormat[k],
4143 v->DSCInputBitPerComponent[k],
4144 v->NumberOfDSCSlices[k],
4145 v->AudioSampleRate[k],
4146 v->AudioSampleLayout[k],
4147 v->ODMCombineEnablePerState[i][k]);
4148 if (v->Outbpp == BPP_INVALID && v->ForcedOutputLinkBPP[k] == 0) {
4149 //if (v->Outbpp == BPP_INVALID && v->DSCEnabled[k] == dm_dsc_enable_only_if_necessary && v->ForcedOutputLinkBPP[k] == 0) {
4150 v->RequiresDSC[i][k] = true;
4151 v->LinkDSCEnable = true;
4152 if (v->Output[k] == dm_dp) {
4153 v->RequiresFEC[i][k] = true;
4154 }
4155 v->Outbpp = TruncToValidBPP(
4156 (1.0 - v->Downspreading / 100.0) * 8100,
4157 v->OutputLinkDPLanes[k],
4158 v->HTotal[k],
4159 v->HActive[k],
4160 v->PixelClockBackEnd[k],
4161 v->ForcedOutputLinkBPP[k],
4162 v->LinkDSCEnable,
4163 v->Output[k],
4164 v->OutputFormat[k],
4165 v->DSCInputBitPerComponent[k],
4166 v->NumberOfDSCSlices[k],
4167 v->AudioSampleRate[k],
4168 v->AudioSampleLayout[k],
4169 v->ODMCombineEnablePerState[i][k]);
4170 }
4171 v->OutputBppPerState[i][k] = v->Outbpp;
4172 // TODO: Need some other way to handle this nonsense
4173 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4174 }
4175 }
4176 } else {
4177 v->OutputBppPerState[i][k] = 0;
4178 }
4179 }
4180 }
4181 for (i = start_state; i < v->soc.num_states; i++) {
4182 v->DIOSupport[i] = true;
4183 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4184 if (!v->skip_dio_check[k] && v->BlendingAndTiming[k] == k && (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)
4185 && (v->OutputBppPerState[i][k] == 0
4186 || (v->OutputFormat[k] == dm_420 && v->Interlace[k] == true && v->ProgressiveToInterlaceUnitInOPP == true))) {
4187 v->DIOSupport[i] = false;
4188 }
4189 }
4190 }
4191
4192 for (i = start_state; i < v->soc.num_states; ++i) {
4193 v->ODMCombine4To1SupportCheckOK[i] = true;
4194 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4195 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4196 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)) {
4197 v->ODMCombine4To1SupportCheckOK[i] = false;
4198 }
4199 }
4200 }
4201
4202 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4203
4204 for (i = start_state; i < v->soc.num_states; i++) {
4205 v->NotEnoughDSCUnits[i] = false;
4206 v->TotalDSCUnitsRequired = 0.0;
4207 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4208 if (v->RequiresDSC[i][k] == true) {
4209 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4210 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4211 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4212 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4213 } else {
4214 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4215 }
4216 }
4217 }
4218 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4219 v->NotEnoughDSCUnits[i] = true;
4220 }
4221 }
4222 /*DSC Delay per state*/
4223
4224 for (i = start_state; i < v->soc.num_states; i++) {
4225 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4226 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4227 v->BPP = 0.0;
4228 } else {
4229 v->BPP = v->OutputBppPerState[i][k];
4230 }
4231 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4232 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4233 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4234 v->DSCInputBitPerComponent[k],
4235 v->BPP,
4236 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4237 v->NumberOfDSCSlices[k],
4238 v->OutputFormat[k],
4239 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4240 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4241 v->DSCDelayPerState[i][k] = 2.0
4242 * dscceComputeDelay(
4243 v->DSCInputBitPerComponent[k],
4244 v->BPP,
4245 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4246 v->NumberOfDSCSlices[k] / 2,
4247 v->OutputFormat[k],
4248 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4249 } else {
4250 v->DSCDelayPerState[i][k] = 4.0
4251 * (dscceComputeDelay(
4252 v->DSCInputBitPerComponent[k],
4253 v->BPP,
4254 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4255 v->NumberOfDSCSlices[k] / 4,
4256 v->OutputFormat[k],
4257 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4258 }
4259 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4260 } else {
4261 v->DSCDelayPerState[i][k] = 0.0;
4262 }
4263 }
4264 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4265 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4266 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4267 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4268 }
4269 }
4270 }
4271 }
4272
4273 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4274 //
4275 for (i = start_state; i < mode_lib->soc.num_states; ++i) {
4276 for (j = 0; j <= 1; ++j) {
4277 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4278 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4279 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4280 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4281 }
4282
4283 CalculateSwathAndDETConfiguration(
4284 false,
4285 v->NumberOfActivePlanes,
4286 v->DETBufferSizeInKByte[0],
4287 v->MaximumSwathWidthLuma,
4288 v->MaximumSwathWidthChroma,
4289 v->SourceScan,
4290 v->SourcePixelFormat,
4291 v->SurfaceTiling,
4292 v->ViewportWidth,
4293 v->ViewportHeight,
4294 v->SurfaceWidthY,
4295 v->SurfaceWidthC,
4296 v->SurfaceHeightY,
4297 v->SurfaceHeightC,
4298 v->Read256BlockHeightY,
4299 v->Read256BlockHeightC,
4300 v->Read256BlockWidthY,
4301 v->Read256BlockWidthC,
4302 v->ODMCombineEnableThisState,
4303 v->BlendingAndTiming,
4304 v->BytePerPixelY,
4305 v->BytePerPixelC,
4306 v->BytePerPixelInDETY,
4307 v->BytePerPixelInDETC,
4308 v->HActive,
4309 v->HRatio,
4310 v->HRatioChroma,
4311 v->NoOfDPPThisState,
4312 v->swath_width_luma_ub_this_state,
4313 v->swath_width_chroma_ub_this_state,
4314 v->SwathWidthYThisState,
4315 v->SwathWidthCThisState,
4316 v->SwathHeightYThisState,
4317 v->SwathHeightCThisState,
4318 v->DETBufferSizeYThisState,
4319 v->DETBufferSizeCThisState,
4320 v->dummystring,
4321 &v->ViewportSizeSupport[i][j]);
4322
4323 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4324 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4325 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4326 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4327 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4328 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4329 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4330 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4331 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4332 }
4333
4334 }
4335 }
4336 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4337 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4338 }
4339
4340 for (i = start_state; i < v->soc.num_states; i++) {
4341 for (j = 0; j < 2; j++) {
4342 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4343 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4344 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4345 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4346 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4347 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4348 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4349 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4350 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4351 }
4352
4353 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4354 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4355 if (v->DCCEnable[k] == true) {
4356 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4357 }
4358 }
4359
4360 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4361 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
4362 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4363
4364 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
4365 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
4366 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4367 } else {
4368 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4369 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4370 }
4371
4372 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4373 mode_lib,
4374 v->DCCEnable[k],
4375 v->Read256BlockHeightC[k],
4376 v->Read256BlockWidthY[k],
4377 v->SourcePixelFormat[k],
4378 v->SurfaceTiling[k],
4379 v->BytePerPixelC[k],
4380 v->SourceScan[k],
4381 v->SwathWidthCThisState[k],
4382 v->ViewportHeightChroma[k],
4383 v->GPUVMEnable,
4384 v->HostVMEnable,
4385 v->HostVMMaxNonCachedPageTableLevels,
4386 v->GPUVMMinPageSize,
4387 v->HostVMMinPageSize,
4388 v->PTEBufferSizeInRequestsForChroma,
4389 v->PitchC[k],
4390 0.0,
4391 &v->MacroTileWidthC[k],
4392 &v->MetaRowBytesC,
4393 &v->DPTEBytesPerRowC,
4394 &v->PTEBufferSizeNotExceededC[i][j][k],
4395 &v->dummyinteger7,
4396 &v->dpte_row_height_chroma[k],
4397 &v->dummyinteger28,
4398 &v->dummyinteger26,
4399 &v->dummyinteger23,
4400 &v->meta_row_height_chroma[k],
4401 &v->dummyinteger8,
4402 &v->dummyinteger9,
4403 &v->dummyinteger19,
4404 &v->dummyinteger20,
4405 &v->dummyinteger17,
4406 &v->dummyinteger10,
4407 &v->dummyinteger11);
4408
4409 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4410 mode_lib,
4411 v->VRatioChroma[k],
4412 v->VTAPsChroma[k],
4413 v->Interlace[k],
4414 v->ProgressiveToInterlaceUnitInOPP,
4415 v->SwathHeightCThisState[k],
4416 v->ViewportYStartC[k],
4417 &v->PrefillC[k],
4418 &v->MaxNumSwC[k]);
4419 } else {
4420 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4421 v->PTEBufferSizeInRequestsForChroma = 0;
4422 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4423 v->MetaRowBytesC = 0.0;
4424 v->DPTEBytesPerRowC = 0.0;
4425 v->PrefetchLinesC[i][j][k] = 0.0;
4426 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4427 }
4428 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4429 mode_lib,
4430 v->DCCEnable[k],
4431 v->Read256BlockHeightY[k],
4432 v->Read256BlockWidthY[k],
4433 v->SourcePixelFormat[k],
4434 v->SurfaceTiling[k],
4435 v->BytePerPixelY[k],
4436 v->SourceScan[k],
4437 v->SwathWidthYThisState[k],
4438 v->ViewportHeight[k],
4439 v->GPUVMEnable,
4440 v->HostVMEnable,
4441 v->HostVMMaxNonCachedPageTableLevels,
4442 v->GPUVMMinPageSize,
4443 v->HostVMMinPageSize,
4444 v->PTEBufferSizeInRequestsForLuma,
4445 v->PitchY[k],
4446 v->DCCMetaPitchY[k],
4447 &v->MacroTileWidthY[k],
4448 &v->MetaRowBytesY,
4449 &v->DPTEBytesPerRowY,
4450 &v->PTEBufferSizeNotExceededY[i][j][k],
4451 v->dummyinteger4,
4452 &v->dpte_row_height[k],
4453 &v->dummyinteger29,
4454 &v->dummyinteger27,
4455 &v->dummyinteger24,
4456 &v->meta_row_height[k],
4457 &v->dummyinteger25,
4458 &v->dpte_group_bytes[k],
4459 &v->dummyinteger21,
4460 &v->dummyinteger22,
4461 &v->dummyinteger18,
4462 &v->dummyinteger5,
4463 &v->dummyinteger6);
4464 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4465 mode_lib,
4466 v->VRatio[k],
4467 v->vtaps[k],
4468 v->Interlace[k],
4469 v->ProgressiveToInterlaceUnitInOPP,
4470 v->SwathHeightYThisState[k],
4471 v->ViewportYStartY[k],
4472 &v->PrefillY[k],
4473 &v->MaxNumSwY[k]);
4474 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4475 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4476 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4477
4478 CalculateRowBandwidth(
4479 v->GPUVMEnable,
4480 v->SourcePixelFormat[k],
4481 v->VRatio[k],
4482 v->VRatioChroma[k],
4483 v->DCCEnable[k],
4484 v->HTotal[k] / v->PixelClock[k],
4485 v->MetaRowBytesY,
4486 v->MetaRowBytesC,
4487 v->meta_row_height[k],
4488 v->meta_row_height_chroma[k],
4489 v->DPTEBytesPerRowY,
4490 v->DPTEBytesPerRowC,
4491 v->dpte_row_height[k],
4492 v->dpte_row_height_chroma[k],
4493 &v->meta_row_bandwidth[i][j][k],
4494 &v->dpte_row_bandwidth[i][j][k]);
4495 }
4496 v->UrgLatency[i] = CalculateUrgentLatency(
4497 v->UrgentLatencyPixelDataOnly,
4498 v->UrgentLatencyPixelMixedWithVMData,
4499 v->UrgentLatencyVMDataOnly,
4500 v->DoUrgentLatencyAdjustment,
4501 v->UrgentLatencyAdjustmentFabricClockComponent,
4502 v->UrgentLatencyAdjustmentFabricClockReference,
4503 v->FabricClockPerState[i]);
4504
4505 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4506 CalculateUrgentBurstFactor(
4507 v->swath_width_luma_ub_this_state[k],
4508 v->swath_width_chroma_ub_this_state[k],
4509 v->DETBufferSizeInKByte[0],
4510 v->SwathHeightYThisState[k],
4511 v->SwathHeightCThisState[k],
4512 v->HTotal[k] / v->PixelClock[k],
4513 v->UrgLatency[i],
4514 v->CursorBufferSize,
4515 v->CursorWidth[k][0],
4516 v->CursorBPP[k][0],
4517 v->VRatio[k],
4518 v->VRatioChroma[k],
4519 v->BytePerPixelInDETY[k],
4520 v->BytePerPixelInDETC[k],
4521 v->DETBufferSizeYThisState[k],
4522 v->DETBufferSizeCThisState[k],
4523 &v->UrgentBurstFactorCursor[k],
4524 &v->UrgentBurstFactorLuma[k],
4525 &v->UrgentBurstFactorChroma[k],
4526 &NotUrgentLatencyHiding[k]);
4527 }
4528
4529 v->NotUrgentLatencyHiding[i][j] = false;
4530 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4531 if (NotUrgentLatencyHiding[k]) {
4532 v->NotUrgentLatencyHiding[i][j] = true;
4533 }
4534 }
4535
4536 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4537 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
4538 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
4539 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
4540 }
4541
4542 v->TotalVActivePixelBandwidth[i][j] = 0;
4543 v->TotalVActiveCursorBandwidth[i][j] = 0;
4544 v->TotalMetaRowBandwidth[i][j] = 0;
4545 v->TotalDPTERowBandwidth[i][j] = 0;
4546 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4547 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
4548 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
4549 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
4550 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
4551 }
4552
4553 CalculateDCFCLKDeepSleep(
4554 mode_lib,
4555 v->NumberOfActivePlanes,
4556 v->BytePerPixelY,
4557 v->BytePerPixelC,
4558 v->VRatio,
4559 v->VRatioChroma,
4560 v->SwathWidthYThisState,
4561 v->SwathWidthCThisState,
4562 v->NoOfDPPThisState,
4563 v->HRatio,
4564 v->HRatioChroma,
4565 v->PixelClock,
4566 v->PSCL_FACTOR,
4567 v->PSCL_FACTOR_CHROMA,
4568 v->RequiredDPPCLKThisState,
4569 v->ReadBandwidthLuma,
4570 v->ReadBandwidthChroma,
4571 v->ReturnBusWidth,
4572 &v->ProjectedDCFCLKDeepSleep[i][j]);
4573 }
4574 }
4575
4576 //Calculate Return BW
4577
4578 for (i = start_state; i < mode_lib->soc.num_states; ++i) {
4579 for (j = 0; j <= 1; ++j) {
4580 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4581 if (v->BlendingAndTiming[k] == k) {
4582 if (v->WritebackEnable[k] == true) {
4583 v->WritebackDelayTime[k] = v->WritebackLatency
4584 + CalculateWriteBackDelay(
4585 v->WritebackPixelFormat[k],
4586 v->WritebackHRatio[k],
4587 v->WritebackVRatio[k],
4588 v->WritebackVTaps[k],
4589 v->WritebackDestinationWidth[k],
4590 v->WritebackDestinationHeight[k],
4591 v->WritebackSourceHeight[k],
4592 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
4593 } else {
4594 v->WritebackDelayTime[k] = 0.0;
4595 }
4596 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4597 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
4598 v->WritebackDelayTime[k] = dml_max(
4599 v->WritebackDelayTime[k],
4600 v->WritebackLatency
4601 + CalculateWriteBackDelay(
4602 v->WritebackPixelFormat[m],
4603 v->WritebackHRatio[m],
4604 v->WritebackVRatio[m],
4605 v->WritebackVTaps[m],
4606 v->WritebackDestinationWidth[m],
4607 v->WritebackDestinationHeight[m],
4608 v->WritebackSourceHeight[m],
4609 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
4610 }
4611 }
4612 }
4613 }
4614 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4615 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4616 if (v->BlendingAndTiming[k] == m) {
4617 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
4618 }
4619 }
4620 }
4621 v->MaxMaxVStartup[i][j] = 0;
4622 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4623 v->MaximumVStartup[i][j][k] = v->VTotal[k] - v->VActive[k]
4624 - dml_max(1.0, dml_ceil(1.0 * v->WritebackDelayTime[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0));
4625 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
4626 }
4627 }
4628 }
4629
4630 ReorderingBytes = v->NumberOfChannels
4631 * dml_max3(
4632 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
4633 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
4634 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
4635 v->FinalDRAMClockChangeLatency = (v->DRAMClockChangeLatencyOverride > 0 ? v->DRAMClockChangeLatencyOverride : v->DRAMClockChangeLatency);
4636
4637 for (i = start_state; i < mode_lib->soc.num_states; ++i) {
4638 for (j = 0; j <= 1; ++j) {
4639 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
4640 }
4641 }
4642
4643 if (v->UseMinimumRequiredDCFCLK == true) {
4644 UseMinimumDCFCLK(mode_lib, v, MaxPrefetchMode, ReorderingBytes);
4645
4646 if (v->ClampMinDCFCLK) {
4647 /* Clamp calculated values to actual minimum */
4648 for (i = start_state; i < mode_lib->soc.num_states; ++i) {
4649 for (j = 0; j <= 1; ++j) {
4650 if (v->DCFCLKState[i][j] < mode_lib->soc.min_dcfclk) {
4651 v->DCFCLKState[i][j] = mode_lib->soc.min_dcfclk;
4652 }
4653 }
4654 }
4655 }
4656 }
4657
4658 for (i = start_state; i < mode_lib->soc.num_states; ++i) {
4659 for (j = 0; j <= 1; ++j) {
4660 v->IdealSDPPortBandwidthPerState[i][j] = dml_min3(
4661 v->ReturnBusWidth * v->DCFCLKState[i][j],
4662 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth,
4663 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
4664 if (v->HostVMEnable != true) {
4665 v->ReturnBWPerState[i][j] = v->IdealSDPPortBandwidthPerState[i][j] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly
4666 / 100;
4667 } else {
4668 v->ReturnBWPerState[i][j] = v->IdealSDPPortBandwidthPerState[i][j]
4669 * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100;
4670 }
4671 }
4672 }
4673
4674 //Re-ordering Buffer Support Check
4675
4676 for (i = start_state; i < mode_lib->soc.num_states; ++i) {
4677 for (j = 0; j <= 1; ++j) {
4678 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
4679 > (v->RoundTripPingLatencyCycles + 32) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
4680 v->ROBSupport[i][j] = true;
4681 } else {
4682 v->ROBSupport[i][j] = false;
4683 }
4684 }
4685 }
4686
4687 //Vertical Active BW support check
4688
4689 MaxTotalVActiveRDBandwidth = 0;
4690 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4691 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4692 }
4693
4694 for (i = start_state; i < mode_lib->soc.num_states; ++i) {
4695 for (j = 0; j <= 1; ++j) {
4696 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
4697 v->IdealSDPPortBandwidthPerState[i][j] * v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
4698 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation
4699 / 100);
4700 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
4701 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
4702 } else {
4703 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
4704 }
4705 }
4706 }
4707
4708 //Prefetch Check
4709
4710 for (i = start_state; i < mode_lib->soc.num_states; ++i) {
4711 for (j = 0; j <= 1; ++j) {
4712 int NextPrefetchModeState = MinPrefetchMode;
4713
4714 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
4715
4716 v->BandwidthWithoutPrefetchSupported[i][j] = true;
4717 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j] + v->TotalDPTERowBandwidth[i][j]
4718 > v->ReturnBWPerState[i][j] || v->NotUrgentLatencyHiding[i][j]) {
4719 v->BandwidthWithoutPrefetchSupported[i][j] = false;
4720 }
4721
4722 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4723 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4724 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4725 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4726 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4727 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4728 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4729 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4730 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4731 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4732 v->ODMCombineEnabled[k] = v->ODMCombineEnablePerState[i][k];
4733 }
4734
4735 v->ExtraLatency = CalculateExtraLatency(
4736 v->RoundTripPingLatencyCycles,
4737 ReorderingBytes,
4738 v->DCFCLKState[i][j],
4739 v->TotalNumberOfActiveDPP[i][j],
4740 v->PixelChunkSizeInKByte,
4741 v->TotalNumberOfDCCActiveDPP[i][j],
4742 v->MetaChunkSize,
4743 v->ReturnBWPerState[i][j],
4744 v->GPUVMEnable,
4745 v->HostVMEnable,
4746 v->NumberOfActivePlanes,
4747 v->NoOfDPPThisState,
4748 v->dpte_group_bytes,
4749 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4750 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4751 v->HostVMMinPageSize,
4752 v->HostVMMaxNonCachedPageTableLevels);
4753
4754 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
4755 do {
4756 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
4757 v->MaxVStartup = v->NextMaxVStartup;
4758
4759 v->TWait = CalculateTWait(v->PrefetchModePerState[i][j], v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
4760
4761 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4762 Pipe myPipe = { 0 };
4763
4764 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
4765 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
4766 myPipe.PixelClock = v->PixelClock[k];
4767 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
4768 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
4769 myPipe.ScalerEnabled = v->ScalerEnabled[k];
4770 myPipe.SourceScan = v->SourceScan[k];
4771 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
4772 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
4773 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
4774 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
4775 myPipe.InterlaceEnable = v->Interlace[k];
4776 myPipe.NumberOfCursors = v->NumberOfCursors[k];
4777 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
4778 myPipe.HTotal = v->HTotal[k];
4779 myPipe.DCCEnable = v->DCCEnable[k];
4780 myPipe.ODMCombineEnabled = !!v->ODMCombineEnabled[k];
4781
4782 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
4783 mode_lib,
4784 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4785 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4786 &myPipe,
4787 v->DSCDelayPerState[i][k],
4788 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
4789 v->DPPCLKDelaySCL,
4790 v->DPPCLKDelaySCLLBOnly,
4791 v->DPPCLKDelayCNVCCursor,
4792 v->DISPCLKDelaySubtotal,
4793 v->SwathWidthYThisState[k] / v->HRatio[k],
4794 v->OutputFormat[k],
4795 v->MaxInterDCNTileRepeaters,
4796 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
4797 v->MaximumVStartup[i][j][k],
4798 v->GPUVMMaxPageTableLevels,
4799 v->GPUVMEnable,
4800 v->HostVMEnable,
4801 v->HostVMMaxNonCachedPageTableLevels,
4802 v->HostVMMinPageSize,
4803 v->DynamicMetadataEnable[k],
4804 v->DynamicMetadataVMEnabled,
4805 v->DynamicMetadataLinesBeforeActiveRequired[k],
4806 v->DynamicMetadataTransmittedBytes[k],
4807 v->UrgLatency[i],
4808 v->ExtraLatency,
4809 v->TimeCalc,
4810 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
4811 v->MetaRowBytes[i][j][k],
4812 v->DPTEBytesPerRow[i][j][k],
4813 v->PrefetchLinesY[i][j][k],
4814 v->SwathWidthYThisState[k],
4815 v->BytePerPixelY[k],
4816 v->PrefillY[k],
4817 v->MaxNumSwY[k],
4818 v->PrefetchLinesC[i][j][k],
4819 v->SwathWidthCThisState[k],
4820 v->BytePerPixelC[k],
4821 v->PrefillC[k],
4822 v->MaxNumSwC[k],
4823 v->swath_width_luma_ub_this_state[k],
4824 v->swath_width_chroma_ub_this_state[k],
4825 v->SwathHeightYThisState[k],
4826 v->SwathHeightCThisState[k],
4827 v->TWait,
4828 v->ProgressiveToInterlaceUnitInOPP,
4829 &v->DSTXAfterScaler[k],
4830 &v->DSTYAfterScaler[k],
4831 &v->LineTimesForPrefetch[k],
4832 &v->PrefetchBW[k],
4833 &v->LinesForMetaPTE[k],
4834 &v->LinesForMetaAndDPTERow[k],
4835 &v->VRatioPreY[i][j][k],
4836 &v->VRatioPreC[i][j][k],
4837 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
4838 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
4839 &v->NoTimeForDynamicMetadata[i][j][k],
4840 &v->Tno_bw[k],
4841 &v->prefetch_vmrow_bw[k],
4842 &v->Tdmdl_vm[k],
4843 &v->Tdmdl[k],
4844 &v->VUpdateOffsetPix[k],
4845 &v->VUpdateWidthPix[k],
4846 &v->VReadyOffsetPix[k]);
4847 }
4848
4849 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4850 CalculateUrgentBurstFactor(
4851 v->swath_width_luma_ub_this_state[k],
4852 v->swath_width_chroma_ub_this_state[k],
4853 v->DETBufferSizeInKByte[0],
4854 v->SwathHeightYThisState[k],
4855 v->SwathHeightCThisState[k],
4856 v->HTotal[k] / v->PixelClock[k],
4857 v->UrgLatency[i],
4858 v->CursorBufferSize,
4859 v->CursorWidth[k][0],
4860 v->CursorBPP[k][0],
4861 v->VRatioPreY[i][j][k],
4862 v->VRatioPreC[i][j][k],
4863 v->BytePerPixelInDETY[k],
4864 v->BytePerPixelInDETC[k],
4865 v->DETBufferSizeYThisState[k],
4866 v->DETBufferSizeCThisState[k],
4867 &v->UrgentBurstFactorCursorPre[k],
4868 &v->UrgentBurstFactorLumaPre[k],
4869 &v->UrgentBurstFactorChromaPre[k],
4870 &v->NoUrgentLatencyHidingPre[k]);
4871 }
4872
4873 v->MaximumReadBandwidthWithPrefetch = 0.0;
4874 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4875 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k])
4876 * v->VRatioPreY[i][j][k];
4877
4878 v->MaximumReadBandwidthWithPrefetch = v->MaximumReadBandwidthWithPrefetch
4879 + dml_max4(
4880 v->VActivePixelBandwidth[i][j][k],
4881 v->VActiveCursorBandwidth[i][j][k]
4882 + v->NoOfDPP[i][j][k] * (v->meta_row_bandwidth[i][j][k] + v->dpte_row_bandwidth[i][j][k]),
4883 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
4884 v->NoOfDPP[i][j][k]
4885 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] * v->UrgentBurstFactorLumaPre[k]
4886 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
4887 * v->UrgentBurstFactorChromaPre[k])
4888 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
4889 }
4890
4891 v->NotEnoughUrgentLatencyHidingPre = false;
4892 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4893 if (v->NoUrgentLatencyHidingPre[k] == true) {
4894 v->NotEnoughUrgentLatencyHidingPre = true;
4895 }
4896 }
4897
4898 v->PrefetchSupported[i][j] = true;
4899 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
4900 || v->NotEnoughUrgentLatencyHidingPre == 1) {
4901 v->PrefetchSupported[i][j] = false;
4902 }
4903 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4904 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
4905 || v->NoTimeForPrefetch[i][j][k] == true) {
4906 v->PrefetchSupported[i][j] = false;
4907 }
4908 }
4909
4910 v->DynamicMetadataSupported[i][j] = true;
4911 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4912 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
4913 v->DynamicMetadataSupported[i][j] = false;
4914 }
4915 }
4916
4917 v->VRatioInPrefetchSupported[i][j] = true;
4918 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4919 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
4920 v->VRatioInPrefetchSupported[i][j] = false;
4921 }
4922 }
4923 v->AnyLinesForVMOrRowTooLarge = false;
4924 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4925 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
4926 v->AnyLinesForVMOrRowTooLarge = true;
4927 }
4928 }
4929
4930 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
4931 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
4932 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4933 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
4934 - dml_max(
4935 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
4936 v->NoOfDPP[i][j][k]
4937 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] * v->UrgentBurstFactorLumaPre[k]
4938 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
4939 * v->UrgentBurstFactorChromaPre[k])
4940 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
4941 }
4942 v->TotImmediateFlipBytes = 0.0;
4943 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4944 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + v->NoOfDPP[i][j][k] * (v->PDEAndMetaPTEBytesPerFrame[i][j][k]
4945 + v->MetaRowBytes[i][j][k] + v->DPTEBytesPerRow[i][j][k]);
4946 }
4947
4948 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4949 CalculateFlipSchedule(
4950 mode_lib,
4951 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4952 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4953 v->ExtraLatency,
4954 v->UrgLatency[i],
4955 v->GPUVMMaxPageTableLevels,
4956 v->HostVMEnable,
4957 v->HostVMMaxNonCachedPageTableLevels,
4958 v->GPUVMEnable,
4959 v->HostVMMinPageSize,
4960 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
4961 v->MetaRowBytes[i][j][k],
4962 v->DPTEBytesPerRow[i][j][k],
4963 v->BandwidthAvailableForImmediateFlip,
4964 v->TotImmediateFlipBytes,
4965 v->SourcePixelFormat[k],
4966 v->HTotal[k] / v->PixelClock[k],
4967 v->VRatio[k],
4968 v->VRatioChroma[k],
4969 v->Tno_bw[k],
4970 v->DCCEnable[k],
4971 v->dpte_row_height[k],
4972 v->meta_row_height[k],
4973 v->dpte_row_height_chroma[k],
4974 v->meta_row_height_chroma[k],
4975 &v->DestinationLinesToRequestVMInImmediateFlip[k],
4976 &v->DestinationLinesToRequestRowInImmediateFlip[k],
4977 &v->final_flip_bw[k],
4978 &v->ImmediateFlipSupportedForPipe[k]);
4979 }
4980 v->total_dcn_read_bw_with_flip = 0.0;
4981 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4982 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
4983 + dml_max3(
4984 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
4985 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
4986 + v->VActiveCursorBandwidth[i][j][k],
4987 v->NoOfDPP[i][j][k]
4988 * (v->final_flip_bw[k]
4989 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
4990 * v->UrgentBurstFactorLumaPre[k]
4991 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
4992 * v->UrgentBurstFactorChromaPre[k])
4993 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
4994 }
4995 v->ImmediateFlipSupportedForState[i][j] = true;
4996 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
4997 v->ImmediateFlipSupportedForState[i][j] = false;
4998 }
4999 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5000 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5001 v->ImmediateFlipSupportedForState[i][j] = false;
5002 }
5003 }
5004 } else {
5005 v->ImmediateFlipSupportedForState[i][j] = false;
5006 }
5007 if (v->MaxVStartup <= 13 || v->AnyLinesForVMOrRowTooLarge == false) {
5008 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5009 NextPrefetchModeState = NextPrefetchModeState + 1;
5010 } else {
5011 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5012 }
5013 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5014 && ((v->HostVMEnable == false && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5015 || v->ImmediateFlipSupportedForState[i][j] == true))
5016 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5017
5018 CalculateWatermarksAndDRAMSpeedChangeSupport(
5019 mode_lib,
5020 v->PrefetchModePerState[i][j],
5021 v->NumberOfActivePlanes,
5022 v->MaxLineBufferLines,
5023 v->LineBufferSize,
5024 v->DPPOutputBufferPixels,
5025 v->DETBufferSizeInKByte[0],
5026 v->WritebackInterfaceBufferSize,
5027 v->DCFCLKState[i][j],
5028 v->ReturnBWPerState[i][j],
5029 v->GPUVMEnable,
5030 v->dpte_group_bytes,
5031 v->MetaChunkSize,
5032 v->UrgLatency[i],
5033 v->ExtraLatency,
5034 v->WritebackLatency,
5035 v->WritebackChunkSize,
5036 v->SOCCLKPerState[i],
5037 v->FinalDRAMClockChangeLatency,
5038 v->SRExitTime,
5039 v->SREnterPlusExitTime,
5040 v->ProjectedDCFCLKDeepSleep[i][j],
5041 v->NoOfDPPThisState,
5042 v->DCCEnable,
5043 v->RequiredDPPCLKThisState,
5044 v->DETBufferSizeYThisState,
5045 v->DETBufferSizeCThisState,
5046 v->SwathHeightYThisState,
5047 v->SwathHeightCThisState,
5048 v->LBBitPerPixel,
5049 v->SwathWidthYThisState,
5050 v->SwathWidthCThisState,
5051 v->HRatio,
5052 v->HRatioChroma,
5053 v->vtaps,
5054 v->VTAPsChroma,
5055 v->VRatio,
5056 v->VRatioChroma,
5057 v->HTotal,
5058 v->PixelClock,
5059 v->BlendingAndTiming,
5060 v->BytePerPixelInDETY,
5061 v->BytePerPixelInDETC,
5062 v->DSTXAfterScaler,
5063 v->DSTYAfterScaler,
5064 v->WritebackEnable,
5065 v->WritebackPixelFormat,
5066 v->WritebackDestinationWidth,
5067 v->WritebackDestinationHeight,
5068 v->WritebackSourceHeight,
5069 &v->DRAMClockChangeSupport[i][j],
5070 &v->UrgentWatermark,
5071 &v->WritebackUrgentWatermark,
5072 &v->DRAMClockChangeWatermark,
5073 &v->WritebackDRAMClockChangeWatermark,
5074 &v->StutterExitWatermark,
5075 &v->StutterEnterPlusExitWatermark,
5076 &v->MinActiveDRAMClockChangeLatencySupported);
5077 }
5078 }
5079
5080 /*PTE Buffer Size Check*/
5081
5082 for (i = start_state; i < v->soc.num_states; i++) {
5083 for (j = 0; j < 2; j++) {
5084 v->PTEBufferSizeNotExceeded[i][j] = true;
5085 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5086 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5087 v->PTEBufferSizeNotExceeded[i][j] = false;
5088 }
5089 }
5090 }
5091 }
5092 /*Cursor Support Check*/
5093
5094 v->CursorSupport = true;
5095 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5096 if (v->CursorWidth[k][0] > 0.0) {
5097 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5098 v->CursorSupport = false;
5099 }
5100 }
5101 }
5102 /*Valid Pitch Check*/
5103
5104 v->PitchSupport = true;
5105 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5106 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5107 if (v->DCCEnable[k] == true) {
5108 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5109 } else {
5110 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5111 }
5112 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
5113 && v->SourcePixelFormat[k] != dm_rgbe && v->SourcePixelFormat[k] != dm_mono_8) {
5114 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5115 if (v->DCCEnable[k] == true) {
5116 v->AlignedDCCMetaPitchC[k] = dml_ceil(dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]), 64.0 * v->Read256BlockWidthC[k]);
5117 } else {
5118 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5119 }
5120 } else {
5121 v->AlignedCPitch[k] = v->PitchC[k];
5122 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5123 }
5124 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k] || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k]
5125 || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5126 v->PitchSupport = false;
5127 }
5128 }
5129
5130 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5131 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k])
5132 ViewportExceedsSurface = true;
5133
5134 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5135 && v->SourcePixelFormat[k] != dm_444_8 && v->SourcePixelFormat[k] != dm_rgbe) {
5136 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5137 ViewportExceedsSurface = true;
5138 }
5139 }
5140 }
5141 /*Mode Support, Voltage State and SOC Configuration*/
5142
5143 for (i = v->soc.num_states - 1; i >= start_state; i--) {
5144 for (j = 0; j < 2; j++) {
5145 if (v->ScaleRatioAndTapsSupport == 1 && v->SourceFormatPixelAndScanSupport == 1 && v->ViewportSizeSupport[i][j] == 1
5146 && v->DIOSupport[i] == 1 && v->ODMCombine4To1SupportCheckOK[i] == 1
5147 && v->NotEnoughDSCUnits[i] == 0
5148 && v->DTBCLKRequiredMoreThanSupported[i] == 0
5149 && v->ROBSupport[i][j] == 1 && v->DISPCLK_DPPCLK_Support[i][j] == 1 && v->TotalAvailablePipesSupport[i][j] == 1
5150 && EnoughWritebackUnits == 1 && WritebackModeSupport == 1
5151 && v->WritebackLatencySupport == 1 && v->WritebackScaleRatioAndTapsSupport == 1 && v->CursorSupport == 1 && v->PitchSupport == 1
5152 && ViewportExceedsSurface == 0 && v->PrefetchSupported[i][j] == 1 && v->DynamicMetadataSupported[i][j] == 1
5153 && v->TotalVerticalActiveBandwidthSupport[i][j] == 1 && v->VRatioInPrefetchSupported[i][j] == 1
5154 && v->PTEBufferSizeNotExceeded[i][j] == 1 && v->NonsupportedDSCInputBPC == 0
5155 && ((v->HostVMEnable == 0 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5156 || v->ImmediateFlipSupportedForState[i][j] == true)) {
5157 v->ModeSupport[i][j] = true;
5158 } else {
5159 v->ModeSupport[i][j] = false;
5160 }
5161 }
5162 }
5163 {
5164 unsigned int MaximumMPCCombine = 0;
5165 for (i = v->soc.num_states; i >= start_state; i--) {
5166 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5167 v->VoltageLevel = i;
5168 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5169 if (v->ModeSupport[i][1] == true) {
5170 MaximumMPCCombine = 1;
5171 } else {
5172 MaximumMPCCombine = 0;
5173 }
5174 }
5175 }
5176 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5177 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5178 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5179 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5180 }
5181 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5182 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5183 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5184 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5185 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5186 v->maxMpcComb = MaximumMPCCombine;
5187 }
5188 }
5189
CalculateWatermarksAndDRAMSpeedChangeSupport(struct display_mode_lib * mode_lib,unsigned int PrefetchMode,unsigned int NumberOfActivePlanes,unsigned int MaxLineBufferLines,unsigned int LineBufferSize,unsigned int DPPOutputBufferPixels,unsigned int DETBufferSizeInKByte,unsigned int WritebackInterfaceBufferSize,double DCFCLK,double ReturnBW,bool GPUVMEnable,unsigned int dpte_group_bytes[],unsigned int MetaChunkSize,double UrgentLatency,double ExtraLatency,double WritebackLatency,double WritebackChunkSize,double SOCCLK,double DRAMClockChangeLatency,double SRExitTime,double SREnterPlusExitTime,double DCFCLKDeepSleep,unsigned int DPPPerPlane[],bool DCCEnable[],double DPPCLK[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],unsigned int SwathHeightY[],unsigned int SwathHeightC[],unsigned int LBBitPerPixel[],double SwathWidthY[],double SwathWidthC[],double HRatio[],double HRatioChroma[],unsigned int vtaps[],unsigned int VTAPsChroma[],double VRatio[],double VRatioChroma[],unsigned int HTotal[],double PixelClock[],unsigned int BlendingAndTiming[],double BytePerPixelDETY[],double BytePerPixelDETC[],double DSTXAfterScaler[],double DSTYAfterScaler[],bool WritebackEnable[],enum source_format_class WritebackPixelFormat[],double WritebackDestinationWidth[],double WritebackDestinationHeight[],double WritebackSourceHeight[],enum clock_change_support * DRAMClockChangeSupport,double * UrgentWatermark,double * WritebackUrgentWatermark,double * DRAMClockChangeWatermark,double * WritebackDRAMClockChangeWatermark,double * StutterExitWatermark,double * StutterEnterPlusExitWatermark,double * MinActiveDRAMClockChangeLatencySupported)5190 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5191 struct display_mode_lib *mode_lib,
5192 unsigned int PrefetchMode,
5193 unsigned int NumberOfActivePlanes,
5194 unsigned int MaxLineBufferLines,
5195 unsigned int LineBufferSize,
5196 unsigned int DPPOutputBufferPixels,
5197 unsigned int DETBufferSizeInKByte,
5198 unsigned int WritebackInterfaceBufferSize,
5199 double DCFCLK,
5200 double ReturnBW,
5201 bool GPUVMEnable,
5202 unsigned int dpte_group_bytes[],
5203 unsigned int MetaChunkSize,
5204 double UrgentLatency,
5205 double ExtraLatency,
5206 double WritebackLatency,
5207 double WritebackChunkSize,
5208 double SOCCLK,
5209 double DRAMClockChangeLatency,
5210 double SRExitTime,
5211 double SREnterPlusExitTime,
5212 double DCFCLKDeepSleep,
5213 unsigned int DPPPerPlane[],
5214 bool DCCEnable[],
5215 double DPPCLK[],
5216 unsigned int DETBufferSizeY[],
5217 unsigned int DETBufferSizeC[],
5218 unsigned int SwathHeightY[],
5219 unsigned int SwathHeightC[],
5220 unsigned int LBBitPerPixel[],
5221 double SwathWidthY[],
5222 double SwathWidthC[],
5223 double HRatio[],
5224 double HRatioChroma[],
5225 unsigned int vtaps[],
5226 unsigned int VTAPsChroma[],
5227 double VRatio[],
5228 double VRatioChroma[],
5229 unsigned int HTotal[],
5230 double PixelClock[],
5231 unsigned int BlendingAndTiming[],
5232 double BytePerPixelDETY[],
5233 double BytePerPixelDETC[],
5234 double DSTXAfterScaler[],
5235 double DSTYAfterScaler[],
5236 bool WritebackEnable[],
5237 enum source_format_class WritebackPixelFormat[],
5238 double WritebackDestinationWidth[],
5239 double WritebackDestinationHeight[],
5240 double WritebackSourceHeight[],
5241 enum clock_change_support *DRAMClockChangeSupport,
5242 double *UrgentWatermark,
5243 double *WritebackUrgentWatermark,
5244 double *DRAMClockChangeWatermark,
5245 double *WritebackDRAMClockChangeWatermark,
5246 double *StutterExitWatermark,
5247 double *StutterEnterPlusExitWatermark,
5248 double *MinActiveDRAMClockChangeLatencySupported)
5249 {
5250 double EffectiveLBLatencyHidingY = 0;
5251 double EffectiveLBLatencyHidingC = 0;
5252 double LinesInDETY[DC__NUM_DPP__MAX] = { 0 };
5253 double LinesInDETC = 0;
5254 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX] = { 0 };
5255 unsigned int LinesInDETCRoundedDownToSwath = 0;
5256 double FullDETBufferingTimeY[DC__NUM_DPP__MAX] = { 0 };
5257 double FullDETBufferingTimeC = 0;
5258 double ActiveDRAMClockChangeLatencyMarginY = 0;
5259 double ActiveDRAMClockChangeLatencyMarginC = 0;
5260 double WritebackDRAMClockChangeLatencyMargin = 0;
5261 double PlaneWithMinActiveDRAMClockChangeMargin = 0;
5262 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 0;
5263 double FullDETBufferingTimeYStutterCriticalPlane = 0;
5264 double TimeToFinishSwathTransferStutterCriticalPlane = 0;
5265 double WritebackDRAMClockChangeLatencyHiding = 0;
5266 unsigned int k, j;
5267
5268 mode_lib->vba.TotalActiveDPP = 0;
5269 mode_lib->vba.TotalDCCActiveDPP = 0;
5270 for (k = 0; k < NumberOfActivePlanes; ++k) {
5271 mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + DPPPerPlane[k];
5272 if (DCCEnable[k] == true) {
5273 mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP + DPPPerPlane[k];
5274 }
5275 }
5276
5277 *UrgentWatermark = UrgentLatency + ExtraLatency;
5278
5279 *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;
5280
5281 mode_lib->vba.TotalActiveWriteback = 0;
5282 for (k = 0; k < NumberOfActivePlanes; ++k) {
5283 if (WritebackEnable[k] == true) {
5284 mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1;
5285 }
5286 }
5287
5288 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5289 *WritebackUrgentWatermark = WritebackLatency;
5290 } else {
5291 *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5292 }
5293
5294 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5295 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
5296 } else {
5297 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5298 }
5299
5300 for (k = 0; k < NumberOfActivePlanes; ++k) {
5301
5302 mode_lib->vba.LBLatencyHidingSourceLinesY = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1);
5303
5304 mode_lib->vba.LBLatencyHidingSourceLinesC = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1);
5305
5306 EffectiveLBLatencyHidingY = mode_lib->vba.LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]);
5307
5308 EffectiveLBLatencyHidingC = mode_lib->vba.LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
5309
5310 LinesInDETY[k] = (double) DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k];
5311 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5312 FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
5313 if (BytePerPixelDETC[k] > 0) {
5314 LinesInDETC = mode_lib->vba.DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5315 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5316 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k];
5317 } else {
5318 LinesInDETC = 0;
5319 FullDETBufferingTimeC = 999999;
5320 }
5321
5322 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY[k] - *UrgentWatermark - (HTotal[k] / PixelClock[k]) * (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) - *DRAMClockChangeWatermark;
5323
5324 if (NumberOfActivePlanes > 1) {
5325 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
5326 }
5327
5328 if (BytePerPixelDETC[k] > 0) {
5329 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC - *UrgentWatermark - (HTotal[k] / PixelClock[k]) * (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) - *DRAMClockChangeWatermark;
5330
5331 if (NumberOfActivePlanes > 1) {
5332 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k];
5333 }
5334 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5335 } else {
5336 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5337 }
5338
5339 if (WritebackEnable[k] == true) {
5340
5341 WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
5342 if (WritebackPixelFormat[k] == dm_444_64) {
5343 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5344 }
5345 if (mode_lib->vba.WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave) {
5346 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding * 2;
5347 }
5348 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - mode_lib->vba.WritebackDRAMClockChangeWatermark;
5349 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
5350 }
5351 }
5352
5353 mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999;
5354 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5355 for (k = 0; k < NumberOfActivePlanes; ++k) {
5356 if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] < mode_lib->vba.MinActiveDRAMClockChangeMargin) {
5357 mode_lib->vba.MinActiveDRAMClockChangeMargin = mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
5358 if (BlendingAndTiming[k] == k) {
5359 PlaneWithMinActiveDRAMClockChangeMargin = k;
5360 } else {
5361 for (j = 0; j < NumberOfActivePlanes; ++j) {
5362 if (BlendingAndTiming[k] == j) {
5363 PlaneWithMinActiveDRAMClockChangeMargin = j;
5364 }
5365 }
5366 }
5367 }
5368 }
5369
5370 *MinActiveDRAMClockChangeLatencySupported = mode_lib->vba.MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;
5371
5372 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5373 for (k = 0; k < NumberOfActivePlanes; ++k) {
5374 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) && mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5375 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
5376 }
5377 }
5378
5379 mode_lib->vba.TotalNumberOfActiveOTG = 0;
5380 for (k = 0; k < NumberOfActivePlanes; ++k) {
5381 if (BlendingAndTiming[k] == k) {
5382 mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG + 1;
5383 }
5384 }
5385
5386 if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) {
5387 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5388 } else if (((mode_lib->vba.SynchronizedVBlank == true || mode_lib->vba.TotalNumberOfActiveOTG == 1 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0)) {
5389 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5390 } else {
5391 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5392 }
5393
5394 FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[0];
5395 for (k = 0; k < NumberOfActivePlanes; ++k) {
5396 if (FullDETBufferingTimeY[k] <= FullDETBufferingTimeYStutterCriticalPlane) {
5397 FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[k];
5398 TimeToFinishSwathTransferStutterCriticalPlane = (SwathHeightY[k] - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k])) * (HTotal[k] / PixelClock[k]) / VRatio[k];
5399 }
5400 }
5401
5402 *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5403 *StutterEnterPlusExitWatermark = dml_max(SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep, TimeToFinishSwathTransferStutterCriticalPlane);
5404
5405 }
5406
CalculateDCFCLKDeepSleep(struct display_mode_lib * mode_lib,unsigned int NumberOfActivePlanes,int BytePerPixelY[],int BytePerPixelC[],double VRatio[],double VRatioChroma[],double SwathWidthY[],double SwathWidthC[],unsigned int DPPPerPlane[],double HRatio[],double HRatioChroma[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double DPPCLK[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],int ReturnBusWidth,double * DCFCLKDeepSleep)5407 static void CalculateDCFCLKDeepSleep(
5408 struct display_mode_lib *mode_lib,
5409 unsigned int NumberOfActivePlanes,
5410 int BytePerPixelY[],
5411 int BytePerPixelC[],
5412 double VRatio[],
5413 double VRatioChroma[],
5414 double SwathWidthY[],
5415 double SwathWidthC[],
5416 unsigned int DPPPerPlane[],
5417 double HRatio[],
5418 double HRatioChroma[],
5419 double PixelClock[],
5420 double PSCL_THROUGHPUT[],
5421 double PSCL_THROUGHPUT_CHROMA[],
5422 double DPPCLK[],
5423 double ReadBandwidthLuma[],
5424 double ReadBandwidthChroma[],
5425 int ReturnBusWidth,
5426 double *DCFCLKDeepSleep)
5427 {
5428 double DisplayPipeLineDeliveryTimeLuma = 0;
5429 double DisplayPipeLineDeliveryTimeChroma = 0;
5430 unsigned int k;
5431 double ReadBandwidth = 0.0;
5432
5433 //double DCFCLKDeepSleepPerPlane[DC__NUM_DPP__MAX];
5434 for (k = 0; k < NumberOfActivePlanes; ++k) {
5435
5436 if (VRatio[k] <= 1) {
5437 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5438 } else {
5439 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5440 }
5441 if (BytePerPixelC[k] == 0) {
5442 DisplayPipeLineDeliveryTimeChroma = 0;
5443 } else {
5444 if (VRatioChroma[k] <= 1) {
5445 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5446 } else {
5447 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5448 }
5449 }
5450
5451 if (BytePerPixelC[k] > 0) {
5452 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(1.1 * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 1.1 * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5453 } else {
5454 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = 1.1 * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
5455 }
5456 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(mode_lib->vba.DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
5457
5458 }
5459
5460 for (k = 0; k < NumberOfActivePlanes; ++k) {
5461 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
5462 }
5463
5464 *DCFCLKDeepSleep = dml_max(8.0, ReadBandwidth / ReturnBusWidth);
5465
5466 for (k = 0; k < NumberOfActivePlanes; ++k) {
5467 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, mode_lib->vba.DCFCLKDeepSleepPerPlane[k]);
5468 }
5469 }
5470
/*
 * Turn a buffer depth expressed in time into an urgent burst factor.
 *
 * Returns BufferSizeInTime / (BufferSizeInTime - UrgentLatency).  When the
 * buffer does not outlast the urgent latency (difference <= 0) the factor is
 * 0 and *NotEnoughUrgentLatencyHiding is set; the flag is never cleared here.
 */
static double UrgentBurstFactorFromBufferTime(
		double BufferSizeInTime,
		double UrgentLatency,
		bool *NotEnoughUrgentLatencyHiding)
{
	if (BufferSizeInTime - UrgentLatency <= 0) {
		*NotEnoughUrgentLatencyHiding = 1;
		return 0;
	}

	return BufferSizeInTime / (BufferSizeInTime - UrgentLatency);
}

/*
 * CalculateUrgentBurstFactor
 *
 * Computes the urgent burst factors for the cursor buffer and for the luma
 * and chroma DET buffers of one plane.
 *
 * *NotEnoughUrgentLatencyHiding is cleared on entry and set when any of the
 * evaluated buffers holds no more than UrgentLatency worth of data.
 *
 * *UrgentBurstFactorCursor is written only when CursorWidth > 0, and
 * *UrgentBurstFactorChroma only when BytePerPixelInDETC > 0; otherwise the
 * caller's previous value is left in place.  A factor is 1 when VRatio is
 * not greater than 0.
 *
 * NOTE(review): the chroma path divides by VRatio; VRatioC and
 * DETBufferSizeInKByte are not referenced.  Kept exactly as found - confirm
 * with the HW formula before changing.
 */
static void CalculateUrgentBurstFactor(
		long swath_width_luma_ub,
		long swath_width_chroma_ub,
		unsigned int DETBufferSizeInKByte,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	unsigned int CursorLines = 0;
	double LumaLines = 0;
	double ChromaLines = 0;

	*NotEnoughUrgentLatencyHiding = 0;

	/* Cursor buffer. */
	if (CursorWidth > 0) {
		/* Whole cursor lines that fit, rounded down to a power of two. */
		CursorLines = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
		if (VRatio > 0) {
			*UrgentBurstFactorCursor = UrgentBurstFactorFromBufferTime(
					CursorLines * LineTime / VRatio,
					UrgentLatency,
					NotEnoughUrgentLatencyHiding);
		} else {
			*UrgentBurstFactorCursor = 1;
		}
	}

	/* Luma DET buffer: whole swaths only. */
	LumaLines = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (VRatio > 0) {
		*UrgentBurstFactorLuma = UrgentBurstFactorFromBufferTime(
				dml_floor(LumaLines, SwathHeightY) * LineTime / VRatio,
				UrgentLatency,
				NotEnoughUrgentLatencyHiding);
	} else {
		*UrgentBurstFactorLuma = 1;
	}

	/* Chroma DET buffer, only for formats that carry chroma. */
	if (BytePerPixelInDETC > 0) {
		ChromaLines = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
		if (VRatio > 0) {
			*UrgentBurstFactorChroma = UrgentBurstFactorFromBufferTime(
					dml_floor(ChromaLines, SwathHeightC) * LineTime / VRatio,
					UrgentLatency,
					NotEnoughUrgentLatencyHiding);
		} else {
			*UrgentBurstFactorChroma = 1;
		}
	}
}
5545
CalculatePixelDeliveryTimes(unsigned int NumberOfActivePlanes,double VRatio[],double VRatioChroma[],double VRatioPrefetchY[],double VRatioPrefetchC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[],unsigned int DPPPerPlane[],double HRatio[],double HRatioChroma[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double DPPCLK[],int BytePerPixelC[],enum scan_direction_class SourceScan[],unsigned int NumberOfCursors[],unsigned int CursorWidth[][2],unsigned int CursorBPP[][2],unsigned int BlockWidth256BytesY[],unsigned int BlockHeight256BytesY[],unsigned int BlockWidth256BytesC[],unsigned int BlockHeight256BytesC[],double DisplayPipeLineDeliveryTimeLuma[],double DisplayPipeLineDeliveryTimeChroma[],double DisplayPipeLineDeliveryTimeLumaPrefetch[],double DisplayPipeLineDeliveryTimeChromaPrefetch[],double DisplayPipeRequestDeliveryTimeLuma[],double DisplayPipeRequestDeliveryTimeChroma[],double DisplayPipeRequestDeliveryTimeLumaPrefetch[],double DisplayPipeRequestDeliveryTimeChromaPrefetch[],double CursorRequestDeliveryTime[],double CursorRequestDeliveryTimePrefetch[])5546 static void CalculatePixelDeliveryTimes(
5547 unsigned int NumberOfActivePlanes,
5548 double VRatio[],
5549 double VRatioChroma[],
5550 double VRatioPrefetchY[],
5551 double VRatioPrefetchC[],
5552 unsigned int swath_width_luma_ub[],
5553 unsigned int swath_width_chroma_ub[],
5554 unsigned int DPPPerPlane[],
5555 double HRatio[],
5556 double HRatioChroma[],
5557 double PixelClock[],
5558 double PSCL_THROUGHPUT[],
5559 double PSCL_THROUGHPUT_CHROMA[],
5560 double DPPCLK[],
5561 int BytePerPixelC[],
5562 enum scan_direction_class SourceScan[],
5563 unsigned int NumberOfCursors[],
5564 unsigned int CursorWidth[][2],
5565 unsigned int CursorBPP[][2],
5566 unsigned int BlockWidth256BytesY[],
5567 unsigned int BlockHeight256BytesY[],
5568 unsigned int BlockWidth256BytesC[],
5569 unsigned int BlockHeight256BytesC[],
5570 double DisplayPipeLineDeliveryTimeLuma[],
5571 double DisplayPipeLineDeliveryTimeChroma[],
5572 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
5573 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
5574 double DisplayPipeRequestDeliveryTimeLuma[],
5575 double DisplayPipeRequestDeliveryTimeChroma[],
5576 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
5577 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
5578 double CursorRequestDeliveryTime[],
5579 double CursorRequestDeliveryTimePrefetch[])
5580 {
5581 double req_per_swath_ub = 0;
5582 unsigned int k;
5583
5584 for (k = 0; k < NumberOfActivePlanes; ++k) {
5585 if (VRatio[k] <= 1) {
5586 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5587 } else {
5588 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5589 }
5590
5591 if (BytePerPixelC[k] == 0) {
5592 DisplayPipeLineDeliveryTimeChroma[k] = 0;
5593 } else {
5594 if (VRatioChroma[k] <= 1) {
5595 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5596 } else {
5597 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5598 }
5599 }
5600
5601 if (VRatioPrefetchY[k] <= 1) {
5602 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5603 } else {
5604 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5605 }
5606
5607 if (BytePerPixelC[k] == 0) {
5608 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
5609 } else {
5610 if (VRatioPrefetchC[k] <= 1) {
5611 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5612 } else {
5613 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5614 }
5615 }
5616 }
5617
5618 for (k = 0; k < NumberOfActivePlanes; ++k) {
5619 if (SourceScan[k] != dm_vert) {
5620 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
5621 } else {
5622 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
5623 }
5624 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
5625 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
5626 if (BytePerPixelC[k] == 0) {
5627 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
5628 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
5629 } else {
5630 if (SourceScan[k] != dm_vert) {
5631 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
5632 } else {
5633 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
5634 }
5635 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
5636 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
5637 }
5638 }
5639
5640 for (k = 0; k < NumberOfActivePlanes; ++k) {
5641 int cursor_req_per_width = 0;
5642 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
5643 if (NumberOfCursors[k] > 0) {
5644 if (VRatio[k] <= 1) {
5645 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5646 } else {
5647 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
5648 }
5649 if (VRatioPrefetchY[k] <= 1) {
5650 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5651 } else {
5652 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
5653 }
5654 } else {
5655 CursorRequestDeliveryTime[k] = 0;
5656 CursorRequestDeliveryTimePrefetch[k] = 0;
5657 }
5658 }
5659 }
5660
CalculateMetaAndPTETimes(int NumberOfActivePlanes,bool GPUVMEnable,int MetaChunkSize,int MinMetaChunkSizeBytes,int HTotal[],double VRatio[],double VRatioChroma[],double DestinationLinesToRequestRowInVBlank[],double DestinationLinesToRequestRowInImmediateFlip[],bool DCCEnable[],double PixelClock[],int BytePerPixelY[],int BytePerPixelC[],enum scan_direction_class SourceScan[],int dpte_row_height[],int dpte_row_height_chroma[],int meta_row_width[],int meta_row_width_chroma[],int meta_row_height[],int meta_row_height_chroma[],int meta_req_width[],int meta_req_width_chroma[],int meta_req_height[],int meta_req_height_chroma[],int dpte_group_bytes[],int PTERequestSizeY[],int PTERequestSizeC[],int PixelPTEReqWidthY[],int PixelPTEReqHeightY[],int PixelPTEReqWidthC[],int PixelPTEReqHeightC[],int dpte_row_width_luma_ub[],int dpte_row_width_chroma_ub[],double DST_Y_PER_PTE_ROW_NOM_L[],double DST_Y_PER_PTE_ROW_NOM_C[],double DST_Y_PER_META_ROW_NOM_L[],double DST_Y_PER_META_ROW_NOM_C[],double TimePerMetaChunkNominal[],double TimePerChromaMetaChunkNominal[],double TimePerMetaChunkVBlank[],double TimePerChromaMetaChunkVBlank[],double TimePerMetaChunkFlip[],double TimePerChromaMetaChunkFlip[],double time_per_pte_group_nom_luma[],double time_per_pte_group_vblank_luma[],double time_per_pte_group_flip_luma[],double time_per_pte_group_nom_chroma[],double time_per_pte_group_vblank_chroma[],double time_per_pte_group_flip_chroma[])5661 static void CalculateMetaAndPTETimes(
5662 int NumberOfActivePlanes,
5663 bool GPUVMEnable,
5664 int MetaChunkSize,
5665 int MinMetaChunkSizeBytes,
5666 int HTotal[],
5667 double VRatio[],
5668 double VRatioChroma[],
5669 double DestinationLinesToRequestRowInVBlank[],
5670 double DestinationLinesToRequestRowInImmediateFlip[],
5671 bool DCCEnable[],
5672 double PixelClock[],
5673 int BytePerPixelY[],
5674 int BytePerPixelC[],
5675 enum scan_direction_class SourceScan[],
5676 int dpte_row_height[],
5677 int dpte_row_height_chroma[],
5678 int meta_row_width[],
5679 int meta_row_width_chroma[],
5680 int meta_row_height[],
5681 int meta_row_height_chroma[],
5682 int meta_req_width[],
5683 int meta_req_width_chroma[],
5684 int meta_req_height[],
5685 int meta_req_height_chroma[],
5686 int dpte_group_bytes[],
5687 int PTERequestSizeY[],
5688 int PTERequestSizeC[],
5689 int PixelPTEReqWidthY[],
5690 int PixelPTEReqHeightY[],
5691 int PixelPTEReqWidthC[],
5692 int PixelPTEReqHeightC[],
5693 int dpte_row_width_luma_ub[],
5694 int dpte_row_width_chroma_ub[],
5695 double DST_Y_PER_PTE_ROW_NOM_L[],
5696 double DST_Y_PER_PTE_ROW_NOM_C[],
5697 double DST_Y_PER_META_ROW_NOM_L[],
5698 double DST_Y_PER_META_ROW_NOM_C[],
5699 double TimePerMetaChunkNominal[],
5700 double TimePerChromaMetaChunkNominal[],
5701 double TimePerMetaChunkVBlank[],
5702 double TimePerChromaMetaChunkVBlank[],
5703 double TimePerMetaChunkFlip[],
5704 double TimePerChromaMetaChunkFlip[],
5705 double time_per_pte_group_nom_luma[],
5706 double time_per_pte_group_vblank_luma[],
5707 double time_per_pte_group_flip_luma[],
5708 double time_per_pte_group_nom_chroma[],
5709 double time_per_pte_group_vblank_chroma[],
5710 double time_per_pte_group_flip_chroma[])
5711 {
5712 unsigned int meta_chunk_width = 0;
5713 unsigned int min_meta_chunk_width = 0;
5714 unsigned int meta_chunk_per_row_int = 0;
5715 unsigned int meta_row_remainder = 0;
5716 unsigned int meta_chunk_threshold = 0;
5717 unsigned int meta_chunks_per_row_ub = 0;
5718 unsigned int meta_chunk_width_chroma = 0;
5719 unsigned int min_meta_chunk_width_chroma = 0;
5720 unsigned int meta_chunk_per_row_int_chroma = 0;
5721 unsigned int meta_row_remainder_chroma = 0;
5722 unsigned int meta_chunk_threshold_chroma = 0;
5723 unsigned int meta_chunks_per_row_ub_chroma = 0;
5724 unsigned int dpte_group_width_luma = 0;
5725 unsigned int dpte_groups_per_row_luma_ub = 0;
5726 unsigned int dpte_group_width_chroma = 0;
5727 unsigned int dpte_groups_per_row_chroma_ub = 0;
5728 unsigned int k;
5729
5730 for (k = 0; k < NumberOfActivePlanes; ++k) {
5731 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
5732 if (BytePerPixelC[k] == 0) {
5733 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
5734 } else {
5735 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
5736 }
5737 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
5738 if (BytePerPixelC[k] == 0) {
5739 DST_Y_PER_META_ROW_NOM_C[k] = 0;
5740 } else {
5741 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
5742 }
5743 }
5744
5745 for (k = 0; k < NumberOfActivePlanes; ++k) {
5746 if (DCCEnable[k] == true) {
5747 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
5748 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
5749 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
5750 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
5751 if (SourceScan[k] != dm_vert) {
5752 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
5753 } else {
5754 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
5755 }
5756 if (meta_row_remainder <= meta_chunk_threshold) {
5757 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
5758 } else {
5759 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
5760 }
5761 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5762 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5763 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5764 if (BytePerPixelC[k] == 0) {
5765 TimePerChromaMetaChunkNominal[k] = 0;
5766 TimePerChromaMetaChunkVBlank[k] = 0;
5767 TimePerChromaMetaChunkFlip[k] = 0;
5768 } else {
5769 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
5770 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
5771 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
5772 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
5773 if (SourceScan[k] != dm_vert) {
5774 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
5775 } else {
5776 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
5777 }
5778 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
5779 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
5780 } else {
5781 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
5782 }
5783 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5784 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5785 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5786 }
5787 } else {
5788 TimePerMetaChunkNominal[k] = 0;
5789 TimePerMetaChunkVBlank[k] = 0;
5790 TimePerMetaChunkFlip[k] = 0;
5791 TimePerChromaMetaChunkNominal[k] = 0;
5792 TimePerChromaMetaChunkVBlank[k] = 0;
5793 TimePerChromaMetaChunkFlip[k] = 0;
5794 }
5795 }
5796
5797 for (k = 0; k < NumberOfActivePlanes; ++k) {
5798 if (GPUVMEnable == true) {
5799 if (SourceScan[k] != dm_vert) {
5800 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
5801 } else {
5802 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
5803 }
5804 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
5805 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5806 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5807 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5808 if (BytePerPixelC[k] == 0) {
5809 time_per_pte_group_nom_chroma[k] = 0;
5810 time_per_pte_group_vblank_chroma[k] = 0;
5811 time_per_pte_group_flip_chroma[k] = 0;
5812 } else {
5813 if (SourceScan[k] != dm_vert) {
5814 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5815 } else {
5816 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5817 }
5818 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
5819 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5820 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5821 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5822 }
5823 } else {
5824 time_per_pte_group_nom_luma[k] = 0;
5825 time_per_pte_group_vblank_luma[k] = 0;
5826 time_per_pte_group_flip_luma[k] = 0;
5827 time_per_pte_group_nom_chroma[k] = 0;
5828 time_per_pte_group_vblank_chroma[k] = 0;
5829 time_per_pte_group_flip_chroma[k] = 0;
5830 }
5831 }
5832 }
5833
/*
 * CalculateVMGroupAndRequestTimes() - per-plane VM group and VM request
 * times for the vblank and immediate-flip cases.
 *
 * For each plane k the four outputs TimePerVMGroupVBlank[k],
 * TimePerVMGroupFlip[k], TimePerVMRequestVBlank[k] and
 * TimePerVMRequestFlip[k] are written.
 *
 * They are 0 unless GPUVMEnable is set and either DCCEnable[k] is set or
 * GPUVMMaxPageTableLevels is greater than 1. Otherwise:
 *
 *  - a group count is built from ceil(bytes / vm_group_bytes[k]) terms and
 *    a request count from (bytes / 64) terms, where the byte arrays used
 *    (dpde0_*, meta_pte_*, or both) depend on DCCEnable[k] and on whether
 *    GPUVMMaxPageTableLevels is 1; chroma terms are added only when
 *    BytePerPixelC[k] > 0;
 *  - each output is DestinationLines * HTotal / PixelClock divided by the
 *    group or request count;
 *  - all four outputs are halved when GPUVMMaxPageTableLevels > 2.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	unsigned int k;

	for (k = 0; k < NumberOfActivePlanes; ++k) {
		int groups = 0;
		int requests = 0;
		double vblank_span, flip_span;
		bool has_chroma;

		if (!(GPUVMEnable && (DCCEnable[k] || GPUVMMaxPageTableLevels > 1))) {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
			continue;
		}

		has_chroma = BytePerPixelC[k] > 0;

		if (!DCCEnable[k]) {
			/* No DCC: only the dpde0 byte counts contribute. */
			if (has_chroma) {
				groups = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				requests = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
			} else {
				groups = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				requests = dpde0_bytes_per_frame_ub_l[k] / 64;
			}
		} else if (GPUVMMaxPageTableLevels == 1) {
			/* DCC with a single page-table level: only meta PTE bytes. */
			if (has_chroma) {
				groups = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				requests = meta_pte_bytes_per_frame_ub_l[k] / 64
						+ meta_pte_bytes_per_frame_ub_c[k] / 64;
			} else {
				groups = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				requests = meta_pte_bytes_per_frame_ub_l[k] / 64;
			}
		} else {
			/*
			 * DCC with more than one level: dpde0 and meta PTE bytes both
			 * contribute, plus a constant of 2 (with chroma) or 1 (without)
			 * on the group count only.
			 */
			if (has_chroma) {
				groups = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				requests = dpde0_bytes_per_frame_ub_l[k] / 64
						+ dpde0_bytes_per_frame_ub_c[k] / 64 + meta_pte_bytes_per_frame_ub_l[k]
						/ 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
			} else {
				groups = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				requests = dpde0_bytes_per_frame_ub_l[k] / 64
						+ meta_pte_bytes_per_frame_ub_l[k] / 64;
			}
		}

		/* Shared prefix of the four results; same operation order as before. */
		vblank_span = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k];
		flip_span = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k];

		TimePerVMGroupVBlank[k] = vblank_span / groups;
		TimePerVMGroupFlip[k] = flip_span / groups;
		TimePerVMRequestVBlank[k] = vblank_span / requests;
		TimePerVMRequestFlip[k] = flip_span / requests;

		if (GPUVMMaxPageTableLevels > 2) {
			TimePerVMGroupVBlank[k] /= 2;
			TimePerVMGroupFlip[k] /= 2;
			TimePerVMRequestVBlank[k] /= 2;
			TimePerVMRequestFlip[k] /= 2;
		}
	}
}
5944
CalculateStutterEfficiency(int NumberOfActivePlanes,long ROBBufferSizeInKByte,double TotalDataReadBandwidth,double DCFCLK,double ReturnBW,double SRExitTime,bool SynchronizedVBlank,int DPPPerPlane[],unsigned int DETBufferSizeY[],int BytePerPixelY[],double BytePerPixelDETY[],double SwathWidthY[],int SwathHeightY[],int SwathHeightC[],double DCCRateLuma[],double DCCRateChroma[],int HTotal[],int VTotal[],double PixelClock[],double VRatio[],enum scan_direction_class SourceScan[],int BlockHeight256BytesY[],int BlockWidth256BytesY[],int BlockHeight256BytesC[],int BlockWidth256BytesC[],int DCCYMaxUncompressedBlock[],int DCCCMaxUncompressedBlock[],int VActive[],bool DCCEnable[],bool WritebackEnable[],double ReadBandwidthPlaneLuma[],double ReadBandwidthPlaneChroma[],double meta_row_bw[],double dpte_row_bw[],double * StutterEfficiencyNotIncludingVBlank,double * StutterEfficiency,double * StutterPeriodOut)5945 static void CalculateStutterEfficiency(
5946 int NumberOfActivePlanes,
5947 long ROBBufferSizeInKByte,
5948 double TotalDataReadBandwidth,
5949 double DCFCLK,
5950 double ReturnBW,
5951 double SRExitTime,
5952 bool SynchronizedVBlank,
5953 int DPPPerPlane[],
5954 unsigned int DETBufferSizeY[],
5955 int BytePerPixelY[],
5956 double BytePerPixelDETY[],
5957 double SwathWidthY[],
5958 int SwathHeightY[],
5959 int SwathHeightC[],
5960 double DCCRateLuma[],
5961 double DCCRateChroma[],
5962 int HTotal[],
5963 int VTotal[],
5964 double PixelClock[],
5965 double VRatio[],
5966 enum scan_direction_class SourceScan[],
5967 int BlockHeight256BytesY[],
5968 int BlockWidth256BytesY[],
5969 int BlockHeight256BytesC[],
5970 int BlockWidth256BytesC[],
5971 int DCCYMaxUncompressedBlock[],
5972 int DCCCMaxUncompressedBlock[],
5973 int VActive[],
5974 bool DCCEnable[],
5975 bool WritebackEnable[],
5976 double ReadBandwidthPlaneLuma[],
5977 double ReadBandwidthPlaneChroma[],
5978 double meta_row_bw[],
5979 double dpte_row_bw[],
5980 double *StutterEfficiencyNotIncludingVBlank,
5981 double *StutterEfficiency,
5982 double *StutterPeriodOut)
5983 {
5984 double FullDETBufferingTimeY[DC__NUM_DPP__MAX] = { 0 };
5985 double FrameTimeForMinFullDETBufferingTime = 0;
5986 double StutterPeriod = 0;
5987 double AverageReadBandwidth = 0;
5988 double TotalRowReadBandwidth = 0;
5989 double AverageDCCCompressionRate = 0;
5990 double PartOfBurstThatFitsInROB = 0;
5991 double StutterBurstTime = 0;
5992 int TotalActiveWriteback = 0;
5993 double VBlankTime = 0;
5994 double SmallestVBlank = 0;
5995 int BytePerPixelYCriticalPlane = 0;
5996 double SwathWidthYCriticalPlane = 0;
5997 double LinesInDETY[DC__NUM_DPP__MAX] = { 0 };
5998 double LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX] = { 0 };
5999 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6000 double MaximumEffectiveCompressionLuma = 0;
6001 double MaximumEffectiveCompressionChroma = 0;
6002 unsigned int k;
6003
6004 for (k = 0; k < NumberOfActivePlanes; ++k) {
6005 LinesInDETY[k] = DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k];
6006 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
6007 FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
6008 }
6009
6010 StutterPeriod = FullDETBufferingTimeY[0];
6011 FrameTimeForMinFullDETBufferingTime = VTotal[0] * HTotal[0] / PixelClock[0];
6012 BytePerPixelYCriticalPlane = BytePerPixelY[0];
6013 SwathWidthYCriticalPlane = SwathWidthY[0];
6014 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[0]
6015 - (LinesInDETY[0] - LinesInDETYRoundedDownToSwath[0]);
6016
6017 for (k = 0; k < NumberOfActivePlanes; ++k) {
6018 if (FullDETBufferingTimeY[k] < StutterPeriod) {
6019 StutterPeriod = FullDETBufferingTimeY[k];
6020 FrameTimeForMinFullDETBufferingTime = VTotal[k] * HTotal[k] / PixelClock[k];
6021 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6022 SwathWidthYCriticalPlane = SwathWidthY[k];
6023 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k]
6024 - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k]);
6025 }
6026 }
6027
6028 AverageReadBandwidth = 0;
6029 TotalRowReadBandwidth = 0;
6030 for (k = 0; k < NumberOfActivePlanes; ++k) {
6031 if (DCCEnable[k] == true) {
6032 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k])
6033 || (SourceScan[k] != dm_vert
6034 && BlockHeight256BytesY[k] > SwathHeightY[k])
6035 || DCCYMaxUncompressedBlock[k] < 256) {
6036 MaximumEffectiveCompressionLuma = 2;
6037 } else {
6038 MaximumEffectiveCompressionLuma = 4;
6039 }
6040 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(DCCRateLuma[k], MaximumEffectiveCompressionLuma);
6041
6042 if (ReadBandwidthPlaneChroma[k] > 0) {
6043 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6044 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k])
6045 || DCCCMaxUncompressedBlock[k] < 256) {
6046 MaximumEffectiveCompressionChroma = 2;
6047 } else {
6048 MaximumEffectiveCompressionChroma = 4;
6049 }
6050 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneChroma[k] / dml_min(DCCRateChroma[k], MaximumEffectiveCompressionChroma);
6051 }
6052 } else {
6053 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6054 }
6055 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
6056 }
6057
6058 AverageDCCCompressionRate = TotalDataReadBandwidth / AverageReadBandwidth;
6059 PartOfBurstThatFitsInROB = dml_min(StutterPeriod * TotalDataReadBandwidth, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6060 StutterBurstTime = PartOfBurstThatFitsInROB / AverageDCCCompressionRate / ReturnBW + (StutterPeriod * TotalDataReadBandwidth
6061 - PartOfBurstThatFitsInROB) / (DCFCLK * 64) + StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6062 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6063
6064 TotalActiveWriteback = 0;
6065 for (k = 0; k < NumberOfActivePlanes; ++k) {
6066 if (WritebackEnable[k] == true) {
6067 TotalActiveWriteback = TotalActiveWriteback + 1;
6068 }
6069 }
6070
6071 if (TotalActiveWriteback == 0) {
6072 *StutterEfficiencyNotIncludingVBlank = (1
6073 - (SRExitTime + StutterBurstTime) / StutterPeriod) * 100;
6074 } else {
6075 *StutterEfficiencyNotIncludingVBlank = 0;
6076 }
6077
6078 if (SynchronizedVBlank == true || NumberOfActivePlanes == 1) {
6079 SmallestVBlank = (VTotal[0] - VActive[0]) * HTotal[0] / PixelClock[0];
6080 } else {
6081 SmallestVBlank = 0;
6082 }
6083 for (k = 0; k < NumberOfActivePlanes; ++k) {
6084 if (SynchronizedVBlank == true || NumberOfActivePlanes == 1) {
6085 VBlankTime = (VTotal[k] - VActive[k]) * HTotal[k] / PixelClock[k];
6086 } else {
6087 VBlankTime = 0;
6088 }
6089 SmallestVBlank = dml_min(SmallestVBlank, VBlankTime);
6090 }
6091
6092 *StutterEfficiency = (*StutterEfficiencyNotIncludingVBlank / 100.0 * (FrameTimeForMinFullDETBufferingTime - SmallestVBlank) + SmallestVBlank) / FrameTimeForMinFullDETBufferingTime * 100;
6093
6094 if (StutterPeriodOut)
6095 *StutterPeriodOut = StutterPeriod;
6096 }
6097
CalculateSwathAndDETConfiguration(bool ForceSingleDPP,int NumberOfActivePlanes,unsigned int DETBufferSizeInKByte,double MaximumSwathWidthLuma[],double MaximumSwathWidthChroma[],enum scan_direction_class SourceScan[],enum source_format_class SourcePixelFormat[],enum dm_swizzle_mode SurfaceTiling[],int ViewportWidth[],int ViewportHeight[],int SurfaceWidthY[],int SurfaceWidthC[],int SurfaceHeightY[],int SurfaceHeightC[],int Read256BytesBlockHeightY[],int Read256BytesBlockHeightC[],int Read256BytesBlockWidthY[],int Read256BytesBlockWidthC[],enum odm_combine_mode ODMCombineEnabled[],int BlendingAndTiming[],int BytePerPixY[],int BytePerPixC[],double BytePerPixDETY[],double BytePerPixDETC[],int HActive[],double HRatio[],double HRatioChroma[],int DPPPerPlane[],int swath_width_luma_ub[],int swath_width_chroma_ub[],double SwathWidth[],double SwathWidthChroma[],int SwathHeightY[],int SwathHeightC[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],bool ViewportSizeSupportPerPlane[],bool * ViewportSizeSupport)6098 static void CalculateSwathAndDETConfiguration(
6099 bool ForceSingleDPP,
6100 int NumberOfActivePlanes,
6101 unsigned int DETBufferSizeInKByte,
6102 double MaximumSwathWidthLuma[],
6103 double MaximumSwathWidthChroma[],
6104 enum scan_direction_class SourceScan[],
6105 enum source_format_class SourcePixelFormat[],
6106 enum dm_swizzle_mode SurfaceTiling[],
6107 int ViewportWidth[],
6108 int ViewportHeight[],
6109 int SurfaceWidthY[],
6110 int SurfaceWidthC[],
6111 int SurfaceHeightY[],
6112 int SurfaceHeightC[],
6113 int Read256BytesBlockHeightY[],
6114 int Read256BytesBlockHeightC[],
6115 int Read256BytesBlockWidthY[],
6116 int Read256BytesBlockWidthC[],
6117 enum odm_combine_mode ODMCombineEnabled[],
6118 int BlendingAndTiming[],
6119 int BytePerPixY[],
6120 int BytePerPixC[],
6121 double BytePerPixDETY[],
6122 double BytePerPixDETC[],
6123 int HActive[],
6124 double HRatio[],
6125 double HRatioChroma[],
6126 int DPPPerPlane[],
6127 int swath_width_luma_ub[],
6128 int swath_width_chroma_ub[],
6129 double SwathWidth[],
6130 double SwathWidthChroma[],
6131 int SwathHeightY[],
6132 int SwathHeightC[],
6133 unsigned int DETBufferSizeY[],
6134 unsigned int DETBufferSizeC[],
6135 bool ViewportSizeSupportPerPlane[],
6136 bool *ViewportSizeSupport)
6137 {
6138 int MaximumSwathHeightY[DC__NUM_DPP__MAX] = { 0 };
6139 int MaximumSwathHeightC[DC__NUM_DPP__MAX] = { 0 };
6140 int MinimumSwathHeightY = 0;
6141 int MinimumSwathHeightC = 0;
6142 long RoundedUpMaxSwathSizeBytesY = 0;
6143 long RoundedUpMaxSwathSizeBytesC = 0;
6144 long RoundedUpMinSwathSizeBytesY = 0;
6145 long RoundedUpMinSwathSizeBytesC = 0;
6146 long RoundedUpSwathSizeBytesY = 0;
6147 long RoundedUpSwathSizeBytesC = 0;
6148 double SwathWidthSingleDPP[DC__NUM_DPP__MAX] = { 0 };
6149 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX] = { 0 };
6150 int k;
6151
6152 CalculateSwathWidth(
6153 ForceSingleDPP,
6154 NumberOfActivePlanes,
6155 SourcePixelFormat,
6156 SourceScan,
6157 ViewportWidth,
6158 ViewportHeight,
6159 SurfaceWidthY,
6160 SurfaceWidthC,
6161 SurfaceHeightY,
6162 SurfaceHeightC,
6163 ODMCombineEnabled,
6164 BytePerPixY,
6165 BytePerPixC,
6166 Read256BytesBlockHeightY,
6167 Read256BytesBlockHeightC,
6168 Read256BytesBlockWidthY,
6169 Read256BytesBlockWidthC,
6170 BlendingAndTiming,
6171 HActive,
6172 HRatio,
6173 DPPPerPlane,
6174 SwathWidthSingleDPP,
6175 SwathWidthSingleDPPChroma,
6176 SwathWidth,
6177 SwathWidthChroma,
6178 MaximumSwathHeightY,
6179 MaximumSwathHeightC,
6180 swath_width_luma_ub,
6181 swath_width_chroma_ub);
6182
6183 *ViewportSizeSupport = true;
6184 for (k = 0; k < NumberOfActivePlanes; ++k) {
6185 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32
6186 || SourcePixelFormat[k] == dm_444_16
6187 || SourcePixelFormat[k] == dm_mono_16
6188 || SourcePixelFormat[k] == dm_mono_8
6189 || SourcePixelFormat[k] == dm_rgbe)) {
6190 if (SurfaceTiling[k] == dm_sw_linear
6191 || (SourcePixelFormat[k] == dm_444_64
6192 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6193 && SourceScan[k] != dm_vert)) {
6194 MinimumSwathHeightY = MaximumSwathHeightY[k];
6195 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6196 MinimumSwathHeightY = MaximumSwathHeightY[k];
6197 } else {
6198 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6199 }
6200 MinimumSwathHeightC = MaximumSwathHeightC[k];
6201 } else {
6202 if (SurfaceTiling[k] == dm_sw_linear) {
6203 MinimumSwathHeightY = MaximumSwathHeightY[k];
6204 MinimumSwathHeightC = MaximumSwathHeightC[k];
6205 } else if (SourcePixelFormat[k] == dm_rgbe_alpha
6206 && SourceScan[k] == dm_vert) {
6207 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6208 MinimumSwathHeightC = MaximumSwathHeightC[k];
6209 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6210 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6211 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6212 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6213 MinimumSwathHeightY = MaximumSwathHeightY[k];
6214 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6215 } else {
6216 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6217 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6218 }
6219 }
6220
6221 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k]
6222 * MaximumSwathHeightY[k];
6223 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k]
6224 * MinimumSwathHeightY;
6225 if (SourcePixelFormat[k] == dm_420_10) {
6226 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6227 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6228 }
6229 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k]
6230 * MaximumSwathHeightC[k];
6231 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k]
6232 * MinimumSwathHeightC;
6233 if (SourcePixelFormat[k] == dm_420_10) {
6234 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6235 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
6236 }
6237
6238 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
6239 <= DETBufferSizeInKByte * 1024 / 2) {
6240 SwathHeightY[k] = MaximumSwathHeightY[k];
6241 SwathHeightC[k] = MaximumSwathHeightC[k];
6242 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6243 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6244 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6245 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
6246 <= DETBufferSizeInKByte * 1024 / 2) {
6247 SwathHeightY[k] = MinimumSwathHeightY;
6248 SwathHeightC[k] = MaximumSwathHeightC[k];
6249 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6250 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6251 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6252 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC
6253 <= DETBufferSizeInKByte * 1024 / 2) {
6254 SwathHeightY[k] = MaximumSwathHeightY[k];
6255 SwathHeightC[k] = MinimumSwathHeightC;
6256 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6257 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6258 } else {
6259 SwathHeightY[k] = MinimumSwathHeightY;
6260 SwathHeightC[k] = MinimumSwathHeightC;
6261 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6262 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6263 }
6264
6265 if (SwathHeightC[k] == 0) {
6266 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024;
6267 DETBufferSizeC[k] = 0;
6268 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6269 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024 / 2;
6270 DETBufferSizeC[k] = DETBufferSizeInKByte * 1024 / 2;
6271 } else {
6272 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024 * 2 / 3;
6273 DETBufferSizeC[k] = DETBufferSizeInKByte * 1024 / 3;
6274 }
6275
6276 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC
6277 > DETBufferSizeInKByte * 1024 / 2
6278 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6279 || (SwathHeightC[k] > 0
6280 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6281 *ViewportSizeSupport = false;
6282 ViewportSizeSupportPerPlane[k] = false;
6283 } else {
6284 ViewportSizeSupportPerPlane[k] = true;
6285 }
6286 }
6287 }
6288
CalculateSwathWidth(bool ForceSingleDPP,int NumberOfActivePlanes,enum source_format_class SourcePixelFormat[],enum scan_direction_class SourceScan[],unsigned int ViewportWidth[],unsigned int ViewportHeight[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],enum odm_combine_mode ODMCombineEnabled[],int BytePerPixY[],int BytePerPixC[],int Read256BytesBlockHeightY[],int Read256BytesBlockHeightC[],int Read256BytesBlockWidthY[],int Read256BytesBlockWidthC[],int BlendingAndTiming[],unsigned int HActive[],double HRatio[],int DPPPerPlane[],double SwathWidthSingleDPPY[],double SwathWidthSingleDPPC[],double SwathWidthY[],double SwathWidthC[],int MaximumSwathHeightY[],int MaximumSwathHeightC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[])6289 static void CalculateSwathWidth(
6290 bool ForceSingleDPP,
6291 int NumberOfActivePlanes,
6292 enum source_format_class SourcePixelFormat[],
6293 enum scan_direction_class SourceScan[],
6294 unsigned int ViewportWidth[],
6295 unsigned int ViewportHeight[],
6296 unsigned int SurfaceWidthY[],
6297 unsigned int SurfaceWidthC[],
6298 unsigned int SurfaceHeightY[],
6299 unsigned int SurfaceHeightC[],
6300 enum odm_combine_mode ODMCombineEnabled[],
6301 int BytePerPixY[],
6302 int BytePerPixC[],
6303 int Read256BytesBlockHeightY[],
6304 int Read256BytesBlockHeightC[],
6305 int Read256BytesBlockWidthY[],
6306 int Read256BytesBlockWidthC[],
6307 int BlendingAndTiming[],
6308 unsigned int HActive[],
6309 double HRatio[],
6310 int DPPPerPlane[],
6311 double SwathWidthSingleDPPY[],
6312 double SwathWidthSingleDPPC[],
6313 double SwathWidthY[],
6314 double SwathWidthC[],
6315 int MaximumSwathHeightY[],
6316 int MaximumSwathHeightC[],
6317 unsigned int swath_width_luma_ub[],
6318 unsigned int swath_width_chroma_ub[])
6319 {
6320 unsigned int k, j;
6321 long surface_width_ub_l;
6322 long surface_height_ub_l;
6323 long surface_width_ub_c;
6324 long surface_height_ub_c;
6325
6326 for (k = 0; k < NumberOfActivePlanes; ++k) {
6327 enum odm_combine_mode MainPlaneODMCombine = 0;
6328
6329 if (SourceScan[k] != dm_vert) {
6330 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6331 } else {
6332 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6333 }
6334
6335 MainPlaneODMCombine = ODMCombineEnabled[k];
6336 for (j = 0; j < NumberOfActivePlanes; ++j) {
6337 if (BlendingAndTiming[k] == j) {
6338 MainPlaneODMCombine = ODMCombineEnabled[j];
6339 }
6340 }
6341
6342 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
6343 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6344 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
6345 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6346 } else if (DPPPerPlane[k] == 2) {
6347 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6348 } else {
6349 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6350 }
6351
6352 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6353 SwathWidthC[k] = SwathWidthY[k] / 2;
6354 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6355 } else {
6356 SwathWidthC[k] = SwathWidthY[k];
6357 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
6358 }
6359
6360 if (ForceSingleDPP == true) {
6361 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6362 SwathWidthC[k] = SwathWidthSingleDPPC[k];
6363 }
6364
6365 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6366 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6367
6368 if (SourceScan[k] != dm_vert) {
6369 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6370 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6371 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
6372 Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6373 if (BytePerPixC[k] > 0) {
6374 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6375 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
6376 Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
6377 } else {
6378 swath_width_chroma_ub[k] = 0;
6379 }
6380 } else {
6381 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
6382 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
6383 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
6384 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
6385 if (BytePerPixC[k] > 0) {
6386 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
6387 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
6388 Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
6389 } else {
6390 swath_width_chroma_ub[k] = 0;
6391 }
6392 }
6393 }
6394 }
6395
/*
 * CalculateExtraLatency() - extra latency as a time value.
 *
 * Obtains the extra latency byte count from CalculateExtraLatencyBytes()
 * (all VM/DPP related arguments are forwarded unchanged) and returns
 *
 *   (RoundTripPingLatencyCycles + 32) / DCFCLK + bytes / ReturnBW
 *
 * No guard is applied against DCFCLK or ReturnBW being zero.
 */
static double CalculateExtraLatency(
		long RoundTripPingLatencyCycles,
		long ReorderingBytes,
		double DCFCLK,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		double ReturnBW,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	const double extra_bytes = CalculateExtraLatencyBytes(
			ReorderingBytes, TotalNumberOfActiveDPP,
			PixelChunkSizeInKByte, TotalNumberOfDCCActiveDPP,
			MetaChunkSize, GPUVMEnable, HostVMEnable,
			NumberOfActivePlanes, NumberOfDPP, dpte_group_bytes,
			PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
			PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
			HostVMMinPageSize, HostVMMaxNonCachedPageTableLevels);

	/* Fixed 32-cycle adder on top of the round-trip ping, then the byte term. */
	return (RoundTripPingLatencyCycles + 32) / DCFCLK + extra_bytes / ReturnBW;
}
6434
/*
 * CalculateExtraLatencyBytes() - byte count used for the extra latency term.
 *
 * Base amount:
 *   ReorderingBytes
 *   + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *      + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024
 *
 * When GPUVMEnable is set, each plane additionally contributes
 *   NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels) * factor
 * where, if HostVMEnable is also set, "factor" is the ratio of the two
 * percentage arguments (PixelMixedWithVMData / VMDataOnly) and "levels" is
 * HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2 (floored at 0)
 * depending on whether HostVMMinPageSize is below 2048, below 1048576, or
 * neither. Otherwise factor is 1 and levels is 0.
 */
static double CalculateExtraLatencyBytes(
		long ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	double inefficiency = 1;
	int dynamic_levels = 0;
	int pte_multiplier;
	double bytes;
	unsigned int plane;

	if (GPUVMEnable && HostVMEnable) {
		inefficiency = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
				/ PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;

		if (HostVMMinPageSize < 2048) {
			dynamic_levels = HostVMMaxNonCachedPageTableLevels;
		} else if (HostVMMinPageSize < 1048576) {
			dynamic_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		} else {
			dynamic_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
		}
	}

	bytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
			+ TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	if (!GPUVMEnable) {
		return bytes;
	}

	/* Integer factor is loop-invariant; the product order per plane is kept as-is. */
	pte_multiplier = 1 + 8 * dynamic_levels;
	for (plane = 0; plane < NumberOfActivePlanes; ++plane) {
		bytes += NumberOfDPP[plane] * dpte_group_bytes[plane] * pte_multiplier * inefficiency;
	}

	return bytes;
}
6479
6480
/*
 * CalculateUrgentLatency() - pick the urgent latency to use.
 *
 * Returns the largest of the three urgent latency inputs. When
 * DoUrgentLatencyAdjustment is set, the result is further increased by
 *   UrgentLatencyAdjustmentFabricClockComponent
 *     * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1)
 * (which is negative when FabricClock exceeds the reference).
 */
static double CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double worst_case = dml_max3(UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData,
			UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment) {
		return worst_case;
	}

	return worst_case + UrgentLatencyAdjustmentFabricClockComponent
			* (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
6498
UseMinimumDCFCLK(struct display_mode_lib * mode_lib,struct vba_vars_st * v,int MaxPrefetchMode,int ReorderingBytes)6499 static noinline_for_stack void UseMinimumDCFCLK(
6500 struct display_mode_lib *mode_lib,
6501 struct vba_vars_st *v,
6502 int MaxPrefetchMode,
6503 int ReorderingBytes)
6504 {
6505 double NormalEfficiency = 0;
6506 double PTEEfficiency = 0;
6507 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2] = { { 0 } };
6508 unsigned int i, j, k;
6509
6510 NormalEfficiency = (v->HostVMEnable == true ? v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
6511 : v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly) / 100.0;
6512 PTEEfficiency = (v->HostVMEnable == true ? v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly
6513 / v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData : 1.0);
6514 for (i = 0; i < mode_lib->soc.num_states; ++i) {
6515 for (j = 0; j <= 1; ++j) {
6516 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX] = { 0 };
6517 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX] = { 0 };
6518 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX] = { 0 };
6519 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX] = { 0 };
6520 double MinimumTWait = 0;
6521 double NonDPTEBandwidth = 0;
6522 double DPTEBandwidth = 0;
6523 double DCFCLKRequiredForAverageBandwidth = 0;
6524 double ExtraLatencyBytes = 0;
6525 double ExtraLatencyCycles = 0;
6526 double DCFCLKRequiredForPeakBandwidth = 0;
6527 int NoOfDPPState[DC__NUM_DPP__MAX] = { 0 };
6528 double MinimumTvmPlus2Tr0 = 0;
6529
6530 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
6531 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
6532 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
6533 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
6534 }
6535
6536 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
6537 NoOfDPPState[k] = v->NoOfDPP[i][j][k];
6538 }
6539
6540 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
6541 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
6542 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
6543 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
6544 DCFCLKRequiredForAverageBandwidth = dml_max3(v->ProjectedDCFCLKDeepSleep[i][j],
6545 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth / (v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
6546 (NonDPTEBandwidth + DPTEBandwidth / PTEEfficiency) / NormalEfficiency / v->ReturnBusWidth);
6547
6548 ExtraLatencyBytes = CalculateExtraLatencyBytes(ReorderingBytes, v->TotalNumberOfActiveDPP[i][j], v->PixelChunkSizeInKByte, v->TotalNumberOfDCCActiveDPP[i][j],
6549 v->MetaChunkSize, v->GPUVMEnable, v->HostVMEnable, v->NumberOfActivePlanes, NoOfDPPState, v->dpte_group_bytes,
6550 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6551 v->HostVMMinPageSize, v->HostVMMaxNonCachedPageTableLevels);
6552 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + 32 + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
6553 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
6554 double DCFCLKCyclesRequiredInPrefetch = { 0 };
6555 double ExpectedPrefetchBWAcceleration = { 0 };
6556 double PrefetchTime = { 0 };
6557
6558 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
6559 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
6560 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / PTEEfficiency
6561 / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0) + 2 * v->DPTEBytesPerRow[i][j][k] / PTEEfficiency
6562 / NormalEfficiency / v->ReturnBusWidth + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
6563 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
6564 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k]) / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
6565 DynamicMetadataVMExtraLatency[k] = (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
6566 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
6567 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - v->UrgLatency[i] * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels
6568 : v->GPUVMMaxPageTableLevels - 2) * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - DynamicMetadataVMExtraLatency[k];
6569
6570 if (PrefetchTime > 0) {
6571 double ExpectedVRatioPrefetch = { 0 };
6572 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
6573 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
6574 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
6575 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
6576 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
6577 + NoOfDPPState[k] * DPTEBandwidth / PTEEfficiency / NormalEfficiency / v->ReturnBusWidth;
6578 }
6579 } else {
6580 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
6581 }
6582 if (v->DynamicMetadataEnable[k] == true) {
6583 double TsetupPipe = { 0 };
6584 double TdmbfPipe = { 0 };
6585 double TdmsksPipe = { 0 };
6586 double TdmecPipe = { 0 };
6587 double AllowedTimeForUrgentExtraLatency = { 0 };
6588
6589 CalculateDynamicMetadataParameters(
6590 v->MaxInterDCNTileRepeaters,
6591 v->RequiredDPPCLK[i][j][k],
6592 v->RequiredDISPCLK[i][j],
6593 v->ProjectedDCFCLKDeepSleep[i][j],
6594 v->PixelClock[k],
6595 v->HTotal[k],
6596 v->VTotal[k] - v->VActive[k],
6597 v->DynamicMetadataTransmittedBytes[k],
6598 v->DynamicMetadataLinesBeforeActiveRequired[k],
6599 v->Interlace[k],
6600 v->ProgressiveToInterlaceUnitInOPP,
6601 &TsetupPipe,
6602 &TdmbfPipe,
6603 &TdmecPipe,
6604 &TdmsksPipe);
6605 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TsetupPipe
6606 - TdmbfPipe - TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
6607 if (AllowedTimeForUrgentExtraLatency > 0) {
6608 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(DCFCLKRequiredForPeakBandwidthPerPlane[k],
6609 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
6610 } else {
6611 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
6612 }
6613 }
6614 }
6615 DCFCLKRequiredForPeakBandwidth = 0;
6616 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
6617 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
6618 }
6619 MinimumTvmPlus2Tr0 = v->UrgLatency[i] * (v->GPUVMEnable == true ? (v->HostVMEnable == true ?
6620 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) : 0);
6621 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
6622 double MaximumTvmPlus2Tr0PlusTsw = { 0 };
6623 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
6624 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
6625 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
6626 } else {
6627 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth, 2 * ExtraLatencyCycles
6628 / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
6629 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
6630 }
6631 }
6632 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * (1 + mode_lib->vba.PercentMarginOverMinimumRequiredDCFCLK / 100)
6633 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
6634 }
6635 }
6636 }
6637
6638