1 /*
2 * Copyright 2020 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25
26 #include "dc.h"
27 #include "../display_mode_lib.h"
28 #include "display_mode_vba_30.h"
29 #include "../dml_inline_defs.h"
30
31
32 /*
33 * NOTE:
34 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
35 *
36 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
37 * ways. Unless there is something clearly wrong with it the code should
38 * remain as-is as it provides us with a guarantee from HW that it is correct.
39 */
40
41
42 typedef struct {
43 double DPPCLK;
44 double DISPCLK;
45 double PixelClock;
46 double DCFCLKDeepSleep;
47 unsigned int DPPPerPlane;
48 bool ScalerEnabled;
49 enum scan_direction_class SourceScan;
50 unsigned int BlockWidth256BytesY;
51 unsigned int BlockHeight256BytesY;
52 unsigned int BlockWidth256BytesC;
53 unsigned int BlockHeight256BytesC;
54 unsigned int InterlaceEnable;
55 unsigned int NumberOfCursors;
56 unsigned int VBlank;
57 unsigned int HTotal;
58 unsigned int DCCEnable;
59 bool ODMCombineEnabled;
60 } Pipe;
61
/*
 * Bits-per-pixel marker values. Their users are outside the visible part of
 * this file; NOTE(review): presumably BPP_INVALID flags an unusable output
 * bpp and BPP_BLENDED_PIPE flags a pipe without its own output - confirm
 * against the code that returns them.
 */
#define BPP_INVALID 0
#define BPP_BLENDED_PIPE 0xffffffff
/*
 * Width limits. The values match the slice-width bounds quoted in the
 * comments of dscceComputeDelay(): 5184 / numSlices in general and
 * 4096 / numSlices in 420 mode.
 */
#define DCN30_MAX_DSC_IMAGE_WIDTH 5184
#define DCN30_MAX_FMT_420_BUFFER_WIDTH 4096
66
67 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
68 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
69 struct display_mode_lib *mode_lib);
70 static unsigned int dscceComputeDelay(
71 unsigned int bpc,
72 double BPP,
73 unsigned int sliceWidth,
74 unsigned int numSlices,
75 enum output_format_class pixelFormat,
76 enum output_encoder_class Output);
77 static unsigned int dscComputeDelay(
78 enum output_format_class pixelFormat,
79 enum output_encoder_class Output);
80 static bool CalculatePrefetchSchedule(
81 struct display_mode_lib *mode_lib,
82 unsigned int k,
83 Pipe *myPipe,
84 unsigned int DSCDelay,
85 unsigned int DPP_RECOUT_WIDTH,
86 unsigned int VStartup,
87 unsigned int MaxVStartup,
88 double UrgentLatency,
89 double UrgentExtraLatency,
90 double TCalc,
91 unsigned int PDEAndMetaPTEBytesFrame,
92 unsigned int MetaRowByte,
93 unsigned int PixelPTEBytesPerRow,
94 double PrefetchSourceLinesY,
95 unsigned int SwathWidthY,
96 int BytePerPixelY,
97 double VInitPreFillY,
98 unsigned int MaxNumSwathY,
99 double PrefetchSourceLinesC,
100 unsigned int SwathWidthC,
101 double VInitPreFillC,
102 unsigned int MaxNumSwathC,
103 long swath_width_luma_ub,
104 long swath_width_chroma_ub,
105 unsigned int SwathHeightY,
106 unsigned int SwathHeightC,
107 double TWait,
108 double *DestinationLinesForPrefetch,
109 double *PrefetchBandwidth,
110 double *DestinationLinesToRequestVMInVBlank,
111 double *DestinationLinesToRequestRowInVBlank,
112 double *VRatioPrefetchY,
113 double *VRatioPrefetchC,
114 double *RequiredPrefetchPixDataBWLuma,
115 double *RequiredPrefetchPixDataBWChroma,
116 bool *NotEnoughTimeForDynamicMetadata);
117 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
118 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
119 static void CalculateDCCConfiguration(
120 bool DCCEnabled,
121 bool DCCProgrammingAssumesScanDirectionUnknown,
122 enum source_format_class SourcePixelFormat,
123 unsigned int ViewportWidthLuma,
124 unsigned int ViewportWidthChroma,
125 unsigned int ViewportHeightLuma,
126 unsigned int ViewportHeightChroma,
127 double DETBufferSize,
128 unsigned int RequestHeight256ByteLuma,
129 unsigned int RequestHeight256ByteChroma,
130 enum dm_swizzle_mode TilingFormat,
131 unsigned int BytePerPixelY,
132 unsigned int BytePerPixelC,
133 double BytePerPixelDETY,
134 double BytePerPixelDETC,
135 enum scan_direction_class ScanOrientation,
136 unsigned int *MaxUncompressedBlockLuma,
137 unsigned int *MaxUncompressedBlockChroma,
138 unsigned int *MaxCompressedBlockLuma,
139 unsigned int *MaxCompressedBlockChroma,
140 unsigned int *IndependentBlockLuma,
141 unsigned int *IndependentBlockChroma);
142 static double CalculatePrefetchSourceLines(
143 struct display_mode_lib *mode_lib,
144 double VRatio,
145 double vtaps,
146 bool Interlace,
147 bool ProgressiveToInterlaceUnitInOPP,
148 unsigned int SwathHeight,
149 unsigned int ViewportYStart,
150 double *VInitPreFill,
151 unsigned int *MaxNumSwath);
152 static unsigned int CalculateVMAndRowBytes(
153 struct display_mode_lib *mode_lib,
154 bool DCCEnable,
155 unsigned int BlockHeight256Bytes,
156 unsigned int BlockWidth256Bytes,
157 enum source_format_class SourcePixelFormat,
158 unsigned int SurfaceTiling,
159 unsigned int BytePerPixel,
160 enum scan_direction_class ScanDirection,
161 unsigned int SwathWidth,
162 unsigned int ViewportHeight,
163 bool GPUVMEnable,
164 bool HostVMEnable,
165 unsigned int HostVMMaxNonCachedPageTableLevels,
166 unsigned int GPUVMMinPageSize,
167 unsigned int HostVMMinPageSize,
168 unsigned int PTEBufferSizeInRequests,
169 unsigned int Pitch,
170 unsigned int DCCMetaPitch,
171 unsigned int *MacroTileWidth,
172 unsigned int *MetaRowByte,
173 unsigned int *PixelPTEBytesPerRow,
174 bool *PTEBufferSizeNotExceeded,
175 unsigned int *dpte_row_width_ub,
176 unsigned int *dpte_row_height,
177 unsigned int *MetaRequestWidth,
178 unsigned int *MetaRequestHeight,
179 unsigned int *meta_row_width,
180 unsigned int *meta_row_height,
181 unsigned int *vm_group_bytes,
182 unsigned int *dpte_group_bytes,
183 unsigned int *PixelPTEReqWidth,
184 unsigned int *PixelPTEReqHeight,
185 unsigned int *PTERequestSize,
186 unsigned int *DPDE0BytesFrame,
187 unsigned int *MetaPTEBytesFrame);
188 static double CalculateTWait(
189 unsigned int PrefetchMode,
190 double DRAMClockChangeLatency,
191 double UrgentLatency,
192 double SREnterPlusExitTime);
193 static void CalculateRowBandwidth(
194 bool GPUVMEnable,
195 enum source_format_class SourcePixelFormat,
196 double VRatio,
197 double VRatioChroma,
198 bool DCCEnable,
199 double LineTime,
200 unsigned int MetaRowByteLuma,
201 unsigned int MetaRowByteChroma,
202 unsigned int meta_row_height_luma,
203 unsigned int meta_row_height_chroma,
204 unsigned int PixelPTEBytesPerRowLuma,
205 unsigned int PixelPTEBytesPerRowChroma,
206 unsigned int dpte_row_height_luma,
207 unsigned int dpte_row_height_chroma,
208 double *meta_row_bw,
209 double *dpte_row_bw);
210 static void CalculateFlipSchedule(
211 struct display_mode_lib *mode_lib,
212 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
213 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
214 double UrgentExtraLatency,
215 double UrgentLatency,
216 unsigned int GPUVMMaxPageTableLevels,
217 bool HostVMEnable,
218 unsigned int HostVMMaxNonCachedPageTableLevels,
219 bool GPUVMEnable,
220 double HostVMMinPageSize,
221 double PDEAndMetaPTEBytesPerFrame,
222 double MetaRowBytes,
223 double DPTEBytesPerRow,
224 double BandwidthAvailableForImmediateFlip,
225 unsigned int TotImmediateFlipBytes,
226 enum source_format_class SourcePixelFormat,
227 double LineTime,
228 double VRatio,
229 double VRatioChroma,
230 double Tno_bw,
231 bool DCCEnable,
232 unsigned int dpte_row_height,
233 unsigned int meta_row_height,
234 unsigned int dpte_row_height_chroma,
235 unsigned int meta_row_height_chroma,
236 double *DestinationLinesToRequestVMInImmediateFlip,
237 double *DestinationLinesToRequestRowInImmediateFlip,
238 double *final_flip_bw,
239 bool *ImmediateFlipSupportedForPipe);
240 static double CalculateWriteBackDelay(
241 enum source_format_class WritebackPixelFormat,
242 double WritebackHRatio,
243 double WritebackVRatio,
244 unsigned int WritebackVTaps,
245 long WritebackDestinationWidth,
246 long WritebackDestinationHeight,
247 long WritebackSourceHeight,
248 unsigned int HTotal);
249 static void CalculateDynamicMetadataParameters(
250 int MaxInterDCNTileRepeaters,
251 double DPPCLK,
252 double DISPCLK,
253 double DCFClkDeepSleep,
254 double PixelClock,
255 unsigned int HTotal,
256 unsigned int VBlank,
257 unsigned int DynamicMetadataTransmittedBytes,
258 int DynamicMetadataLinesBeforeActiveRequired,
259 int InterlaceEnable,
260 bool ProgressiveToInterlaceUnitInOPP,
261 double *Tsetup,
262 double *Tdmbf,
263 double *Tdmec,
264 double *Tdmsks);
265 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
266 struct display_mode_lib *mode_lib,
267 unsigned int PrefetchMode,
268 double DCFCLK,
269 double ReturnBW,
270 double UrgentLatency,
271 double ExtraLatency,
272 double SOCCLK,
273 double DCFCLKDeepSleep,
274 unsigned int DPPPerPlane[],
275 double DPPCLK[],
276 unsigned int DETBufferSizeY[],
277 unsigned int DETBufferSizeC[],
278 unsigned int SwathHeightY[],
279 unsigned int SwathHeightC[],
280 double SwathWidthY[],
281 double SwathWidthC[],
282 double BytePerPixelDETY[],
283 double BytePerPixelDETC[],
284 enum clock_change_support *DRAMClockChangeSupport);
285 static void CalculateDCFCLKDeepSleep(
286 struct display_mode_lib *mode_lib,
287 unsigned int NumberOfActivePlanes,
288 int BytePerPixelY[],
289 int BytePerPixelC[],
290 double VRatio[],
291 double VRatioChroma[],
292 double SwathWidthY[],
293 double SwathWidthC[],
294 unsigned int DPPPerPlane[],
295 double HRatio[],
296 double HRatioChroma[],
297 double PixelClock[],
298 double PSCL_THROUGHPUT[],
299 double PSCL_THROUGHPUT_CHROMA[],
300 double DPPCLK[],
301 double ReadBandwidthLuma[],
302 double ReadBandwidthChroma[],
303 int ReturnBusWidth,
304 double *DCFCLKDeepSleep);
305 static void CalculateUrgentBurstFactor(
306 long swath_width_luma_ub,
307 long swath_width_chroma_ub,
308 unsigned int DETBufferSizeInKByte,
309 unsigned int SwathHeightY,
310 unsigned int SwathHeightC,
311 double LineTime,
312 double UrgentLatency,
313 double CursorBufferSize,
314 unsigned int CursorWidth,
315 unsigned int CursorBPP,
316 double VRatio,
317 double VRatioC,
318 double BytePerPixelInDETY,
319 double BytePerPixelInDETC,
320 double DETBufferSizeY,
321 double DETBufferSizeC,
322 double *UrgentBurstFactorCursor,
323 double *UrgentBurstFactorLuma,
324 double *UrgentBurstFactorChroma,
325 bool *NotEnoughUrgentLatencyHiding);
326
327 static void UseMinimumDCFCLK(
328 struct display_mode_lib *mode_lib,
329 struct vba_vars_st *v,
330 int MaxPrefetchMode,
331 int ReorderingBytes);
332
333 static void CalculatePixelDeliveryTimes(
334 unsigned int NumberOfActivePlanes,
335 double VRatio[],
336 double VRatioChroma[],
337 double VRatioPrefetchY[],
338 double VRatioPrefetchC[],
339 unsigned int swath_width_luma_ub[],
340 unsigned int swath_width_chroma_ub[],
341 unsigned int DPPPerPlane[],
342 double HRatio[],
343 double HRatioChroma[],
344 double PixelClock[],
345 double PSCL_THROUGHPUT[],
346 double PSCL_THROUGHPUT_CHROMA[],
347 double DPPCLK[],
348 int BytePerPixelC[],
349 enum scan_direction_class SourceScan[],
350 unsigned int NumberOfCursors[],
351 unsigned int CursorWidth[][2],
352 unsigned int CursorBPP[][2],
353 unsigned int BlockWidth256BytesY[],
354 unsigned int BlockHeight256BytesY[],
355 unsigned int BlockWidth256BytesC[],
356 unsigned int BlockHeight256BytesC[],
357 double DisplayPipeLineDeliveryTimeLuma[],
358 double DisplayPipeLineDeliveryTimeChroma[],
359 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
360 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
361 double DisplayPipeRequestDeliveryTimeLuma[],
362 double DisplayPipeRequestDeliveryTimeChroma[],
363 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
364 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
365 double CursorRequestDeliveryTime[],
366 double CursorRequestDeliveryTimePrefetch[]);
367
368 static void CalculateMetaAndPTETimes(
369 int NumberOfActivePlanes,
370 bool GPUVMEnable,
371 int MetaChunkSize,
372 int MinMetaChunkSizeBytes,
373 int HTotal[],
374 double VRatio[],
375 double VRatioChroma[],
376 double DestinationLinesToRequestRowInVBlank[],
377 double DestinationLinesToRequestRowInImmediateFlip[],
378 bool DCCEnable[],
379 double PixelClock[],
380 int BytePerPixelY[],
381 int BytePerPixelC[],
382 enum scan_direction_class SourceScan[],
383 int dpte_row_height[],
384 int dpte_row_height_chroma[],
385 int meta_row_width[],
386 int meta_row_width_chroma[],
387 int meta_row_height[],
388 int meta_row_height_chroma[],
389 int meta_req_width[],
390 int meta_req_width_chroma[],
391 int meta_req_height[],
392 int meta_req_height_chroma[],
393 int dpte_group_bytes[],
394 int PTERequestSizeY[],
395 int PTERequestSizeC[],
396 int PixelPTEReqWidthY[],
397 int PixelPTEReqHeightY[],
398 int PixelPTEReqWidthC[],
399 int PixelPTEReqHeightC[],
400 int dpte_row_width_luma_ub[],
401 int dpte_row_width_chroma_ub[],
402 double DST_Y_PER_PTE_ROW_NOM_L[],
403 double DST_Y_PER_PTE_ROW_NOM_C[],
404 double DST_Y_PER_META_ROW_NOM_L[],
405 double DST_Y_PER_META_ROW_NOM_C[],
406 double TimePerMetaChunkNominal[],
407 double TimePerChromaMetaChunkNominal[],
408 double TimePerMetaChunkVBlank[],
409 double TimePerChromaMetaChunkVBlank[],
410 double TimePerMetaChunkFlip[],
411 double TimePerChromaMetaChunkFlip[],
412 double time_per_pte_group_nom_luma[],
413 double time_per_pte_group_vblank_luma[],
414 double time_per_pte_group_flip_luma[],
415 double time_per_pte_group_nom_chroma[],
416 double time_per_pte_group_vblank_chroma[],
417 double time_per_pte_group_flip_chroma[]);
418
419 static void CalculateVMGroupAndRequestTimes(
420 unsigned int NumberOfActivePlanes,
421 bool GPUVMEnable,
422 unsigned int GPUVMMaxPageTableLevels,
423 unsigned int HTotal[],
424 int BytePerPixelC[],
425 double DestinationLinesToRequestVMInVBlank[],
426 double DestinationLinesToRequestVMInImmediateFlip[],
427 bool DCCEnable[],
428 double PixelClock[],
429 int dpte_row_width_luma_ub[],
430 int dpte_row_width_chroma_ub[],
431 int vm_group_bytes[],
432 unsigned int dpde0_bytes_per_frame_ub_l[],
433 unsigned int dpde0_bytes_per_frame_ub_c[],
434 int meta_pte_bytes_per_frame_ub_l[],
435 int meta_pte_bytes_per_frame_ub_c[],
436 double TimePerVMGroupVBlank[],
437 double TimePerVMGroupFlip[],
438 double TimePerVMRequestVBlank[],
439 double TimePerVMRequestFlip[]);
440
441 static void CalculateStutterEfficiency(
442 int NumberOfActivePlanes,
443 long ROBBufferSizeInKByte,
444 double TotalDataReadBandwidth,
445 double DCFCLK,
446 double ReturnBW,
447 double SRExitTime,
448 bool SynchronizedVBlank,
449 int DPPPerPlane[],
450 unsigned int DETBufferSizeY[],
451 int BytePerPixelY[],
452 double BytePerPixelDETY[],
453 double SwathWidthY[],
454 int SwathHeightY[],
455 int SwathHeightC[],
456 double DCCRateLuma[],
457 double DCCRateChroma[],
458 int HTotal[],
459 int VTotal[],
460 double PixelClock[],
461 double VRatio[],
462 enum scan_direction_class SourceScan[],
463 int BlockHeight256BytesY[],
464 int BlockWidth256BytesY[],
465 int BlockHeight256BytesC[],
466 int BlockWidth256BytesC[],
467 int DCCYMaxUncompressedBlock[],
468 int DCCCMaxUncompressedBlock[],
469 int VActive[],
470 bool DCCEnable[],
471 bool WritebackEnable[],
472 double ReadBandwidthPlaneLuma[],
473 double ReadBandwidthPlaneChroma[],
474 double meta_row_bw[],
475 double dpte_row_bw[],
476 double *StutterEfficiencyNotIncludingVBlank,
477 double *StutterEfficiency,
478 double *StutterPeriodOut);
479
480 static void CalculateSwathAndDETConfiguration(
481 bool ForceSingleDPP,
482 int NumberOfActivePlanes,
483 unsigned int DETBufferSizeInKByte,
484 double MaximumSwathWidthLuma[],
485 double MaximumSwathWidthChroma[],
486 enum scan_direction_class SourceScan[],
487 enum source_format_class SourcePixelFormat[],
488 enum dm_swizzle_mode SurfaceTiling[],
489 int ViewportWidth[],
490 int ViewportHeight[],
491 int SurfaceWidthY[],
492 int SurfaceWidthC[],
493 int SurfaceHeightY[],
494 int SurfaceHeightC[],
495 int Read256BytesBlockHeightY[],
496 int Read256BytesBlockHeightC[],
497 int Read256BytesBlockWidthY[],
498 int Read256BytesBlockWidthC[],
499 enum odm_combine_mode ODMCombineEnabled[],
500 int BlendingAndTiming[],
501 int BytePerPixY[],
502 int BytePerPixC[],
503 double BytePerPixDETY[],
504 double BytePerPixDETC[],
505 int HActive[],
506 double HRatio[],
507 double HRatioChroma[],
508 int DPPPerPlane[],
509 int swath_width_luma_ub[],
510 int swath_width_chroma_ub[],
511 double SwathWidth[],
512 double SwathWidthChroma[],
513 int SwathHeightY[],
514 int SwathHeightC[],
515 unsigned int DETBufferSizeY[],
516 unsigned int DETBufferSizeC[],
517 bool ViewportSizeSupportPerPlane[],
518 bool *ViewportSizeSupport);
519 static void CalculateSwathWidth(
520 bool ForceSingleDPP,
521 int NumberOfActivePlanes,
522 enum source_format_class SourcePixelFormat[],
523 enum scan_direction_class SourceScan[],
524 unsigned int ViewportWidth[],
525 unsigned int ViewportHeight[],
526 unsigned int SurfaceWidthY[],
527 unsigned int SurfaceWidthC[],
528 unsigned int SurfaceHeightY[],
529 unsigned int SurfaceHeightC[],
530 enum odm_combine_mode ODMCombineEnabled[],
531 int BytePerPixY[],
532 int BytePerPixC[],
533 int Read256BytesBlockHeightY[],
534 int Read256BytesBlockHeightC[],
535 int Read256BytesBlockWidthY[],
536 int Read256BytesBlockWidthC[],
537 int BlendingAndTiming[],
538 unsigned int HActive[],
539 double HRatio[],
540 int DPPPerPlane[],
541 double SwathWidthSingleDPPY[],
542 double SwathWidthSingleDPPC[],
543 double SwathWidthY[],
544 double SwathWidthC[],
545 int MaximumSwathHeightY[],
546 int MaximumSwathHeightC[],
547 unsigned int swath_width_luma_ub[],
548 unsigned int swath_width_chroma_ub[]);
549 static double CalculateExtraLatency(
550 long RoundTripPingLatencyCycles,
551 long ReorderingBytes,
552 double DCFCLK,
553 int TotalNumberOfActiveDPP,
554 int PixelChunkSizeInKByte,
555 int TotalNumberOfDCCActiveDPP,
556 int MetaChunkSize,
557 double ReturnBW,
558 bool GPUVMEnable,
559 bool HostVMEnable,
560 int NumberOfActivePlanes,
561 int NumberOfDPP[],
562 int dpte_group_bytes[],
563 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
564 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
565 double HostVMMinPageSize,
566 int HostVMMaxNonCachedPageTableLevels);
567 static double CalculateExtraLatencyBytes(
568 long ReorderingBytes,
569 int TotalNumberOfActiveDPP,
570 int PixelChunkSizeInKByte,
571 int TotalNumberOfDCCActiveDPP,
572 int MetaChunkSize,
573 bool GPUVMEnable,
574 bool HostVMEnable,
575 int NumberOfActivePlanes,
576 int NumberOfDPP[],
577 int dpte_group_bytes[],
578 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
579 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
580 double HostVMMinPageSize,
581 int HostVMMaxNonCachedPageTableLevels);
582 static double CalculateUrgentLatency(
583 double UrgentLatencyPixelDataOnly,
584 double UrgentLatencyPixelMixedWithVMData,
585 double UrgentLatencyVMDataOnly,
586 bool DoUrgentLatencyAdjustment,
587 double UrgentLatencyAdjustmentFabricClockComponent,
588 double UrgentLatencyAdjustmentFabricClockReference,
589 double FabricClockSingle);
590
/*
 * dml30_recalculate() - run the calculation stages of this file on @mode_lib.
 *
 * @mode_lib: library state; the same pointer is handed to every stage.
 *
 * The four stages are invoked in the fixed order below and nothing is
 * returned to the caller. NOTE(review): each stage presumably leaves its
 * results inside *mode_lib for the following stage to read, so the order
 * must not be changed - confirm against the stage implementations.
 */
void dml30_recalculate(struct display_mode_lib *mode_lib)
{
	ModeSupportAndSystemConfiguration(mode_lib);
	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
	DisplayPipeConfiguration(mode_lib);
	DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
598
dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)599 static unsigned int dscceComputeDelay(
600 unsigned int bpc,
601 double BPP,
602 unsigned int sliceWidth,
603 unsigned int numSlices,
604 enum output_format_class pixelFormat,
605 enum output_encoder_class Output)
606 {
607 // valid bpc = source bits per component in the set of {8, 10, 12}
608 // valid bpp = increments of 1/16 of a bit
609 // min = 6/7/8 in N420/N422/444, respectively
610 // max = such that compression is 1:1
611 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
612 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
613 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
614
615 // fixed value
616 unsigned int rcModelSize = 8192;
617
618 // N422/N420 operate at 2 pixels per clock
619 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L,
620 Delay, pixels;
621
622 if (pixelFormat == dm_420)
623 pixelsPerClock = 2;
624 // #all other modes operate at 1 pixel per clock
625 else if (pixelFormat == dm_444)
626 pixelsPerClock = 1;
627 else if (pixelFormat == dm_n422)
628 pixelsPerClock = 2;
629 else
630 pixelsPerClock = 1;
631
632 //initial transmit delay as per PPS
633 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
634
635 //compute ssm delay
636 if (bpc == 8)
637 D = 81;
638 else if (bpc == 10)
639 D = 89;
640 else
641 D = 113;
642
643 //divide by pixel per cycle to compute slice width as seen by DSC
644 w = sliceWidth / pixelsPerClock;
645
646 //422 mode has an additional cycle of delay
647 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
648 s = 0;
649 else
650 s = 1;
651
652 //main calculation for the dscce
653 ix = initalXmitDelay + 45;
654 wx = (w + 2) / 3;
655 P = 3 * wx - w;
656 l0 = ix / w;
657 a = ix + P * l0;
658 ax = (a + 2) / 3 + D + 6 + 1;
659 L = (ax + wx - 1) / wx;
660 if ((ix % w) == 0 && P != 0)
661 lstall = 1;
662 else
663 lstall = 0;
664 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
665
666 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
667 pixels = Delay * 3 * pixelsPerClock;
668 return pixels;
669 }
670
dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)671 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
672 {
673 unsigned int Delay = 0;
674
675 if (pixelFormat == dm_420) {
676 // sfr
677 Delay = Delay + 2;
678 // dsccif
679 Delay = Delay + 0;
680 // dscc - input deserializer
681 Delay = Delay + 3;
682 // dscc gets pixels every other cycle
683 Delay = Delay + 2;
684 // dscc - input cdc fifo
685 Delay = Delay + 12;
686 // dscc gets pixels every other cycle
687 Delay = Delay + 13;
688 // dscc - cdc uncertainty
689 Delay = Delay + 2;
690 // dscc - output cdc fifo
691 Delay = Delay + 7;
692 // dscc gets pixels every other cycle
693 Delay = Delay + 3;
694 // dscc - cdc uncertainty
695 Delay = Delay + 2;
696 // dscc - output serializer
697 Delay = Delay + 1;
698 // sft
699 Delay = Delay + 1;
700 } else if (pixelFormat == dm_n422) {
701 // sfr
702 Delay = Delay + 2;
703 // dsccif
704 Delay = Delay + 1;
705 // dscc - input deserializer
706 Delay = Delay + 5;
707 // dscc - input cdc fifo
708 Delay = Delay + 25;
709 // dscc - cdc uncertainty
710 Delay = Delay + 2;
711 // dscc - output cdc fifo
712 Delay = Delay + 10;
713 // dscc - cdc uncertainty
714 Delay = Delay + 2;
715 // dscc - output serializer
716 Delay = Delay + 1;
717 // sft
718 Delay = Delay + 1;
719 } else {
720 // sfr
721 Delay = Delay + 2;
722 // dsccif
723 Delay = Delay + 0;
724 // dscc - input deserializer
725 Delay = Delay + 3;
726 // dscc - input cdc fifo
727 Delay = Delay + 12;
728 // dscc - cdc uncertainty
729 Delay = Delay + 2;
730 // dscc - output cdc fifo
731 Delay = Delay + 7;
732 // dscc - output serializer
733 Delay = Delay + 1;
734 // dscc - cdc uncertainty
735 Delay = Delay + 2;
736 // sft
737 Delay = Delay + 1;
738 }
739
740 return Delay;
741 }
742
CalculatePrefetchSchedule(struct display_mode_lib * mode_lib,unsigned int k,Pipe * myPipe,unsigned int DSCDelay,unsigned int DPP_RECOUT_WIDTH,unsigned int VStartup,unsigned int MaxVStartup,double UrgentLatency,double UrgentExtraLatency,double TCalc,unsigned int PDEAndMetaPTEBytesFrame,unsigned int MetaRowByte,unsigned int PixelPTEBytesPerRow,double PrefetchSourceLinesY,unsigned int SwathWidthY,int BytePerPixelY,double VInitPreFillY,unsigned int MaxNumSwathY,double PrefetchSourceLinesC,unsigned int SwathWidthC,double VInitPreFillC,unsigned int MaxNumSwathC,long swath_width_luma_ub,long swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double TWait,double * DestinationLinesForPrefetch,double * PrefetchBandwidth,double * DestinationLinesToRequestVMInVBlank,double * DestinationLinesToRequestRowInVBlank,double * VRatioPrefetchY,double * VRatioPrefetchC,double * RequiredPrefetchPixDataBWLuma,double * RequiredPrefetchPixDataBWChroma,bool * NotEnoughTimeForDynamicMetadata)743 static bool CalculatePrefetchSchedule(
744 struct display_mode_lib *mode_lib,
745 unsigned int k,
746 Pipe *myPipe,
747 unsigned int DSCDelay,
748 unsigned int DPP_RECOUT_WIDTH,
749 unsigned int VStartup,
750 unsigned int MaxVStartup,
751 double UrgentLatency,
752 double UrgentExtraLatency,
753 double TCalc,
754 unsigned int PDEAndMetaPTEBytesFrame,
755 unsigned int MetaRowByte,
756 unsigned int PixelPTEBytesPerRow,
757 double PrefetchSourceLinesY,
758 unsigned int SwathWidthY,
759 int BytePerPixelY,
760 double VInitPreFillY,
761 unsigned int MaxNumSwathY,
762 double PrefetchSourceLinesC,
763 unsigned int SwathWidthC,
764 double VInitPreFillC,
765 unsigned int MaxNumSwathC,
766 long swath_width_luma_ub,
767 long swath_width_chroma_ub,
768 unsigned int SwathHeightY,
769 unsigned int SwathHeightC,
770 double TWait,
771 double *DestinationLinesForPrefetch,
772 double *PrefetchBandwidth,
773 double *DestinationLinesToRequestVMInVBlank,
774 double *DestinationLinesToRequestRowInVBlank,
775 double *VRatioPrefetchY,
776 double *VRatioPrefetchC,
777 double *RequiredPrefetchPixDataBWLuma,
778 double *RequiredPrefetchPixDataBWChroma,
779 bool *NotEnoughTimeForDynamicMetadata)
780 {
781 struct vba_vars_st *v = &mode_lib->vba;
782 double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater;
783 bool MyError = false;
784 unsigned int DPPCycles = 0, DISPCLKCycles = 0;
785 double DSTTotalPixelsAfterScaler = 0;
786 double LineTime = 0, Tsetup = 0;
787 double dst_y_prefetch_equ = 0;
788 double Tsw_oto = 0;
789 double prefetch_bw_oto = 0;
790 double Tvm_oto = 0;
791 double Tr0_oto = 0;
792 double Tvm_oto_lines = 0;
793 double Tr0_oto_lines = 0;
794 double dst_y_prefetch_oto = 0;
795 double TimeForFetchingMetaPTE = 0;
796 double TimeForFetchingRowInVBlank = 0;
797 double LinesToRequestPrefetchPixelData = 0;
798 double HostVMInefficiencyFactor = 0;
799 unsigned int HostVMDynamicLevelsTrips = 0;
800 double trip_to_mem = 0;
801 double Tvm_trips = 0;
802 double Tr0_trips = 0;
803 double Tvm_trips_rounded = 0;
804 double Tr0_trips_rounded = 0;
805 double Lsw_oto = 0;
806 double Tpre_rounded = 0;
807 double prefetch_bw_equ = 0;
808 double Tvm_equ = 0;
809 double Tr0_equ = 0;
810 double Tdmbf = 0;
811 double Tdmec = 0;
812 double Tdmsks = 0;
813
814 if (v->GPUVMEnable == true && v->HostVMEnable == true) {
815 HostVMInefficiencyFactor = v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
816 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
817 } else {
818 HostVMInefficiencyFactor = 1;
819 HostVMDynamicLevelsTrips = 0;
820 }
821
822 CalculateDynamicMetadataParameters(
823 v->MaxInterDCNTileRepeaters,
824 myPipe->DPPCLK,
825 myPipe->DISPCLK,
826 myPipe->DCFCLKDeepSleep,
827 myPipe->PixelClock,
828 myPipe->HTotal,
829 myPipe->VBlank,
830 v->DynamicMetadataTransmittedBytes[k],
831 v->DynamicMetadataLinesBeforeActiveRequired[k],
832 myPipe->InterlaceEnable,
833 v->ProgressiveToInterlaceUnitInOPP,
834 &Tsetup,
835 &Tdmbf,
836 &Tdmec,
837 &Tdmsks);
838
839 LineTime = myPipe->HTotal / myPipe->PixelClock;
840 trip_to_mem = UrgentLatency;
841 Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
842
843 if (v->DynamicMetadataVMEnabled == true && v->GPUVMEnable == true) {
844 v->Tdmdl[k] = TWait + Tvm_trips + trip_to_mem;
845 } else {
846 v->Tdmdl[k] = TWait + UrgentExtraLatency;
847 }
848
849 if (v->DynamicMetadataEnable[k] == true) {
850 if (VStartup * LineTime < Tsetup + v->Tdmdl[k] + Tdmbf + Tdmec + Tdmsks) {
851 *NotEnoughTimeForDynamicMetadata = true;
852 } else {
853 *NotEnoughTimeForDynamicMetadata = false;
854 dml_print("DML: Not Enough Time for Dynamic Meta!\n");
855 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
856 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
857 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
858 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", v->Tdmdl[k]);
859 }
860 } else {
861 *NotEnoughTimeForDynamicMetadata = false;
862 }
863
864 v->Tdmdl_vm[k] = (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true && v->GPUVMEnable == true ? TWait + Tvm_trips : 0);
865
866 if (myPipe->ScalerEnabled)
867 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL;
868 else
869 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly;
870
871 DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor;
872
873 DISPCLKCycles = v->DISPCLKDelaySubtotal;
874
875 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
876 return true;
877
878 v->DSTXAfterScaler[k] = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK
879 + DSCDelay;
880
881 v->DSTXAfterScaler[k] = v->DSTXAfterScaler[k] + ((myPipe->ODMCombineEnabled)?18:0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
882
883 if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && v->ProgressiveToInterlaceUnitInOPP))
884 v->DSTYAfterScaler[k] = 1;
885 else
886 v->DSTYAfterScaler[k] = 0;
887
888 DSTTotalPixelsAfterScaler = v->DSTYAfterScaler[k] * myPipe->HTotal + v->DSTXAfterScaler[k];
889 v->DSTYAfterScaler[k] = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
890 v->DSTXAfterScaler[k] = DSTTotalPixelsAfterScaler - ((double) (v->DSTYAfterScaler[k] * myPipe->HTotal));
891
892 MyError = false;
893
894
895 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
896 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
897 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
898
899 if (v->GPUVMEnable) {
900 if (v->GPUVMMaxPageTableLevels >= 3) {
901 v->Tno_bw[k] = UrgentExtraLatency + trip_to_mem * ((v->GPUVMMaxPageTableLevels - 2) - 1);
902 } else
903 v->Tno_bw[k] = 0;
904 } else if (!myPipe->DCCEnable)
905 v->Tno_bw[k] = LineTime;
906 else
907 v->Tno_bw[k] = LineTime / 4;
908
909 dst_y_prefetch_equ = VStartup - (Tsetup + dml_max(TWait + TCalc, v->Tdmdl[k])) / LineTime
910 - (v->DSTYAfterScaler[k] + v->DSTXAfterScaler[k] / myPipe->HTotal);
911 dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
912
913 Lsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC);
914 Tsw_oto = Lsw_oto * LineTime;
915
916 prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * v->BytePerPixelC[k]) / Tsw_oto;
917
918 if (v->GPUVMEnable == true) {
919 Tvm_oto = dml_max3(v->Tno_bw[k] + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
920 Tvm_trips,
921 LineTime / 4.0);
922 } else
923 Tvm_oto = LineTime / 4.0;
924
925 if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
926 Tr0_oto = dml_max3(
927 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
928 LineTime - Tvm_oto, LineTime / 4);
929 } else
930 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
931
932 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
933 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
934 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
935
936 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
937 Tpre_rounded = dst_y_prefetch_equ * LineTime;
938
939 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
940 dml_print("DML: dst_y_prefetch_equ: %f\n", dst_y_prefetch_equ);
941
942 dml_print("DML: LineTime: %f\n", LineTime);
943 dml_print("DML: VStartup: %d\n", VStartup);
944 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
945 dml_print("DML: Tsetup: %fus - time from vstartup to vready\n", Tsetup);
946 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
947 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
948 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
949 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
950 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
951 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", v->Tdmdl_vm[k]);
952 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", v->Tdmdl[k]);
953 dml_print("DML: dst_x_after_scl: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", v->DSTXAfterScaler[k]);
954 dml_print("DML: dst_y_after_scl: %d lines - number of lines of pipeline and buffer delay after scaler \n", (int)v->DSTYAfterScaler[k]);
955
956 *PrefetchBandwidth = 0;
957 *DestinationLinesToRequestVMInVBlank = 0;
958 *DestinationLinesToRequestRowInVBlank = 0;
959 *VRatioPrefetchY = 0;
960 *VRatioPrefetchC = 0;
961 *RequiredPrefetchPixDataBWLuma = 0;
962 if (dst_y_prefetch_equ > 1) {
963 double PrefetchBandwidth1 = 0;
964 double PrefetchBandwidth2 = 0;
965 double PrefetchBandwidth3 = 0;
966 double PrefetchBandwidth4 = 0;
967
968 if (Tpre_rounded - v->Tno_bw[k] > 0)
969 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
970 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
971 + PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY
972 + PrefetchSourceLinesC * swath_width_chroma_ub * v->BytePerPixelC[k])
973 / (Tpre_rounded - v->Tno_bw[k]);
974 else
975 PrefetchBandwidth1 = 0;
976
977 if (VStartup == MaxVStartup && (PrefetchBandwidth1 > 4 * prefetch_bw_oto) && (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - v->Tno_bw[k]) > 0) {
978 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - v->Tno_bw[k]);
979 }
980
981 if (Tpre_rounded - v->Tno_bw[k] - 2 * Tr0_trips_rounded > 0)
982 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame *
983 HostVMInefficiencyFactor + PrefetchSourceLinesY *
984 swath_width_luma_ub * BytePerPixelY +
985 PrefetchSourceLinesC * swath_width_chroma_ub *
986 v->BytePerPixelC[k]) /
987 (Tpre_rounded - v->Tno_bw[k] - 2 * Tr0_trips_rounded);
988 else
989 PrefetchBandwidth2 = 0;
990
991 if (Tpre_rounded - Tvm_trips_rounded > 0)
992 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow *
993 HostVMInefficiencyFactor + PrefetchSourceLinesY *
994 swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC *
995 swath_width_chroma_ub * v->BytePerPixelC[k]) / (Tpre_rounded -
996 Tvm_trips_rounded);
997 else
998 PrefetchBandwidth3 = 0;
999
1000 if (VStartup == MaxVStartup && (PrefetchBandwidth3 > 4 * prefetch_bw_oto) && Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1001 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - Tvm_trips_rounded);
1002 }
1003
1004 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1005 PrefetchBandwidth4 = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * v->BytePerPixelC[k])
1006 / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1007 else
1008 PrefetchBandwidth4 = 0;
1009
1010 {
1011 bool Case1OK;
1012 bool Case2OK;
1013 bool Case3OK;
1014
1015 if (PrefetchBandwidth1 > 0) {
1016 if (v->Tno_bw[k] + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
1017 >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1018 Case1OK = true;
1019 } else {
1020 Case1OK = false;
1021 }
1022 } else {
1023 Case1OK = false;
1024 }
1025
1026 if (PrefetchBandwidth2 > 0) {
1027 if (v->Tno_bw[k] + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
1028 >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1029 Case2OK = true;
1030 } else {
1031 Case2OK = false;
1032 }
1033 } else {
1034 Case2OK = false;
1035 }
1036
1037 if (PrefetchBandwidth3 > 0) {
1038 if (v->Tno_bw[k] + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3
1039 < Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1040 Case3OK = true;
1041 } else {
1042 Case3OK = false;
1043 }
1044 } else {
1045 Case3OK = false;
1046 }
1047
1048 if (Case1OK) {
1049 prefetch_bw_equ = PrefetchBandwidth1;
1050 } else if (Case2OK) {
1051 prefetch_bw_equ = PrefetchBandwidth2;
1052 } else if (Case3OK) {
1053 prefetch_bw_equ = PrefetchBandwidth3;
1054 } else {
1055 prefetch_bw_equ = PrefetchBandwidth4;
1056 }
1057
1058 dml_print("DML: prefetch_bw_equ: %f\n", prefetch_bw_equ);
1059
1060 if (prefetch_bw_equ > 0) {
1061 if (v->GPUVMEnable) {
1062 Tvm_equ = dml_max3(v->Tno_bw[k] + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1063 } else {
1064 Tvm_equ = LineTime / 4;
1065 }
1066
1067 if ((v->GPUVMEnable || myPipe->DCCEnable)) {
1068 Tr0_equ = dml_max4(
1069 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1070 Tr0_trips,
1071 (LineTime - Tvm_equ) / 2,
1072 LineTime / 4);
1073 } else {
1074 Tr0_equ = (LineTime - Tvm_equ) / 2;
1075 }
1076 } else {
1077 Tvm_equ = 0;
1078 Tr0_equ = 0;
1079 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1080 }
1081 }
1082
1083 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1084 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1085 TimeForFetchingMetaPTE = Tvm_oto;
1086 TimeForFetchingRowInVBlank = Tr0_oto;
1087 *PrefetchBandwidth = prefetch_bw_oto;
1088 } else {
1089 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1090 TimeForFetchingMetaPTE = Tvm_equ;
1091 TimeForFetchingRowInVBlank = Tr0_equ;
1092 *PrefetchBandwidth = prefetch_bw_equ;
1093 }
1094
1095 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1096
1097 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1098
1099
1100 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank
1101 - 2 * *DestinationLinesToRequestRowInVBlank;
1102
1103 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1104
1105 *VRatioPrefetchY = (double) PrefetchSourceLinesY
1106 / LinesToRequestPrefetchPixelData;
1107 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1108 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1109 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1110 *VRatioPrefetchY = dml_max((double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1111 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1112 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1113 } else {
1114 MyError = true;
1115 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1116 *VRatioPrefetchY = 0;
1117 }
1118 }
1119
1120 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1121 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1122
1123 if ((SwathHeightC > 4)) {
1124 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1125 *VRatioPrefetchC = dml_max(*VRatioPrefetchC,
1126 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1127 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1128 } else {
1129 MyError = true;
1130 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1131 *VRatioPrefetchC = 0;
1132 }
1133 }
1134
1135 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * BytePerPixelY * swath_width_luma_ub / LineTime;
1136 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * v->BytePerPixelC[k] * swath_width_chroma_ub / LineTime;
1137 } else {
1138 MyError = true;
1139 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1140 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1141 *VRatioPrefetchY = 0;
1142 *VRatioPrefetchC = 0;
1143 *RequiredPrefetchPixDataBWLuma = 0;
1144 *RequiredPrefetchPixDataBWChroma = 0;
1145 }
1146
1147 dml_print("DML: Tpre: %fus - sum of tim to request meta pte, 2 x data pte + meta data, swaths\n", (double)LinesToRequestPrefetchPixelData * LineTime + 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1148 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1149 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1150 dml_print("DML: Tr1: %fus - time to fetch second row of data pagetables and second row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1151 dml_print("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)LinesToRequestPrefetchPixelData * LineTime);
1152 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (v->DSTYAfterScaler[k] + ((v->DSTXAfterScaler[k]) / (double) myPipe->HTotal)) * LineTime);
1153 dml_print("DML: Tvstartup - Tsetup - Tcalc - Twait - Tpre - To > 0\n");
1154 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank - (v->DSTYAfterScaler[k] + ((v->DSTXAfterScaler[k]) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - Tsetup);
1155 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1156
1157 } else {
1158 MyError = true;
1159 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1160 }
1161
1162 {
1163 double prefetch_vm_bw = 0;
1164 double prefetch_row_bw = 0;
1165
1166 if (PDEAndMetaPTEBytesFrame == 0) {
1167 prefetch_vm_bw = 0;
1168 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1169 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1170 } else {
1171 prefetch_vm_bw = 0;
1172 MyError = true;
1173 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1174 }
1175 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1176 prefetch_row_bw = 0;
1177 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1178 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1179 } else {
1180 prefetch_row_bw = 0;
1181 MyError = true;
1182 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1183 }
1184
1185 v->prefetch_vmrow_bw[k] = dml_max(prefetch_vm_bw, prefetch_row_bw);
1186 }
1187
1188 if (MyError) {
1189 *PrefetchBandwidth = 0;
1190 *DestinationLinesToRequestVMInVBlank = 0;
1191 *DestinationLinesToRequestRowInVBlank = 0;
1192 *DestinationLinesForPrefetch = 0;
1193 *VRatioPrefetchY = 0;
1194 *VRatioPrefetchC = 0;
1195 *RequiredPrefetchPixDataBWLuma = 0;
1196 *RequiredPrefetchPixDataBWChroma = 0;
1197 }
1198
1199 return MyError;
1200 }
1201
/*
 * Returns (VCOSpeed * 4) divided by the quotient (VCOSpeed * 4 / Clock)
 * after that quotient has been passed through dml_floor(..., 1).
 *
 * NOTE(review): dml_floor() is defined outside this file; presumably it rounds
 * down to a multiple of its second argument, in which case the divider is a
 * whole number and the result is not below Clock - confirm against
 * dml_inline_defs.h.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double divider = dml_floor(VCOSpeed * 4 / Clock, 1);

	return VCOSpeed * 4 / divider;
}
1206
/*
 * Returns (VCOSpeed * 4) divided by the quotient (VCOSpeed * 4.0 / Clock)
 * after that quotient has been passed through dml_ceil(..., 1).
 *
 * NOTE(review): dml_ceil() is defined outside this file; presumably it rounds
 * up to a multiple of its second argument, in which case the divider is a
 * whole number and the result is not above Clock - confirm against
 * dml_inline_defs.h.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1);

	return VCOSpeed * 4 / divider;
}
1211
CalculateDCCConfiguration(bool DCCEnabled,bool DCCProgrammingAssumesScanDirectionUnknown,enum source_format_class SourcePixelFormat,unsigned int SurfaceWidthLuma,unsigned int SurfaceWidthChroma,unsigned int SurfaceHeightLuma,unsigned int SurfaceHeightChroma,double DETBufferSize,unsigned int RequestHeight256ByteLuma,unsigned int RequestHeight256ByteChroma,enum dm_swizzle_mode TilingFormat,unsigned int BytePerPixelY,unsigned int BytePerPixelC,double BytePerPixelDETY,double BytePerPixelDETC,enum scan_direction_class ScanOrientation,unsigned int * MaxUncompressedBlockLuma,unsigned int * MaxUncompressedBlockChroma,unsigned int * MaxCompressedBlockLuma,unsigned int * MaxCompressedBlockChroma,unsigned int * IndependentBlockLuma,unsigned int * IndependentBlockChroma)1212 static void CalculateDCCConfiguration(
1213 bool DCCEnabled,
1214 bool DCCProgrammingAssumesScanDirectionUnknown,
1215 enum source_format_class SourcePixelFormat,
1216 unsigned int SurfaceWidthLuma,
1217 unsigned int SurfaceWidthChroma,
1218 unsigned int SurfaceHeightLuma,
1219 unsigned int SurfaceHeightChroma,
1220 double DETBufferSize,
1221 unsigned int RequestHeight256ByteLuma,
1222 unsigned int RequestHeight256ByteChroma,
1223 enum dm_swizzle_mode TilingFormat,
1224 unsigned int BytePerPixelY,
1225 unsigned int BytePerPixelC,
1226 double BytePerPixelDETY,
1227 double BytePerPixelDETC,
1228 enum scan_direction_class ScanOrientation,
1229 unsigned int *MaxUncompressedBlockLuma,
1230 unsigned int *MaxUncompressedBlockChroma,
1231 unsigned int *MaxCompressedBlockLuma,
1232 unsigned int *MaxCompressedBlockChroma,
1233 unsigned int *IndependentBlockLuma,
1234 unsigned int *IndependentBlockChroma)
1235 {
1236 int yuv420 = 0;
1237 int horz_div_l = 0;
1238 int horz_div_c = 0;
1239 int vert_div_l = 0;
1240 int vert_div_c = 0;
1241
1242 int req128_horz_wc_l = 0;
1243 int req128_horz_wc_c = 0;
1244 int req128_vert_wc_l = 0;
1245 int req128_vert_wc_c = 0;
1246 int segment_order_horz_contiguous_luma = 0;
1247 int segment_order_horz_contiguous_chroma = 0;
1248 int segment_order_vert_contiguous_luma = 0;
1249 int segment_order_vert_contiguous_chroma = 0;
1250
1251 long full_swath_bytes_horz_wc_l = 0;
1252 long full_swath_bytes_horz_wc_c = 0;
1253 long full_swath_bytes_vert_wc_l = 0;
1254 long full_swath_bytes_vert_wc_c = 0;
1255
1256 long swath_buf_size = 0;
1257 double detile_buf_vp_horz_limit = 0;
1258 double detile_buf_vp_vert_limit = 0;
1259
1260 long MAS_vp_horz_limit = 0;
1261 long MAS_vp_vert_limit = 0;
1262 long max_vp_horz_width = 0;
1263 long max_vp_vert_height = 0;
1264 long eff_surf_width_l = 0;
1265 long eff_surf_width_c = 0;
1266 long eff_surf_height_l = 0;
1267 long eff_surf_height_c = 0;
1268
1269 typedef enum {
1270 REQ_256Bytes,
1271 REQ_128BytesNonContiguous,
1272 REQ_128BytesContiguous,
1273 REQ_NA
1274 } RequestType;
1275
1276 RequestType RequestLuma;
1277 RequestType RequestChroma;
1278
1279 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
1280 horz_div_l = 1;
1281 horz_div_c = 1;
1282 vert_div_l = 1;
1283 vert_div_c = 1;
1284
1285 if (BytePerPixelY == 1)
1286 vert_div_l = 0;
1287 if (BytePerPixelC == 1)
1288 vert_div_c = 0;
1289 if (BytePerPixelY == 8
1290 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t
1291 || TilingFormat == dm_sw_64kb_s_x))
1292 horz_div_l = 0;
1293 if (BytePerPixelC == 8
1294 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t
1295 || TilingFormat == dm_sw_64kb_s_x))
1296 horz_div_c = 0;
1297
1298 if (BytePerPixelC == 0) {
1299 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1300 detile_buf_vp_horz_limit = (double) swath_buf_size
1301 / ((double) RequestHeight256ByteLuma * BytePerPixelY
1302 / (1 + horz_div_l));
1303 detile_buf_vp_vert_limit = (double) swath_buf_size
1304 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1305 } else {
1306 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1307 detile_buf_vp_horz_limit = (double) swath_buf_size
1308 / ((double) RequestHeight256ByteLuma * BytePerPixelY
1309 / (1 + horz_div_l)
1310 + (double) RequestHeight256ByteChroma
1311 * BytePerPixelC / (1 + horz_div_c)
1312 / (1 + yuv420));
1313 detile_buf_vp_vert_limit = (double) swath_buf_size
1314 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)
1315 + 256.0 / RequestHeight256ByteChroma
1316 / (1 + vert_div_c) / (1 + yuv420));
1317 }
1318
1319 if (SourcePixelFormat == dm_420_10) {
1320 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1321 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
1322 }
1323
1324 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1325 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
1326
1327 MAS_vp_horz_limit = 5760;
1328 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1329 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1330 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
1331 eff_surf_width_l =
1332 (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1333 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1334 eff_surf_height_l = (
1335 SurfaceHeightLuma > max_vp_vert_height ?
1336 max_vp_vert_height : SurfaceHeightLuma);
1337 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
1338
1339 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1340 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1341 if (BytePerPixelC > 0) {
1342 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma
1343 * BytePerPixelC;
1344 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1345 } else {
1346 full_swath_bytes_horz_wc_c = 0;
1347 full_swath_bytes_vert_wc_c = 0;
1348 }
1349
1350 if (SourcePixelFormat == dm_420_10) {
1351 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1352 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1353 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1354 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
1355 }
1356
1357 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1358 req128_horz_wc_l = 0;
1359 req128_horz_wc_c = 0;
1360 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c
1361 && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c
1362 <= DETBufferSize) {
1363 req128_horz_wc_l = 0;
1364 req128_horz_wc_c = 1;
1365 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c
1366 && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c
1367 <= DETBufferSize) {
1368 req128_horz_wc_l = 1;
1369 req128_horz_wc_c = 0;
1370 } else {
1371 req128_horz_wc_l = 1;
1372 req128_horz_wc_c = 1;
1373 }
1374
1375 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1376 req128_vert_wc_l = 0;
1377 req128_vert_wc_c = 0;
1378 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c
1379 && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c
1380 <= DETBufferSize) {
1381 req128_vert_wc_l = 0;
1382 req128_vert_wc_c = 1;
1383 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c
1384 && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c
1385 <= DETBufferSize) {
1386 req128_vert_wc_l = 1;
1387 req128_vert_wc_c = 0;
1388 } else {
1389 req128_vert_wc_l = 1;
1390 req128_vert_wc_c = 1;
1391 }
1392
1393 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1394 segment_order_horz_contiguous_luma = 0;
1395 } else {
1396 segment_order_horz_contiguous_luma = 1;
1397 }
1398 if ((BytePerPixelY == 8
1399 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x
1400 || TilingFormat == dm_sw_64kb_d_t
1401 || TilingFormat == dm_sw_64kb_r_x))
1402 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1403 segment_order_vert_contiguous_luma = 0;
1404 } else {
1405 segment_order_vert_contiguous_luma = 1;
1406 }
1407 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1408 segment_order_horz_contiguous_chroma = 0;
1409 } else {
1410 segment_order_horz_contiguous_chroma = 1;
1411 }
1412 if ((BytePerPixelC == 8
1413 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x
1414 || TilingFormat == dm_sw_64kb_d_t
1415 || TilingFormat == dm_sw_64kb_r_x))
1416 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1417 segment_order_vert_contiguous_chroma = 0;
1418 } else {
1419 segment_order_vert_contiguous_chroma = 1;
1420 }
1421
1422 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1423 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1424 RequestLuma = REQ_256Bytes;
1425 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0)
1426 || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1427 RequestLuma = REQ_128BytesNonContiguous;
1428 } else {
1429 RequestLuma = REQ_128BytesContiguous;
1430 }
1431 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1432 RequestChroma = REQ_256Bytes;
1433 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0)
1434 || (req128_vert_wc_c == 1
1435 && segment_order_vert_contiguous_chroma == 0)) {
1436 RequestChroma = REQ_128BytesNonContiguous;
1437 } else {
1438 RequestChroma = REQ_128BytesContiguous;
1439 }
1440 } else if (ScanOrientation != dm_vert) {
1441 if (req128_horz_wc_l == 0) {
1442 RequestLuma = REQ_256Bytes;
1443 } else if (segment_order_horz_contiguous_luma == 0) {
1444 RequestLuma = REQ_128BytesNonContiguous;
1445 } else {
1446 RequestLuma = REQ_128BytesContiguous;
1447 }
1448 if (req128_horz_wc_c == 0) {
1449 RequestChroma = REQ_256Bytes;
1450 } else if (segment_order_horz_contiguous_chroma == 0) {
1451 RequestChroma = REQ_128BytesNonContiguous;
1452 } else {
1453 RequestChroma = REQ_128BytesContiguous;
1454 }
1455 } else {
1456 if (req128_vert_wc_l == 0) {
1457 RequestLuma = REQ_256Bytes;
1458 } else if (segment_order_vert_contiguous_luma == 0) {
1459 RequestLuma = REQ_128BytesNonContiguous;
1460 } else {
1461 RequestLuma = REQ_128BytesContiguous;
1462 }
1463 if (req128_vert_wc_c == 0) {
1464 RequestChroma = REQ_256Bytes;
1465 } else if (segment_order_vert_contiguous_chroma == 0) {
1466 RequestChroma = REQ_128BytesNonContiguous;
1467 } else {
1468 RequestChroma = REQ_128BytesContiguous;
1469 }
1470 }
1471
1472 if (RequestLuma == REQ_256Bytes) {
1473 *MaxUncompressedBlockLuma = 256;
1474 *MaxCompressedBlockLuma = 256;
1475 *IndependentBlockLuma = 0;
1476 } else if (RequestLuma == REQ_128BytesContiguous) {
1477 *MaxUncompressedBlockLuma = 256;
1478 *MaxCompressedBlockLuma = 128;
1479 *IndependentBlockLuma = 128;
1480 } else {
1481 *MaxUncompressedBlockLuma = 256;
1482 *MaxCompressedBlockLuma = 64;
1483 *IndependentBlockLuma = 64;
1484 }
1485
1486 if (RequestChroma == REQ_256Bytes) {
1487 *MaxUncompressedBlockChroma = 256;
1488 *MaxCompressedBlockChroma = 256;
1489 *IndependentBlockChroma = 0;
1490 } else if (RequestChroma == REQ_128BytesContiguous) {
1491 *MaxUncompressedBlockChroma = 256;
1492 *MaxCompressedBlockChroma = 128;
1493 *IndependentBlockChroma = 128;
1494 } else {
1495 *MaxUncompressedBlockChroma = 256;
1496 *MaxCompressedBlockChroma = 64;
1497 *IndependentBlockChroma = 64;
1498 }
1499
1500 if (DCCEnabled != true || BytePerPixelC == 0) {
1501 *MaxUncompressedBlockChroma = 0;
1502 *MaxCompressedBlockChroma = 0;
1503 *IndependentBlockChroma = 0;
1504 }
1505
1506 if (DCCEnabled != true) {
1507 *MaxUncompressedBlockLuma = 0;
1508 *MaxCompressedBlockLuma = 0;
1509 *IndependentBlockLuma = 0;
1510 }
1511 }
1512
1513
CalculatePrefetchSourceLines(struct display_mode_lib * mode_lib,double VRatio,double vtaps,bool Interlace,bool ProgressiveToInterlaceUnitInOPP,unsigned int SwathHeight,unsigned int ViewportYStart,double * VInitPreFill,unsigned int * MaxNumSwath)1514 static double CalculatePrefetchSourceLines(
1515 struct display_mode_lib *mode_lib,
1516 double VRatio,
1517 double vtaps,
1518 bool Interlace,
1519 bool ProgressiveToInterlaceUnitInOPP,
1520 unsigned int SwathHeight,
1521 unsigned int ViewportYStart,
1522 double *VInitPreFill,
1523 unsigned int *MaxNumSwath)
1524 {
1525 unsigned int MaxPartialSwath = 0;
1526
1527 if (ProgressiveToInterlaceUnitInOPP)
1528 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1529 else
1530 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
1531
1532 if (!mode_lib->vba.IgnoreViewportPositioning) {
1533
1534 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1535
1536 if (*VInitPreFill > 1.0)
1537 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1538 else
1539 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2)
1540 % SwathHeight;
1541 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
1542
1543 } else {
1544
1545 if (ViewportYStart != 0)
1546 dml_print(
1547 "WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1548
1549 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1550
1551 if (*VInitPreFill > 1.0)
1552 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1553 else
1554 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1)
1555 % SwathHeight;
1556 }
1557
1558 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1559 }
1560
CalculateVMAndRowBytes(struct display_mode_lib * mode_lib,bool DCCEnable,unsigned int BlockHeight256Bytes,unsigned int BlockWidth256Bytes,enum source_format_class SourcePixelFormat,unsigned int SurfaceTiling,unsigned int BytePerPixel,enum scan_direction_class ScanDirection,unsigned int SwathWidth,unsigned int ViewportHeight,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,unsigned int GPUVMMinPageSize,unsigned int HostVMMinPageSize,unsigned int PTEBufferSizeInRequests,unsigned int Pitch,unsigned int DCCMetaPitch,unsigned int * MacroTileWidth,unsigned int * MetaRowByte,unsigned int * PixelPTEBytesPerRow,bool * PTEBufferSizeNotExceeded,unsigned int * dpte_row_width_ub,unsigned int * dpte_row_height,unsigned int * MetaRequestWidth,unsigned int * MetaRequestHeight,unsigned int * meta_row_width,unsigned int * meta_row_height,unsigned int * vm_group_bytes,unsigned int * dpte_group_bytes,unsigned int * PixelPTEReqWidth,unsigned int * PixelPTEReqHeight,unsigned int * PTERequestSize,unsigned int * DPDE0BytesFrame,unsigned int * MetaPTEBytesFrame)1561 static unsigned int CalculateVMAndRowBytes(
1562 struct display_mode_lib *mode_lib,
1563 bool DCCEnable,
1564 unsigned int BlockHeight256Bytes,
1565 unsigned int BlockWidth256Bytes,
1566 enum source_format_class SourcePixelFormat,
1567 unsigned int SurfaceTiling,
1568 unsigned int BytePerPixel,
1569 enum scan_direction_class ScanDirection,
1570 unsigned int SwathWidth,
1571 unsigned int ViewportHeight,
1572 bool GPUVMEnable,
1573 bool HostVMEnable,
1574 unsigned int HostVMMaxNonCachedPageTableLevels,
1575 unsigned int GPUVMMinPageSize,
1576 unsigned int HostVMMinPageSize,
1577 unsigned int PTEBufferSizeInRequests,
1578 unsigned int Pitch,
1579 unsigned int DCCMetaPitch,
1580 unsigned int *MacroTileWidth,
1581 unsigned int *MetaRowByte,
1582 unsigned int *PixelPTEBytesPerRow,
1583 bool *PTEBufferSizeNotExceeded,
1584 unsigned int *dpte_row_width_ub,
1585 unsigned int *dpte_row_height,
1586 unsigned int *MetaRequestWidth,
1587 unsigned int *MetaRequestHeight,
1588 unsigned int *meta_row_width,
1589 unsigned int *meta_row_height,
1590 unsigned int *vm_group_bytes,
1591 unsigned int *dpte_group_bytes,
1592 unsigned int *PixelPTEReqWidth,
1593 unsigned int *PixelPTEReqHeight,
1594 unsigned int *PTERequestSize,
1595 unsigned int *DPDE0BytesFrame,
1596 unsigned int *MetaPTEBytesFrame)
1597 {
1598 unsigned int MPDEBytesFrame = 0;
1599 unsigned int DCCMetaSurfaceBytes = 0;
1600 unsigned int MacroTileSizeBytes = 0;
1601 unsigned int MacroTileHeight = 0;
1602 unsigned int ExtraDPDEBytesFrame = 0;
1603 unsigned int PDEAndMetaPTEBytesFrame = 0;
1604 unsigned int PixelPTEReqHeightPTEs = 0;
1605 unsigned int HostVMDynamicLevels = 0;
1606
1607 double FractionOfPTEReturnDrop;
1608
1609 if (GPUVMEnable == true && HostVMEnable == true) {
1610 if (HostVMMinPageSize < 2048) {
1611 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1612 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1613 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1614 } else {
1615 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1616 }
1617 }
1618
1619 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1620 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1621 if (ScanDirection != dm_vert) {
1622 *meta_row_height = *MetaRequestHeight;
1623 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth)
1624 + *MetaRequestWidth;
1625 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1626 } else {
1627 *meta_row_height = *MetaRequestWidth;
1628 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight)
1629 + *MetaRequestHeight;
1630 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1631 }
1632 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes)
1633 + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1634 if (GPUVMEnable == true) {
1635 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1636 MPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 1);
1637 } else {
1638 *MetaPTEBytesFrame = 0;
1639 MPDEBytesFrame = 0;
1640 }
1641
1642 if (DCCEnable != true) {
1643 *MetaPTEBytesFrame = 0;
1644 MPDEBytesFrame = 0;
1645 *MetaRowByte = 0;
1646 }
1647
1648 if (SurfaceTiling == dm_sw_linear) {
1649 MacroTileSizeBytes = 256;
1650 MacroTileHeight = BlockHeight256Bytes;
1651 } else {
1652 MacroTileSizeBytes = 65536;
1653 MacroTileHeight = 16 * BlockHeight256Bytes;
1654 }
1655 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1656
1657 if (GPUVMEnable == true && mode_lib->vba.GPUVMMaxPageTableLevels > 1) {
1658 if (ScanDirection != dm_vert) {
1659 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1660 } else {
1661 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1662 }
1663 ExtraDPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 2);
1664 } else {
1665 *DPDE0BytesFrame = 0;
1666 ExtraDPDEBytesFrame = 0;
1667 }
1668
1669 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame
1670 + ExtraDPDEBytesFrame;
1671
1672 if (HostVMEnable == true) {
1673 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1674 }
1675
1676 if (SurfaceTiling == dm_sw_linear) {
1677 PixelPTEReqHeightPTEs = 1;
1678 *PixelPTEReqHeight = 1;
1679 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1680 *PTERequestSize = 64;
1681 FractionOfPTEReturnDrop = 0;
1682 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1683 PixelPTEReqHeightPTEs = 16;
1684 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1685 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1686 *PTERequestSize = 128;
1687 FractionOfPTEReturnDrop = 0;
1688 } else {
1689 PixelPTEReqHeightPTEs = 1;
1690 *PixelPTEReqHeight = MacroTileHeight;
1691 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1692 *PTERequestSize = 64;
1693 FractionOfPTEReturnDrop = 0;
1694 }
1695
1696 if (SurfaceTiling == dm_sw_linear) {
1697 if (PTEBufferSizeInRequests == 0)
1698 *dpte_row_height = 1;
1699 else
1700 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
1701 *dpte_row_width_ub = (dml_ceil(((double) SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1702 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1703 } else if (ScanDirection != dm_vert) {
1704 *dpte_row_height = *PixelPTEReqHeight;
1705 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1706 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1707 } else {
1708 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1709 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1710 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1711 }
1712 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop)
1713 <= 64 * PTEBufferSizeInRequests) {
1714 *PTEBufferSizeNotExceeded = true;
1715 } else {
1716 *PTEBufferSizeNotExceeded = false;
1717 }
1718
1719 if (GPUVMEnable != true) {
1720 *PixelPTEBytesPerRow = 0;
1721 *PTEBufferSizeNotExceeded = true;
1722 }
1723 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
1724
1725 if (HostVMEnable == true) {
1726 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
1727 }
1728
1729 if (HostVMEnable == true) {
1730 *vm_group_bytes = 512;
1731 *dpte_group_bytes = 512;
1732 } else if (GPUVMEnable == true) {
1733 *vm_group_bytes = 2048;
1734 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
1735 *dpte_group_bytes = 512;
1736 } else {
1737 *dpte_group_bytes = 2048;
1738 }
1739 } else {
1740 *vm_group_bytes = 0;
1741 *dpte_group_bytes = 0;
1742 }
1743
1744 return PDEAndMetaPTEBytesFrame;
1745 }
1746
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib * mode_lib)1747 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
1748 struct display_mode_lib *mode_lib)
1749 {
1750 struct vba_vars_st *v = &mode_lib->vba;
1751 unsigned int j, k;
1752 long ReorderBytes = 0;
1753 unsigned int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
1754 double MaxTotalRDBandwidth = 0;
1755 double MaxTotalRDBandwidthNoUrgentBurst = 0;
1756 bool DestinationLineTimesForPrefetchLessThan2 = false;
1757 bool VRatioPrefetchMoreThan4 = false;
1758 double TWait;
1759
1760 v->WritebackDISPCLK = 0.0;
1761 v->DISPCLKWithRamping = 0;
1762 v->DISPCLKWithoutRamping = 0;
1763 v->GlobalDPPCLK = 0.0;
1764 /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
1765 v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] = dml_min3(
1766 v->ReturnBusWidth * v->DCFCLK,
1767 v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth,
1768 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
1769 if (v->HostVMEnable != true) {
1770 v->ReturnBW = v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100;
1771 } else {
1772 v->ReturnBW = v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100;
1773 }
1774 /* End DAL custom code */
1775
1776 // DISPCLK and DPPCLK Calculation
1777 //
1778 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1779 if (v->WritebackEnable[k]) {
1780 v->WritebackDISPCLK = dml_max(v->WritebackDISPCLK,
1781 dml30_CalculateWriteBackDISPCLK(
1782 v->WritebackPixelFormat[k],
1783 v->PixelClock[k],
1784 v->WritebackHRatio[k],
1785 v->WritebackVRatio[k],
1786 v->WritebackHTaps[k],
1787 v->WritebackVTaps[k],
1788 v->WritebackSourceWidth[k],
1789 v->WritebackDestinationWidth[k],
1790 v->HTotal[k],
1791 v->WritebackLineBufferSize));
1792 }
1793 }
1794
1795 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1796 if (v->HRatio[k] > 1) {
1797 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
1798 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
1799 } else {
1800 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
1801 v->MaxDCHUBToPSCLThroughput,
1802 v->MaxPSCLToLBThroughput);
1803 }
1804
1805 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
1806 * dml_max(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
1807 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
1808
1809 if ((v->htaps[k] > 6 || v->vtaps[k] > 6)
1810 && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
1811 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
1812 }
1813
1814 if ((v->SourcePixelFormat[k] != dm_420_8
1815 && v->SourcePixelFormat[k] != dm_420_10
1816 && v->SourcePixelFormat[k] != dm_420_12
1817 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
1818 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
1819 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
1820 } else {
1821 if (v->HRatioChroma[k] > 1) {
1822 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
1823 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
1824 } else {
1825 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
1826 v->MaxDCHUBToPSCLThroughput,
1827 v->MaxPSCLToLBThroughput);
1828 }
1829 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
1830 * dml_max3(v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
1831 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k], 1.0);
1832
1833 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6)
1834 && v->DPPCLKUsingSingleDPPChroma
1835 < 2 * v->PixelClock[k]) {
1836 v->DPPCLKUsingSingleDPPChroma = 2
1837 * v->PixelClock[k];
1838 }
1839
1840 v->DPPCLKUsingSingleDPP[k] = dml_max(
1841 v->DPPCLKUsingSingleDPPLuma,
1842 v->DPPCLKUsingSingleDPPChroma);
1843 }
1844 }
1845
1846 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1847 if (v->BlendingAndTiming[k] != k)
1848 continue;
1849 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
1850 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
1851 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1852 * (1 + v->DISPCLKRampingMargin / 100));
1853 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
1854 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
1855 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
1856 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
1857 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1858 * (1 + v->DISPCLKRampingMargin / 100));
1859 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
1860 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
1861 } else {
1862 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
1863 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1864 * (1 + v->DISPCLKRampingMargin / 100));
1865 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
1866 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
1867 }
1868 }
1869
1870 v->DISPCLKWithRamping = dml_max(
1871 v->DISPCLKWithRamping,
1872 v->WritebackDISPCLK);
1873 v->DISPCLKWithoutRamping = dml_max(
1874 v->DISPCLKWithoutRamping,
1875 v->WritebackDISPCLK);
1876
1877 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
1878 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
1879 v->DISPCLKWithRamping,
1880 v->DISPCLKDPPCLKVCOSpeed);
1881 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
1882 v->DISPCLKWithoutRamping,
1883 v->DISPCLKDPPCLKVCOSpeed);
1884 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
1885 v->soc.clock_limits[mode_lib->soc.num_states - 1].dispclk_mhz,
1886 v->DISPCLKDPPCLKVCOSpeed);
1887 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity
1888 > v->MaxDispclkRoundedToDFSGranularity) {
1889 v->DISPCLK_calculated =
1890 v->DISPCLKWithoutRampingRoundedToDFSGranularity;
1891 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity
1892 > v->MaxDispclkRoundedToDFSGranularity) {
1893 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
1894 } else {
1895 v->DISPCLK_calculated =
1896 v->DISPCLKWithRampingRoundedToDFSGranularity;
1897 }
1898 v->DISPCLK = v->DISPCLK_calculated;
1899 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
1900
1901 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1902 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k]
1903 / v->DPPPerPlane[k]
1904 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
1905 v->GlobalDPPCLK = dml_max(
1906 v->GlobalDPPCLK,
1907 v->DPPCLK_calculated[k]);
1908 }
1909 v->GlobalDPPCLK = RoundToDFSGranularityUp(
1910 v->GlobalDPPCLK,
1911 v->DISPCLKDPPCLKVCOSpeed);
1912 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1913 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255
1914 * dml_ceil(
1915 v->DPPCLK_calculated[k] * 255.0
1916 / v->GlobalDPPCLK,
1917 1);
1918 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
1919 v->DPPCLK[k] = v->DPPCLK_calculated[k];
1920 }
1921
1922 // Urgent and B P-State/DRAM Clock Change Watermark
1923 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
1924 DTRACE(" return_bus_bw = %f", v->ReturnBW);
1925
1926 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1927 dml30_CalculateBytePerPixelAnd256BBlockSizes(
1928 v->SourcePixelFormat[k],
1929 v->SurfaceTiling[k],
1930 &v->BytePerPixelY[k],
1931 &v->BytePerPixelC[k],
1932 &v->BytePerPixelDETY[k],
1933 &v->BytePerPixelDETC[k],
1934 &v->BlockHeight256BytesY[k],
1935 &v->BlockHeight256BytesC[k],
1936 &v->BlockWidth256BytesY[k],
1937 &v->BlockWidth256BytesC[k]);
1938 }
1939
1940 CalculateSwathWidth(
1941 false,
1942 v->NumberOfActivePlanes,
1943 v->SourcePixelFormat,
1944 v->SourceScan,
1945 v->ViewportWidth,
1946 v->ViewportHeight,
1947 v->SurfaceWidthY,
1948 v->SurfaceWidthC,
1949 v->SurfaceHeightY,
1950 v->SurfaceHeightC,
1951 v->ODMCombineEnabled,
1952 v->BytePerPixelY,
1953 v->BytePerPixelC,
1954 v->BlockHeight256BytesY,
1955 v->BlockHeight256BytesC,
1956 v->BlockWidth256BytesY,
1957 v->BlockWidth256BytesC,
1958 v->BlendingAndTiming,
1959 v->HActive,
1960 v->HRatio,
1961 v->DPPPerPlane,
1962 v->SwathWidthSingleDPPY,
1963 v->SwathWidthSingleDPPC,
1964 v->SwathWidthY,
1965 v->SwathWidthC,
1966 v->dummyinteger3,
1967 v->dummyinteger4,
1968 v->swath_width_luma_ub,
1969 v->swath_width_chroma_ub);
1970
1971
1972 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1973 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
1974 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioChroma[k];
1975 DTRACE("read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
1976 }
1977
1978
1979 // DCFCLK Deep Sleep
1980 CalculateDCFCLKDeepSleep(
1981 mode_lib,
1982 v->NumberOfActivePlanes,
1983 v->BytePerPixelY,
1984 v->BytePerPixelC,
1985 v->VRatio,
1986 v->VRatioChroma,
1987 v->SwathWidthY,
1988 v->SwathWidthC,
1989 v->DPPPerPlane,
1990 v->HRatio,
1991 v->HRatioChroma,
1992 v->PixelClock,
1993 v->PSCL_THROUGHPUT_LUMA,
1994 v->PSCL_THROUGHPUT_CHROMA,
1995 v->DPPCLK,
1996 v->ReadBandwidthPlaneLuma,
1997 v->ReadBandwidthPlaneChroma,
1998 v->ReturnBusWidth,
1999 &v->DCFCLKDeepSleep);
2000
2001 // DSCCLK
2002 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2003 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2004 v->DSCCLK_calculated[k] = 0.0;
2005 } else {
2006 if (v->OutputFormat[k] == dm_420)
2007 v->DSCFormatFactor = 2;
2008 else if (v->OutputFormat[k] == dm_444)
2009 v->DSCFormatFactor = 1;
2010 else if (v->OutputFormat[k] == dm_n422)
2011 v->DSCFormatFactor = 2;
2012 else
2013 v->DSCFormatFactor = 1;
2014 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2015 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12
2016 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2017 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2018 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6
2019 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2020 else
2021 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3
2022 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2023 }
2024 }
2025
2026 // DSC Delay
2027 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2028 double BPP = v->OutputBppPerState[k][v->VoltageLevel];
2029
2030 if (v->DSCEnabled[k] && BPP != 0) {
2031 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2032 v->DSCDelay[k] = dscceComputeDelay(v->DSCInputBitPerComponent[k],
2033 BPP,
2034 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2035 v->NumberOfDSCSlices[k],
2036 v->OutputFormat[k],
2037 v->Output[k])
2038 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2039 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2040 v->DSCDelay[k] = 2 * dscceComputeDelay(v->DSCInputBitPerComponent[k],
2041 BPP,
2042 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2043 v->NumberOfDSCSlices[k] / 2.0,
2044 v->OutputFormat[k],
2045 v->Output[k])
2046 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2047 } else {
2048 v->DSCDelay[k] = 4 * dscceComputeDelay(v->DSCInputBitPerComponent[k],
2049 BPP,
2050 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2051 v->NumberOfDSCSlices[k] / 4.0,
2052 v->OutputFormat[k],
2053 v->Output[k])
2054 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2055 }
2056 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2057 } else {
2058 v->DSCDelay[k] = 0;
2059 }
2060 }
2061
2062 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2063 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2064 if (j != k && v->BlendingAndTiming[k] == j
2065 && v->DSCEnabled[j])
2066 v->DSCDelay[k] = v->DSCDelay[j];
2067
2068 // Prefetch
2069 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2070 unsigned int PDEAndMetaPTEBytesFrameY = 0;
2071 unsigned int PixelPTEBytesPerRowY = 0;
2072 unsigned int MetaRowByteY = 0;
2073 unsigned int MetaRowByteC = 0;
2074 unsigned int PDEAndMetaPTEBytesFrameC = 0;
2075 unsigned int PixelPTEBytesPerRowC = 0;
2076 bool PTEBufferSizeNotExceededY = 0;
2077 bool PTEBufferSizeNotExceededC = 0;
2078
2079
2080 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2081 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2082 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2083 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2084 } else {
2085 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2086 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2087
2088 }
2089 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2090 mode_lib,
2091 v->DCCEnable[k],
2092 v->BlockHeight256BytesC[k],
2093 v->BlockWidth256BytesC[k],
2094 v->SourcePixelFormat[k],
2095 v->SurfaceTiling[k],
2096 v->BytePerPixelC[k],
2097 v->SourceScan[k],
2098 v->SwathWidthC[k],
2099 v->ViewportHeightChroma[k],
2100 v->GPUVMEnable,
2101 v->HostVMEnable,
2102 v->HostVMMaxNonCachedPageTableLevels,
2103 v->GPUVMMinPageSize,
2104 v->HostVMMinPageSize,
2105 v->PTEBufferSizeInRequestsForChroma,
2106 v->PitchC[k],
2107 v->DCCMetaPitchC[k],
2108 &v->MacroTileWidthC[k],
2109 &MetaRowByteC,
2110 &PixelPTEBytesPerRowC,
2111 &PTEBufferSizeNotExceededC,
2112 &v->dpte_row_width_chroma_ub[k],
2113 &v->dpte_row_height_chroma[k],
2114 &v->meta_req_width_chroma[k],
2115 &v->meta_req_height_chroma[k],
2116 &v->meta_row_width_chroma[k],
2117 &v->meta_row_height_chroma[k],
2118 &v->dummyinteger1,
2119 &v->dummyinteger2,
2120 &v->PixelPTEReqWidthC[k],
2121 &v->PixelPTEReqHeightC[k],
2122 &v->PTERequestSizeC[k],
2123 &v->dpde0_bytes_per_frame_ub_c[k],
2124 &v->meta_pte_bytes_per_frame_ub_c[k]);
2125
2126 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2127 mode_lib,
2128 v->VRatioChroma[k],
2129 v->VTAPsChroma[k],
2130 v->Interlace[k],
2131 v->ProgressiveToInterlaceUnitInOPP,
2132 v->SwathHeightC[k],
2133 v->ViewportYStartC[k],
2134 &v->VInitPreFillC[k],
2135 &v->MaxNumSwathC[k]);
2136 } else {
2137 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2138 v->PTEBufferSizeInRequestsForChroma = 0;
2139 PixelPTEBytesPerRowC = 0;
2140 PDEAndMetaPTEBytesFrameC = 0;
2141 MetaRowByteC = 0;
2142 v->MaxNumSwathC[k] = 0;
2143 v->PrefetchSourceLinesC[k] = 0;
2144 }
2145
2146 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2147 mode_lib,
2148 v->DCCEnable[k],
2149 v->BlockHeight256BytesY[k],
2150 v->BlockWidth256BytesY[k],
2151 v->SourcePixelFormat[k],
2152 v->SurfaceTiling[k],
2153 v->BytePerPixelY[k],
2154 v->SourceScan[k],
2155 v->SwathWidthY[k],
2156 v->ViewportHeight[k],
2157 v->GPUVMEnable,
2158 v->HostVMEnable,
2159 v->HostVMMaxNonCachedPageTableLevels,
2160 v->GPUVMMinPageSize,
2161 v->HostVMMinPageSize,
2162 v->PTEBufferSizeInRequestsForLuma,
2163 v->PitchY[k],
2164 v->DCCMetaPitchY[k],
2165 &v->MacroTileWidthY[k],
2166 &MetaRowByteY,
2167 &PixelPTEBytesPerRowY,
2168 &PTEBufferSizeNotExceededY,
2169 &v->dpte_row_width_luma_ub[k],
2170 &v->dpte_row_height[k],
2171 &v->meta_req_width[k],
2172 &v->meta_req_height[k],
2173 &v->meta_row_width[k],
2174 &v->meta_row_height[k],
2175 &v->vm_group_bytes[k],
2176 &v->dpte_group_bytes[k],
2177 &v->PixelPTEReqWidthY[k],
2178 &v->PixelPTEReqHeightY[k],
2179 &v->PTERequestSizeY[k],
2180 &v->dpde0_bytes_per_frame_ub_l[k],
2181 &v->meta_pte_bytes_per_frame_ub_l[k]);
2182
2183 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2184 mode_lib,
2185 v->VRatio[k],
2186 v->vtaps[k],
2187 v->Interlace[k],
2188 v->ProgressiveToInterlaceUnitInOPP,
2189 v->SwathHeightY[k],
2190 v->ViewportYStartY[k],
2191 &v->VInitPreFillY[k],
2192 &v->MaxNumSwathY[k]);
2193 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2194 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY
2195 + PDEAndMetaPTEBytesFrameC;
2196 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2197
2198 CalculateRowBandwidth(
2199 v->GPUVMEnable,
2200 v->SourcePixelFormat[k],
2201 v->VRatio[k],
2202 v->VRatioChroma[k],
2203 v->DCCEnable[k],
2204 v->HTotal[k] / v->PixelClock[k],
2205 MetaRowByteY,
2206 MetaRowByteC,
2207 v->meta_row_height[k],
2208 v->meta_row_height_chroma[k],
2209 PixelPTEBytesPerRowY,
2210 PixelPTEBytesPerRowC,
2211 v->dpte_row_height[k],
2212 v->dpte_row_height_chroma[k],
2213 &v->meta_row_bw[k],
2214 &v->dpte_row_bw[k]);
2215 }
2216
2217 v->TotalDCCActiveDPP = 0;
2218 v->TotalActiveDPP = 0;
2219 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2220 v->TotalActiveDPP = v->TotalActiveDPP
2221 + v->DPPPerPlane[k];
2222 if (v->DCCEnable[k])
2223 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP
2224 + v->DPPPerPlane[k];
2225 }
2226
2227
2228 ReorderBytes = v->NumberOfChannels * dml_max3(
2229 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2230 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2231 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2232
2233 v->UrgentExtraLatency = CalculateExtraLatency(
2234 v->RoundTripPingLatencyCycles,
2235 ReorderBytes,
2236 v->DCFCLK,
2237 v->TotalActiveDPP,
2238 v->PixelChunkSizeInKByte,
2239 v->TotalDCCActiveDPP,
2240 v->MetaChunkSize,
2241 v->ReturnBW,
2242 v->GPUVMEnable,
2243 v->HostVMEnable,
2244 v->NumberOfActivePlanes,
2245 v->DPPPerPlane,
2246 v->dpte_group_bytes,
2247 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2248 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2249 v->HostVMMinPageSize,
2250 v->HostVMMaxNonCachedPageTableLevels);
2251
2252 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2253
2254 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2255 if (v->BlendingAndTiming[k] == k) {
2256 if (v->WritebackEnable[k] == true) {
2257 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency +
2258 CalculateWriteBackDelay(v->WritebackPixelFormat[k],
2259 v->WritebackHRatio[k],
2260 v->WritebackVRatio[k],
2261 v->WritebackVTaps[k],
2262 v->WritebackDestinationWidth[k],
2263 v->WritebackDestinationHeight[k],
2264 v->WritebackSourceHeight[k],
2265 v->HTotal[k]) / v->DISPCLK;
2266 } else
2267 v->WritebackDelay[v->VoltageLevel][k] = 0;
2268 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2269 if (v->BlendingAndTiming[j] == k
2270 && v->WritebackEnable[j] == true) {
2271 v->WritebackDelay[v->VoltageLevel][k] = dml_max(v->WritebackDelay[v->VoltageLevel][k],
2272 v->WritebackLatency + CalculateWriteBackDelay(
2273 v->WritebackPixelFormat[j],
2274 v->WritebackHRatio[j],
2275 v->WritebackVRatio[j],
2276 v->WritebackVTaps[j],
2277 v->WritebackDestinationWidth[j],
2278 v->WritebackDestinationHeight[j],
2279 v->WritebackSourceHeight[j],
2280 v->HTotal[k]) / v->DISPCLK);
2281 }
2282 }
2283 }
2284 }
2285
2286 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2287 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2288 if (v->BlendingAndTiming[k] == j)
2289 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2290
2291 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2292 v->MaxVStartupLines[k] = v->VTotal[k] - v->VActive[k] - dml_max(1.0, dml_ceil((double) v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1));
2293 }
2294
2295 v->MaximumMaxVStartupLines = 0;
2296 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2297 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2298
2299 if (v->DRAMClockChangeLatencyOverride > 0.0) {
2300 v->FinalDRAMClockChangeLatency = v->DRAMClockChangeLatencyOverride;
2301 } else {
2302 v->FinalDRAMClockChangeLatency = v->DRAMClockChangeLatency;
2303 }
2304 v->UrgentLatency = CalculateUrgentLatency(v->UrgentLatencyPixelDataOnly, v->UrgentLatencyPixelMixedWithVMData, v->UrgentLatencyVMDataOnly, v->DoUrgentLatencyAdjustment, v->UrgentLatencyAdjustmentFabricClockComponent, v->UrgentLatencyAdjustmentFabricClockReference, v->FabricClock);
2305
2306
2307 v->FractionOfUrgentBandwidth = 0.0;
2308 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2309
2310 v->VStartupLines = 13;
2311
2312 do {
2313 MaxTotalRDBandwidth = 0;
2314 MaxTotalRDBandwidthNoUrgentBurst = 0;
2315 DestinationLineTimesForPrefetchLessThan2 = false;
2316 VRatioPrefetchMoreThan4 = false;
2317 TWait = CalculateTWait(
2318 PrefetchMode,
2319 v->FinalDRAMClockChangeLatency,
2320 v->UrgentLatency,
2321 v->SREnterPlusExitTime);
2322
2323 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2324 Pipe myPipe = { 0 };
2325
2326 myPipe.DPPCLK = v->DPPCLK[k];
2327 myPipe.DISPCLK = v->DISPCLK;
2328 myPipe.PixelClock = v->PixelClock[k];
2329 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2330 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2331 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2332 myPipe.SourceScan = v->SourceScan[k];
2333 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2334 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2335 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2336 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2337 myPipe.InterlaceEnable = v->Interlace[k];
2338 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2339 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2340 myPipe.HTotal = v->HTotal[k];
2341 myPipe.DCCEnable = v->DCCEnable[k];
2342 myPipe.ODMCombineEnabled = !!v->ODMCombineEnabled[k];
2343
2344 v->ErrorResult[k] = CalculatePrefetchSchedule(
2345 mode_lib,
2346 k,
2347 &myPipe,
2348 v->DSCDelay[k],
2349 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2350 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2351 v->MaxVStartupLines[k],
2352 v->UrgentLatency,
2353 v->UrgentExtraLatency,
2354 v->TCalc,
2355 v->PDEAndMetaPTEBytesFrame[k],
2356 v->MetaRowByte[k],
2357 v->PixelPTEBytesPerRow[k],
2358 v->PrefetchSourceLinesY[k],
2359 v->SwathWidthY[k],
2360 v->BytePerPixelY[k],
2361 v->VInitPreFillY[k],
2362 v->MaxNumSwathY[k],
2363 v->PrefetchSourceLinesC[k],
2364 v->SwathWidthC[k],
2365 v->VInitPreFillC[k],
2366 v->MaxNumSwathC[k],
2367 v->swath_width_luma_ub[k],
2368 v->swath_width_chroma_ub[k],
2369 v->SwathHeightY[k],
2370 v->SwathHeightC[k],
2371 TWait,
2372 &v->DestinationLinesForPrefetch[k],
2373 &v->PrefetchBandwidth[k],
2374 &v->DestinationLinesToRequestVMInVBlank[k],
2375 &v->DestinationLinesToRequestRowInVBlank[k],
2376 &v->VRatioPrefetchY[k],
2377 &v->VRatioPrefetchC[k],
2378 &v->RequiredPrefetchPixDataBWLuma[k],
2379 &v->RequiredPrefetchPixDataBWChroma[k],
2380 &v->NotEnoughTimeForDynamicMetadata[k]);
2381 if (v->BlendingAndTiming[k] == k) {
2382 double TotalRepeaterDelayTime = v->MaxInterDCNTileRepeaters * (2 / v->DPPCLK[k] + 3 / v->DISPCLK);
2383 v->VUpdateWidthPix[k] = (14 / v->DCFCLKDeepSleep + 12 / v->DPPCLK[k] + TotalRepeaterDelayTime) * v->PixelClock[k];
2384 v->VReadyOffsetPix[k] = dml_max(150.0 / v->DPPCLK[k], TotalRepeaterDelayTime + 20 / v->DCFCLKDeepSleep + 10 / v->DPPCLK[k]) * v->PixelClock[k];
2385 v->VUpdateOffsetPix[k] = dml_ceil(v->HTotal[k] / 4.0, 1);
2386 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2387 } else {
2388 int x = v->BlendingAndTiming[k];
2389 double TotalRepeaterDelayTime = v->MaxInterDCNTileRepeaters * (2 / v->DPPCLK[k] + 3 / v->DISPCLK);
2390 v->VUpdateWidthPix[k] = (14 / v->DCFCLKDeepSleep + 12 / v->DPPCLK[k] + TotalRepeaterDelayTime) * v->PixelClock[x];
2391 v->VReadyOffsetPix[k] = dml_max(150.0 / v->DPPCLK[k], TotalRepeaterDelayTime + 20 / v->DCFCLKDeepSleep + 10 / v->DPPCLK[k]) * v->PixelClock[x];
2392 v->VUpdateOffsetPix[k] = dml_ceil(v->HTotal[x] / 4.0, 1);
2393 if (!v->MaxVStartupLines[x])
2394 v->MaxVStartupLines[x] = v->MaxVStartupLines[k];
2395 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[x]);
2396 }
2397 }
2398
2399 v->NotEnoughUrgentLatencyHiding[0][0] = false;
2400 v->NotEnoughUrgentLatencyHidingPre = false;
2401
2402 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2403 v->cursor_bw[k] = v->NumberOfCursors[k]
2404 * v->CursorWidth[k][0] * v->CursorBPP[k][0]
2405 / 8.0
2406 / (v->HTotal[k] / v->PixelClock[k])
2407 * v->VRatio[k];
2408 v->cursor_bw_pre[k] = v->NumberOfCursors[k]
2409 * v->CursorWidth[k][0] * v->CursorBPP[k][0]
2410 / 8.0
2411 / (v->HTotal[k] / v->PixelClock[k])
2412 * v->VRatioPrefetchY[k];
2413
2414 CalculateUrgentBurstFactor(
2415 v->swath_width_luma_ub[k],
2416 v->swath_width_chroma_ub[k],
2417 v->DETBufferSizeInKByte[0],
2418 v->SwathHeightY[k],
2419 v->SwathHeightC[k],
2420 v->HTotal[k] / v->PixelClock[k],
2421 v->UrgentLatency,
2422 v->CursorBufferSize,
2423 v->CursorWidth[k][0],
2424 v->CursorBPP[k][0],
2425 v->VRatio[k],
2426 v->VRatioChroma[k],
2427 v->BytePerPixelDETY[k],
2428 v->BytePerPixelDETC[k],
2429 v->DETBufferSizeY[k],
2430 v->DETBufferSizeC[k],
2431 &v->UrgentBurstFactorCursor[k],
2432 &v->UrgentBurstFactorLuma[k],
2433 &v->UrgentBurstFactorChroma[k],
2434 &v->NoUrgentLatencyHiding[k]);
2435
2436 CalculateUrgentBurstFactor(
2437 v->swath_width_luma_ub[k],
2438 v->swath_width_chroma_ub[k],
2439 v->DETBufferSizeInKByte[0],
2440 v->SwathHeightY[k],
2441 v->SwathHeightC[k],
2442 v->HTotal[k] / v->PixelClock[k],
2443 v->UrgentLatency,
2444 v->CursorBufferSize,
2445 v->CursorWidth[k][0],
2446 v->CursorBPP[k][0],
2447 v->VRatioPrefetchY[k],
2448 v->VRatioPrefetchC[k],
2449 v->BytePerPixelDETY[k],
2450 v->BytePerPixelDETC[k],
2451 v->DETBufferSizeY[k],
2452 v->DETBufferSizeC[k],
2453 &v->UrgentBurstFactorCursorPre[k],
2454 &v->UrgentBurstFactorLumaPre[k],
2455 &v->UrgentBurstFactorChromaPre[k],
2456 &v->NoUrgentLatencyHidingPre[k]);
2457
2458 MaxTotalRDBandwidth = MaxTotalRDBandwidth +
2459 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2460 v->ReadBandwidthPlaneLuma[k] *
2461 v->UrgentBurstFactorLuma[k] +
2462 v->ReadBandwidthPlaneChroma[k] *
2463 v->UrgentBurstFactorChroma[k] +
2464 v->cursor_bw[k] *
2465 v->UrgentBurstFactorCursor[k] +
2466 v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2467 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2468 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) + v->cursor_bw_pre[k] *
2469 v->UrgentBurstFactorCursorPre[k]);
2470
2471 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst +
2472 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2473 v->ReadBandwidthPlaneLuma[k] +
2474 v->ReadBandwidthPlaneChroma[k] +
2475 v->cursor_bw[k] +
2476 v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2477 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2478
2479 if (v->DestinationLinesForPrefetch[k] < 2)
2480 DestinationLineTimesForPrefetchLessThan2 = true;
2481 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2482 VRatioPrefetchMoreThan4 = true;
2483 if (v->NoUrgentLatencyHiding[k] == true)
2484 v->NotEnoughUrgentLatencyHiding[0][0] = true;
2485
2486 if (v->NoUrgentLatencyHidingPre[k] == true)
2487 v->NotEnoughUrgentLatencyHidingPre = true;
2488 }
2489 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2490
2491
2492 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NotEnoughUrgentLatencyHiding[0][0] == 0
2493 && v->NotEnoughUrgentLatencyHidingPre == 0 && !VRatioPrefetchMoreThan4
2494 && !DestinationLineTimesForPrefetchLessThan2)
2495 v->PrefetchModeSupported = true;
2496 else {
2497 v->PrefetchModeSupported = false;
2498 dml_print("DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n");
2499 dml_print("DML: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", MaxTotalRDBandwidth, v->ReturnBW);
2500 dml_print("DML: VRatioPrefetch %s more than 4\n", (VRatioPrefetchMoreThan4) ? "is" : "is not");
2501 dml_print("DML: DestinationLines for Prefetch %s less than 2\n", (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2502 }
2503
2504 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2505 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2506 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2507 v->BandwidthAvailableForImmediateFlip =
2508 v->BandwidthAvailableForImmediateFlip
2509 - dml_max(
2510 v->ReadBandwidthPlaneLuma[k] * v->UrgentBurstFactorLuma[k]
2511 + v->ReadBandwidthPlaneChroma[k] * v->UrgentBurstFactorChroma[k]
2512 + v->cursor_bw[k] * v->UrgentBurstFactorCursor[k],
2513 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2514 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) +
2515 v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
2516 }
2517
2518 v->TotImmediateFlipBytes = 0;
2519 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2520 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2521 }
2522 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2523 CalculateFlipSchedule(
2524 mode_lib,
2525 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2526 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2527 v->UrgentExtraLatency,
2528 v->UrgentLatency,
2529 v->GPUVMMaxPageTableLevels,
2530 v->HostVMEnable,
2531 v->HostVMMaxNonCachedPageTableLevels,
2532 v->GPUVMEnable,
2533 v->HostVMMinPageSize,
2534 v->PDEAndMetaPTEBytesFrame[k],
2535 v->MetaRowByte[k],
2536 v->PixelPTEBytesPerRow[k],
2537 v->BandwidthAvailableForImmediateFlip,
2538 v->TotImmediateFlipBytes,
2539 v->SourcePixelFormat[k],
2540 v->HTotal[k] / v->PixelClock[k],
2541 v->VRatio[k],
2542 v->VRatioChroma[k],
2543 v->Tno_bw[k],
2544 v->DCCEnable[k],
2545 v->dpte_row_height[k],
2546 v->meta_row_height[k],
2547 v->dpte_row_height_chroma[k],
2548 v->meta_row_height_chroma[k],
2549 &v->DestinationLinesToRequestVMInImmediateFlip[k],
2550 &v->DestinationLinesToRequestRowInImmediateFlip[k],
2551 &v->final_flip_bw[k],
2552 &v->ImmediateFlipSupportedForPipe[k]);
2553 }
2554 v->total_dcn_read_bw_with_flip = 0.0;
2555 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2556 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2557 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip + dml_max3(
2558 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2559 v->DPPPerPlane[k] * v->final_flip_bw[k] +
2560 v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k] +
2561 v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k] +
2562 v->cursor_bw[k] * v->UrgentBurstFactorCursor[k],
2563 v->DPPPerPlane[k] * (v->final_flip_bw[k] +
2564 v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2565 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) +
2566 v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
2567 v->total_dcn_read_bw_with_flip_no_urgent_burst =
2568 v->total_dcn_read_bw_with_flip_no_urgent_burst +
2569 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2570 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2571 v->DPPPerPlane[k] * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2572
2573 }
2574 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2575
2576 v->ImmediateFlipSupported = true;
2577 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2578 v->ImmediateFlipSupported = false;
2579 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2580 }
2581 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2582 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2583 v->ImmediateFlipSupported = false;
2584 }
2585 }
2586 } else {
2587 v->ImmediateFlipSupported = false;
2588 }
2589
2590 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2591 if (v->ErrorResult[k] || v->NotEnoughTimeForDynamicMetadata[k]) {
2592 v->PrefetchModeSupported = false;
2593 dml_print("DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. Results are NOT valid\n");
2594 }
2595 }
2596
2597 v->VStartupLines = v->VStartupLines + 1;
2598 v->PrefetchModeSupported = (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport &&
2599 !v->HostVMEnable && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2600 v->ImmediateFlipSupported)) ? true : false;
2601 } while (!v->PrefetchModeSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2602 ASSERT(v->PrefetchModeSupported);
2603
2604 //Watermarks and NB P-State/DRAM Clock Change Support
2605 {
2606 enum clock_change_support DRAMClockChangeSupport = 0; // dummy
2607 CalculateWatermarksAndDRAMSpeedChangeSupport(
2608 mode_lib,
2609 PrefetchMode,
2610 v->DCFCLK,
2611 v->ReturnBW,
2612 v->UrgentLatency,
2613 v->UrgentExtraLatency,
2614 v->SOCCLK,
2615 v->DCFCLKDeepSleep,
2616 v->DPPPerPlane,
2617 v->DPPCLK,
2618 v->DETBufferSizeY,
2619 v->DETBufferSizeC,
2620 v->SwathHeightY,
2621 v->SwathHeightC,
2622 v->SwathWidthY,
2623 v->SwathWidthC,
2624 v->BytePerPixelDETY,
2625 v->BytePerPixelDETC,
2626 &DRAMClockChangeSupport);
2627
2628 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2629 if (v->WritebackEnable[k] == true) {
2630 if (v->BlendingAndTiming[k] == k) {
2631 v->ThisVStartup = v->VStartup[k];
2632 } else {
2633 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2634 if (v->BlendingAndTiming[k] == j) {
2635 v->ThisVStartup = v->VStartup[j];
2636 }
2637 }
2638 }
2639 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(0,
2640 v->ThisVStartup * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
2641 } else {
2642 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
2643 }
2644 }
2645
2646 }
2647
2648
2649 //Display Pipeline Delivery Time in Prefetch, Groups
2650 CalculatePixelDeliveryTimes(
2651 v->NumberOfActivePlanes,
2652 v->VRatio,
2653 v->VRatioChroma,
2654 v->VRatioPrefetchY,
2655 v->VRatioPrefetchC,
2656 v->swath_width_luma_ub,
2657 v->swath_width_chroma_ub,
2658 v->DPPPerPlane,
2659 v->HRatio,
2660 v->HRatioChroma,
2661 v->PixelClock,
2662 v->PSCL_THROUGHPUT_LUMA,
2663 v->PSCL_THROUGHPUT_CHROMA,
2664 v->DPPCLK,
2665 v->BytePerPixelC,
2666 v->SourceScan,
2667 v->NumberOfCursors,
2668 v->CursorWidth,
2669 v->CursorBPP,
2670 v->BlockWidth256BytesY,
2671 v->BlockHeight256BytesY,
2672 v->BlockWidth256BytesC,
2673 v->BlockHeight256BytesC,
2674 v->DisplayPipeLineDeliveryTimeLuma,
2675 v->DisplayPipeLineDeliveryTimeChroma,
2676 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
2677 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
2678 v->DisplayPipeRequestDeliveryTimeLuma,
2679 v->DisplayPipeRequestDeliveryTimeChroma,
2680 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
2681 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
2682 v->CursorRequestDeliveryTime,
2683 v->CursorRequestDeliveryTimePrefetch);
2684
2685 CalculateMetaAndPTETimes(
2686 v->NumberOfActivePlanes,
2687 v->GPUVMEnable,
2688 v->MetaChunkSize,
2689 v->MinMetaChunkSizeBytes,
2690 v->HTotal,
2691 v->VRatio,
2692 v->VRatioChroma,
2693 v->DestinationLinesToRequestRowInVBlank,
2694 v->DestinationLinesToRequestRowInImmediateFlip,
2695 v->DCCEnable,
2696 v->PixelClock,
2697 v->BytePerPixelY,
2698 v->BytePerPixelC,
2699 v->SourceScan,
2700 v->dpte_row_height,
2701 v->dpte_row_height_chroma,
2702 v->meta_row_width,
2703 v->meta_row_width_chroma,
2704 v->meta_row_height,
2705 v->meta_row_height_chroma,
2706 v->meta_req_width,
2707 v->meta_req_width_chroma,
2708 v->meta_req_height,
2709 v->meta_req_height_chroma,
2710 v->dpte_group_bytes,
2711 v->PTERequestSizeY,
2712 v->PTERequestSizeC,
2713 v->PixelPTEReqWidthY,
2714 v->PixelPTEReqHeightY,
2715 v->PixelPTEReqWidthC,
2716 v->PixelPTEReqHeightC,
2717 v->dpte_row_width_luma_ub,
2718 v->dpte_row_width_chroma_ub,
2719 v->DST_Y_PER_PTE_ROW_NOM_L,
2720 v->DST_Y_PER_PTE_ROW_NOM_C,
2721 v->DST_Y_PER_META_ROW_NOM_L,
2722 v->DST_Y_PER_META_ROW_NOM_C,
2723 v->TimePerMetaChunkNominal,
2724 v->TimePerChromaMetaChunkNominal,
2725 v->TimePerMetaChunkVBlank,
2726 v->TimePerChromaMetaChunkVBlank,
2727 v->TimePerMetaChunkFlip,
2728 v->TimePerChromaMetaChunkFlip,
2729 v->time_per_pte_group_nom_luma,
2730 v->time_per_pte_group_vblank_luma,
2731 v->time_per_pte_group_flip_luma,
2732 v->time_per_pte_group_nom_chroma,
2733 v->time_per_pte_group_vblank_chroma,
2734 v->time_per_pte_group_flip_chroma);
2735
2736 CalculateVMGroupAndRequestTimes(
2737 v->NumberOfActivePlanes,
2738 v->GPUVMEnable,
2739 v->GPUVMMaxPageTableLevels,
2740 v->HTotal,
2741 v->BytePerPixelC,
2742 v->DestinationLinesToRequestVMInVBlank,
2743 v->DestinationLinesToRequestVMInImmediateFlip,
2744 v->DCCEnable,
2745 v->PixelClock,
2746 v->dpte_row_width_luma_ub,
2747 v->dpte_row_width_chroma_ub,
2748 v->vm_group_bytes,
2749 v->dpde0_bytes_per_frame_ub_l,
2750 v->dpde0_bytes_per_frame_ub_c,
2751 v->meta_pte_bytes_per_frame_ub_l,
2752 v->meta_pte_bytes_per_frame_ub_c,
2753 v->TimePerVMGroupVBlank,
2754 v->TimePerVMGroupFlip,
2755 v->TimePerVMRequestVBlank,
2756 v->TimePerVMRequestFlip);
2757
2758
2759 // Min TTUVBlank
2760 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2761 if (PrefetchMode == 0) {
2762 v->AllowDRAMClockChangeDuringVBlank[k] = true;
2763 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
2764 v->MinTTUVBlank[k] = dml_max(
2765 v->DRAMClockChangeWatermark,
2766 dml_max(
2767 v->StutterEnterPlusExitWatermark,
2768 v->UrgentWatermark));
2769 } else if (PrefetchMode == 1) {
2770 v->AllowDRAMClockChangeDuringVBlank[k] = false;
2771 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
2772 v->MinTTUVBlank[k] = dml_max(
2773 v->StutterEnterPlusExitWatermark,
2774 v->UrgentWatermark);
2775 } else {
2776 v->AllowDRAMClockChangeDuringVBlank[k] = false;
2777 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
2778 v->MinTTUVBlank[k] = v->UrgentWatermark;
2779 }
2780 if (!v->DynamicMetadataEnable[k])
2781 v->MinTTUVBlank[k] = v->TCalc
2782 + v->MinTTUVBlank[k];
2783 }
2784
2785 // DCC Configuration
2786 v->ActiveDPPs = 0;
2787 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2788 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
2789 v->SourcePixelFormat[k],
2790 v->SurfaceWidthY[k],
2791 v->SurfaceWidthC[k],
2792 v->SurfaceHeightY[k],
2793 v->SurfaceHeightC[k],
2794 v->DETBufferSizeInKByte[0] * 1024,
2795 v->BlockHeight256BytesY[k],
2796 v->BlockHeight256BytesC[k],
2797 v->SurfaceTiling[k],
2798 v->BytePerPixelY[k],
2799 v->BytePerPixelC[k],
2800 v->BytePerPixelDETY[k],
2801 v->BytePerPixelDETC[k],
2802 v->SourceScan[k],
2803 &v->DCCYMaxUncompressedBlock[k],
2804 &v->DCCCMaxUncompressedBlock[k],
2805 &v->DCCYMaxCompressedBlock[k],
2806 &v->DCCCMaxCompressedBlock[k],
2807 &v->DCCYIndependentBlock[k],
2808 &v->DCCCIndependentBlock[k]);
2809 }
2810
2811 {
2812 //Maximum Bandwidth Used
2813 v->TotalDataReadBandwidth = 0;
2814 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2815 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth
2816 + v->ReadBandwidthPlaneLuma[k]
2817 + v->ReadBandwidthPlaneChroma[k];
2818 }
2819 }
2820
2821 // VStartup Margin
2822 v->VStartupMargin = 0;
2823 v->FirstMainPlane = true;
2824 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2825 if (v->BlendingAndTiming[k] == k) {
2826 double margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k]
2827 / v->PixelClock[k];
2828 if (v->FirstMainPlane == true) {
2829 v->VStartupMargin = margin;
2830 v->FirstMainPlane = false;
2831 } else {
2832 v->VStartupMargin = dml_min(v->VStartupMargin, margin);
2833 }
2834 }
2835 }
2836
2837 // Stutter Efficiency
2838 CalculateStutterEfficiency(
2839 v->NumberOfActivePlanes,
2840 v->ROBBufferSizeInKByte,
2841 v->TotalDataReadBandwidth,
2842 v->DCFCLK,
2843 v->ReturnBW,
2844 v->SRExitTime,
2845 v->SynchronizedVBlank,
2846 v->DPPPerPlane,
2847 v->DETBufferSizeY,
2848 v->BytePerPixelY,
2849 v->BytePerPixelDETY,
2850 v->SwathWidthY,
2851 v->SwathHeightY,
2852 v->SwathHeightC,
2853 v->DCCRateLuma,
2854 v->DCCRateChroma,
2855 v->HTotal,
2856 v->VTotal,
2857 v->PixelClock,
2858 v->VRatio,
2859 v->SourceScan,
2860 v->BlockHeight256BytesY,
2861 v->BlockWidth256BytesY,
2862 v->BlockHeight256BytesC,
2863 v->BlockWidth256BytesC,
2864 v->DCCYMaxUncompressedBlock,
2865 v->DCCCMaxUncompressedBlock,
2866 v->VActive,
2867 v->DCCEnable,
2868 v->WritebackEnable,
2869 v->ReadBandwidthPlaneLuma,
2870 v->ReadBandwidthPlaneChroma,
2871 v->meta_row_bw,
2872 v->dpte_row_bw,
2873 &v->StutterEfficiencyNotIncludingVBlank,
2874 &v->StutterEfficiency,
2875 &v->StutterPeriod);
2876 }
2877
DisplayPipeConfiguration(struct display_mode_lib * mode_lib)2878 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
2879 {
2880 // Display Pipe Configuration
2881 double BytePerPixDETY[DC__NUM_DPP__MAX] = { 0 };
2882 double BytePerPixDETC[DC__NUM_DPP__MAX] = { 0 };
2883 int BytePerPixY[DC__NUM_DPP__MAX] = { 0 };
2884 int BytePerPixC[DC__NUM_DPP__MAX] = { 0 };
2885 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX] = { 0 };
2886 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX] = { 0 };
2887 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX] = { 0 };
2888 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX] = { 0 };
2889 double dummy1[DC__NUM_DPP__MAX] = { 0 };
2890 double dummy2[DC__NUM_DPP__MAX] = { 0 };
2891 double dummy3[DC__NUM_DPP__MAX] = { 0 };
2892 double dummy4[DC__NUM_DPP__MAX] = { 0 };
2893 int dummy5[DC__NUM_DPP__MAX] = { 0 };
2894 int dummy6[DC__NUM_DPP__MAX] = { 0 };
2895 bool dummy7[DC__NUM_DPP__MAX] = { 0 };
2896 bool dummysinglestring = 0;
2897 unsigned int k;
2898
2899 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2900
2901 dml30_CalculateBytePerPixelAnd256BBlockSizes(
2902 mode_lib->vba.SourcePixelFormat[k],
2903 mode_lib->vba.SurfaceTiling[k],
2904 &BytePerPixY[k],
2905 &BytePerPixC[k],
2906 &BytePerPixDETY[k],
2907 &BytePerPixDETC[k],
2908 &Read256BytesBlockHeightY[k],
2909 &Read256BytesBlockHeightC[k],
2910 &Read256BytesBlockWidthY[k],
2911 &Read256BytesBlockWidthC[k]);
2912 }
2913 CalculateSwathAndDETConfiguration(
2914 false,
2915 mode_lib->vba.NumberOfActivePlanes,
2916 mode_lib->vba.DETBufferSizeInKByte[0],
2917 dummy1,
2918 dummy2,
2919 mode_lib->vba.SourceScan,
2920 mode_lib->vba.SourcePixelFormat,
2921 mode_lib->vba.SurfaceTiling,
2922 mode_lib->vba.ViewportWidth,
2923 mode_lib->vba.ViewportHeight,
2924 mode_lib->vba.SurfaceWidthY,
2925 mode_lib->vba.SurfaceWidthC,
2926 mode_lib->vba.SurfaceHeightY,
2927 mode_lib->vba.SurfaceHeightC,
2928 Read256BytesBlockHeightY,
2929 Read256BytesBlockHeightC,
2930 Read256BytesBlockWidthY,
2931 Read256BytesBlockWidthC,
2932 mode_lib->vba.ODMCombineEnabled,
2933 mode_lib->vba.BlendingAndTiming,
2934 BytePerPixY,
2935 BytePerPixC,
2936 BytePerPixDETY,
2937 BytePerPixDETC,
2938 mode_lib->vba.HActive,
2939 mode_lib->vba.HRatio,
2940 mode_lib->vba.HRatioChroma,
2941 mode_lib->vba.DPPPerPlane,
2942 dummy5,
2943 dummy6,
2944 dummy3,
2945 dummy4,
2946 mode_lib->vba.SwathHeightY,
2947 mode_lib->vba.SwathHeightC,
2948 mode_lib->vba.DETBufferSizeY,
2949 mode_lib->vba.DETBufferSizeC,
2950 dummy7,
2951 &dummysinglestring);
2952 }
2953
dml30_CalculateBytePerPixelAnd256BBlockSizes(enum source_format_class SourcePixelFormat,enum dm_swizzle_mode SurfaceTiling,unsigned int * BytePerPixelY,unsigned int * BytePerPixelC,double * BytePerPixelDETY,double * BytePerPixelDETC,unsigned int * BlockHeight256BytesY,unsigned int * BlockHeight256BytesC,unsigned int * BlockWidth256BytesY,unsigned int * BlockWidth256BytesC)2954 void dml30_CalculateBytePerPixelAnd256BBlockSizes(
2955 enum source_format_class SourcePixelFormat,
2956 enum dm_swizzle_mode SurfaceTiling,
2957 unsigned int *BytePerPixelY,
2958 unsigned int *BytePerPixelC,
2959 double *BytePerPixelDETY,
2960 double *BytePerPixelDETC,
2961 unsigned int *BlockHeight256BytesY,
2962 unsigned int *BlockHeight256BytesC,
2963 unsigned int *BlockWidth256BytesY,
2964 unsigned int *BlockWidth256BytesC)
2965 {
2966 if (SourcePixelFormat == dm_444_64) {
2967 *BytePerPixelDETY = 8;
2968 *BytePerPixelDETC = 0;
2969 *BytePerPixelY = 8;
2970 *BytePerPixelC = 0;
2971 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
2972 *BytePerPixelDETY = 4;
2973 *BytePerPixelDETC = 0;
2974 *BytePerPixelY = 4;
2975 *BytePerPixelC = 0;
2976 } else if (SourcePixelFormat == dm_444_16) {
2977 *BytePerPixelDETY = 2;
2978 *BytePerPixelDETC = 0;
2979 *BytePerPixelY = 2;
2980 *BytePerPixelC = 0;
2981 } else if (SourcePixelFormat == dm_444_8) {
2982 *BytePerPixelDETY = 1;
2983 *BytePerPixelDETC = 0;
2984 *BytePerPixelY = 1;
2985 *BytePerPixelC = 0;
2986 } else if (SourcePixelFormat == dm_rgbe_alpha) {
2987 *BytePerPixelDETY = 4;
2988 *BytePerPixelDETC = 1;
2989 *BytePerPixelY = 4;
2990 *BytePerPixelC = 1;
2991 } else if (SourcePixelFormat == dm_420_8) {
2992 *BytePerPixelDETY = 1;
2993 *BytePerPixelDETC = 2;
2994 *BytePerPixelY = 1;
2995 *BytePerPixelC = 2;
2996 } else if (SourcePixelFormat == dm_420_12) {
2997 *BytePerPixelDETY = 2;
2998 *BytePerPixelDETC = 4;
2999 *BytePerPixelY = 2;
3000 *BytePerPixelC = 4;
3001 } else {
3002 *BytePerPixelDETY = 4.0 / 3;
3003 *BytePerPixelDETC = 8.0 / 3;
3004 *BytePerPixelY = 2;
3005 *BytePerPixelC = 4;
3006 }
3007
3008 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
3009 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8
3010 || SourcePixelFormat == dm_mono_16 || SourcePixelFormat == dm_mono_8
3011 || SourcePixelFormat == dm_rgbe)) {
3012 if (SurfaceTiling == dm_sw_linear) {
3013 *BlockHeight256BytesY = 1;
3014 } else if (SourcePixelFormat == dm_444_64) {
3015 *BlockHeight256BytesY = 4;
3016 } else if (SourcePixelFormat == dm_444_8) {
3017 *BlockHeight256BytesY = 16;
3018 } else {
3019 *BlockHeight256BytesY = 8;
3020 }
3021 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3022 *BlockHeight256BytesC = 0;
3023 *BlockWidth256BytesC = 0;
3024 } else {
3025 if (SurfaceTiling == dm_sw_linear) {
3026 *BlockHeight256BytesY = 1;
3027 *BlockHeight256BytesC = 1;
3028 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3029 *BlockHeight256BytesY = 8;
3030 *BlockHeight256BytesC = 16;
3031 } else if (SourcePixelFormat == dm_420_8) {
3032 *BlockHeight256BytesY = 16;
3033 *BlockHeight256BytesC = 8;
3034 } else {
3035 *BlockHeight256BytesY = 8;
3036 *BlockHeight256BytesC = 8;
3037 }
3038 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3039 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
3040 }
3041 }
3042
/*
 * Pick the wait time for the given PrefetchMode.
 *
 *   0     - largest of (DRAMClockChangeLatency + UrgentLatency),
 *           SREnterPlusExitTime and UrgentLatency
 *   1     - larger of SREnterPlusExitTime and UrgentLatency
 *   other - UrgentLatency
 */
static double CalculateTWait(
		unsigned int PrefetchMode,
		double DRAMClockChangeLatency,
		double UrgentLatency,
		double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(DRAMClockChangeLatency + UrgentLatency,
				dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
3058
/*
 * Return the largest of three candidate DISPCLK values for writeback:
 *   - one derived from the horizontal tap count and horizontal ratio,
 *   - one derived from the vertical tap count, destination width and HTotal,
 *   - one derived from the vertical tap count, destination/source widths and
 *     the writeback line buffer size.
 *
 * WritebackPixelFormat and WritebackVRatio are accepted but not used.
 */
double dml30_CalculateWriteBackDISPCLK(
		enum source_format_class WritebackPixelFormat,
		double PixelClock,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackHTaps,
		unsigned int WritebackVTaps,
		long WritebackSourceWidth,
		long WritebackDestinationWidth,
		unsigned int HTotal,
		unsigned int WritebackLineBufferSize)
{
	const double by_htaps = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
	const double by_vtaps = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
	const double by_line_buffer = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;

	return dml_max3(by_htaps, by_vtaps, by_line_buffer);
}
3078
/*
 * Compute the writeback delay from the scaler geometry.
 *
 * The result is 0 when the unclamped count of trailing output lines is
 * negative; otherwise it is that count times the line length, plus
 * (HTotal - WritebackDestinationWidth), plus 80.
 *
 * WritebackPixelFormat and WritebackHRatio are accepted but not used.
 */
static double CalculateWriteBackDelay(
		enum source_format_class WritebackPixelFormat,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackVTaps,
		long WritebackDestinationWidth,
		long WritebackDestinationHeight,
		long WritebackSourceHeight,
		unsigned int HTotal)
{
	const double v_init = (WritebackVRatio + WritebackVTaps + 1) / 2;
	const double line_len = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
	const double tail_lines = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - v_init) / WritebackVRatio, 1);
	double delay = 0;

	/* Only a non-negative line count contributes a delay. */
	if (!(tail_lines < 0))
		delay = tail_lines * line_len + (HTotal - WritebackDestinationWidth) + 80;

	return delay;
}
3104
3105
/*
 * Fill in the four dynamic-metadata timing outputs.
 *
 *   *Tsetup - sum of the VUpdate offset, VUpdate width and VReady offset
 *             (all in pixels) divided by PixelClock
 *   *Tdmbf  - DynamicMetadataTransmittedBytes / 4 / DISPCLK
 *   *Tdmec  - HTotal / PixelClock
 *   *Tdmsks - VBlank * HTotal / PixelClock / 2 when
 *             DynamicMetadataLinesBeforeActiveRequired is 0, otherwise
 *             DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
 *             halved again when InterlaceEnable is 1 and
 *             ProgressiveToInterlaceUnitInOPP is false
 */
static void CalculateDynamicMetadataParameters(int MaxInterDCNTileRepeaters, double DPPCLK, double DISPCLK,
		double DCFClkDeepSleep, double PixelClock, unsigned int HTotal, unsigned int VBlank, unsigned int DynamicMetadataTransmittedBytes,
		int DynamicMetadataLinesBeforeActiveRequired, int InterlaceEnable, bool ProgressiveToInterlaceUnitInOPP,
		double *Tsetup, double *Tdmbf, double *Tdmec, double *Tdmsks)
{
	const double repeater_delay = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	const double vupdate_width_pix = (14 / DCFClkDeepSleep + 12 / DPPCLK + repeater_delay) * PixelClock;
	const double vready_offset_pix = dml_max(150.0 / DPPCLK, repeater_delay + 20 / DCFClkDeepSleep + 10 / DPPCLK) * PixelClock;
	const double vupdate_offset_pix = dml_ceil(HTotal / 4.0, 1);
	double skew;

	*Tsetup = (vupdate_offset_pix + vupdate_width_pix + vready_offset_pix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		skew = VBlank * HTotal / PixelClock / 2.0;
	else
		skew = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		skew = skew / 2;

	*Tdmsks = skew;
}
3131
CalculateRowBandwidth(bool GPUVMEnable,enum source_format_class SourcePixelFormat,double VRatio,double VRatioChroma,bool DCCEnable,double LineTime,unsigned int MetaRowByteLuma,unsigned int MetaRowByteChroma,unsigned int meta_row_height_luma,unsigned int meta_row_height_chroma,unsigned int PixelPTEBytesPerRowLuma,unsigned int PixelPTEBytesPerRowChroma,unsigned int dpte_row_height_luma,unsigned int dpte_row_height_chroma,double * meta_row_bw,double * dpte_row_bw)3132 static void CalculateRowBandwidth(
3133 bool GPUVMEnable,
3134 enum source_format_class SourcePixelFormat,
3135 double VRatio,
3136 double VRatioChroma,
3137 bool DCCEnable,
3138 double LineTime,
3139 unsigned int MetaRowByteLuma,
3140 unsigned int MetaRowByteChroma,
3141 unsigned int meta_row_height_luma,
3142 unsigned int meta_row_height_chroma,
3143 unsigned int PixelPTEBytesPerRowLuma,
3144 unsigned int PixelPTEBytesPerRowChroma,
3145 unsigned int dpte_row_height_luma,
3146 unsigned int dpte_row_height_chroma,
3147 double *meta_row_bw,
3148 double *dpte_row_bw)
3149 {
3150 if (DCCEnable != true) {
3151 *meta_row_bw = 0;
3152 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3153 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime)
3154 + VRatioChroma * MetaRowByteChroma
3155 / (meta_row_height_chroma * LineTime);
3156 } else {
3157 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3158 }
3159
3160 if (GPUVMEnable != true) {
3161 *dpte_row_bw = 0;
3162 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3163 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3164 + VRatioChroma * PixelPTEBytesPerRowChroma
3165 / (dpte_row_height_chroma * LineTime);
3166 } else {
3167 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
3168 }
3169 }
3170
/*
 * Compute the immediate-flip fetch schedule for one plane.
 *
 * Outputs:
 *   *DestinationLinesToRequestVMInImmediateFlip
 *       TimeForFetchingMetaPTEImmediateFlip expressed in line times, rounded
 *       up to a multiple of 1/4.
 *   *DestinationLinesToRequestRowInImmediateFlip
 *       TimeForFetchingRowInVBlankImmediateFlip expressed in line times,
 *       rounded up to a multiple of 1/4.
 *   *final_flip_bw
 *       Bandwidth implied by fetching the flip bytes within those line counts
 *       (0 when neither GPUVMEnable nor DCCEnable is set).
 *   *ImmediateFlipSupportedForPipe
 *       false when the VM line count is >= 32, the row line count is >= 16,
 *       or the meta/PTE time plus twice the row time exceeds min_row_time;
 *       true otherwise.
 *
 * mode_lib and HostVMMinPageSize are accepted but not referenced in the body.
 */
static void CalculateFlipSchedule(
		struct display_mode_lib *mode_lib,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
		double UrgentExtraLatency,
		double UrgentLatency,
		unsigned int GPUVMMaxPageTableLevels,
		bool HostVMEnable,
		unsigned int HostVMMaxNonCachedPageTableLevels,
		bool GPUVMEnable,
		double HostVMMinPageSize,
		double PDEAndMetaPTEBytesPerFrame,
		double MetaRowBytes,
		double DPTEBytesPerRow,
		double BandwidthAvailableForImmediateFlip,
		unsigned int TotImmediateFlipBytes,
		enum source_format_class SourcePixelFormat,
		double LineTime,
		double VRatio,
		double VRatioChroma,
		double Tno_bw,
		bool DCCEnable,
		unsigned int dpte_row_height,
		unsigned int meta_row_height,
		unsigned int dpte_row_height_chroma,
		unsigned int meta_row_height_chroma,
		double *DestinationLinesToRequestVMInImmediateFlip,
		double *DestinationLinesToRequestRowInImmediateFlip,
		double *final_flip_bw,
		bool *ImmediateFlipSupportedForPipe)
{
	double min_row_time = 0.0;
	unsigned int HostVMDynamicLevelsTrips = 0;
	double TimeForFetchingMetaPTEImmediateFlip = 0;
	double TimeForFetchingRowInVBlankImmediateFlip = 0;
	double ImmediateFlipBW = 0;
	double HostVMInefficiencyFactor = 0;

	/*
	 * Host VM scaling only applies when both GPUVM and HostVM are enabled;
	 * otherwise the factor is neutral (1) and no extra trips are counted.
	 */
	if (GPUVMEnable == true && HostVMEnable == true) {
		HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
		HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
	} else {
		HostVMInefficiencyFactor = 1;
		HostVMDynamicLevelsTrips = 0;
	}

	/*
	 * This plane's share of the available flip bandwidth, proportional to
	 * its flip bytes over TotImmediateFlipBytes. It stays 0 when neither
	 * GPUVM nor DCC is enabled; every division by it below sits behind a
	 * condition that implies this one.
	 */
	if (GPUVMEnable == true || DCCEnable == true) {
		ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
	}

	/* Meta/PTE fetch time: never less than a quarter of a line when GPUVM is on. */
	if (GPUVMEnable == true) {
		TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
				UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), LineTime / 4.0);
	} else {
		TimeForFetchingMetaPTEImmediateFlip = 0;
	}

	/* Convert to line times, rounded up to the next quarter line. */
	*DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
	/* Row fetch time: never less than a quarter of a line when GPUVM or DCC is on. */
	if ((GPUVMEnable == true || DCCEnable == true)) {
		TimeForFetchingRowInVBlankImmediateFlip = dml_max3((MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
				UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4);
	} else {
		TimeForFetchingRowInVBlankImmediateFlip = 0;
	}

	/* Same quarter-line rounding as for the VM request above. */
	*DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;

	/*
	 * Bandwidth needed to fetch the flip bytes within the rounded line
	 * counts. The middle branch is only reached with GPUVMEnable false, so
	 * it is effectively the DCC-only case.
	 */
	if (GPUVMEnable == true) {
		*final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
				(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
	} else if ((GPUVMEnable == true || DCCEnable == true)) {
		*final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
	} else {
		*final_flip_bw = 0;
	}


	/*
	 * min_row_time is the shortest row time among the row kinds selected
	 * below: dpte rows only (GPUVM without DCC), meta rows only (DCC
	 * without GPUVM), or both (any other combination). The first branch
	 * also considers the chroma rows.
	 *
	 * NOTE(review): dm_420_12 is not in this chroma format list although
	 * CalculateRowBandwidth() treats it as having chroma - confirm this is
	 * intended.
	 */
	if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
		if (GPUVMEnable == true && DCCEnable != true) {
			min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
		} else if (GPUVMEnable != true && DCCEnable == true) {
			min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
		} else {
			min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio,
					dpte_row_height_chroma * LineTime / VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
		}
	} else {
		if (GPUVMEnable == true && DCCEnable != true) {
			min_row_time = dpte_row_height * LineTime / VRatio;
		} else if (GPUVMEnable != true && DCCEnable == true) {
			min_row_time = meta_row_height * LineTime / VRatio;
		} else {
			min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
		}
	}

	/*
	 * Reject the flip when either line count reaches its limit (32 for VM,
	 * 16 for row) or when the meta/PTE fetch plus two row fetches does not
	 * fit within the shortest row time.
	 */
	if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
			|| TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
		*ImmediateFlipSupportedForPipe = false;
	} else {
		*ImmediateFlipSupportedForPipe = true;
	}
}
3274
TruncToValidBPP(double LinkBitRate,int Lanes,long HTotal,long HActive,double PixelClock,double DesiredBPP,bool DSCEnable,enum output_encoder_class Output,enum output_format_class Format,unsigned int DSCInputBitPerComponent,int DSCSlices,int AudioRate,int AudioLayout,enum odm_combine_mode ODMCombine)3275 static double TruncToValidBPP(
3276 double LinkBitRate,
3277 int Lanes,
3278 long HTotal,
3279 long HActive,
3280 double PixelClock,
3281 double DesiredBPP,
3282 bool DSCEnable,
3283 enum output_encoder_class Output,
3284 enum output_format_class Format,
3285 unsigned int DSCInputBitPerComponent,
3286 int DSCSlices,
3287 int AudioRate,
3288 int AudioLayout,
3289 enum odm_combine_mode ODMCombine)
3290 {
3291 double MaxLinkBPP = 0;
3292 int MinDSCBPP = 0;
3293 double MaxDSCBPP = 0;
3294 int NonDSCBPP0 = 0;
3295 int NonDSCBPP1 = 0;
3296 int NonDSCBPP2 = 0;
3297
3298 if (Format == dm_420) {
3299 NonDSCBPP0 = 12;
3300 NonDSCBPP1 = 15;
3301 NonDSCBPP2 = 18;
3302 MinDSCBPP = 6;
3303 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
3304 } else if (Format == dm_444) {
3305 NonDSCBPP0 = 24;
3306 NonDSCBPP1 = 30;
3307 NonDSCBPP2 = 36;
3308 MinDSCBPP = 8;
3309 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3310 } else {
3311 NonDSCBPP0 = 16;
3312 NonDSCBPP1 = 20;
3313 NonDSCBPP2 = 24;
3314
3315 if (Format == dm_n422) {
3316 MinDSCBPP = 7;
3317 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3318 } else {
3319 MinDSCBPP = 8;
3320 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3321 }
3322 }
3323
3324 if (DSCEnable && Output == dm_dp) {
3325 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3326 } else {
3327 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
3328 }
3329
3330 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3331 MaxLinkBPP = 16;
3332 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
3333 MaxLinkBPP = 32;
3334 }
3335
3336
3337 if (DesiredBPP == 0) {
3338 if (DSCEnable) {
3339 if (MaxLinkBPP < MinDSCBPP) {
3340 return BPP_INVALID;
3341 } else if (MaxLinkBPP >= MaxDSCBPP) {
3342 return MaxDSCBPP;
3343 } else {
3344 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3345 }
3346 } else {
3347 if (MaxLinkBPP >= NonDSCBPP2) {
3348 return NonDSCBPP2;
3349 } else if (MaxLinkBPP >= NonDSCBPP1) {
3350 return NonDSCBPP1;
3351 } else if (MaxLinkBPP >= NonDSCBPP0) {
3352 return NonDSCBPP0;
3353 } else {
3354 return BPP_INVALID;
3355 }
3356 }
3357 } else {
3358 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0 || DesiredBPP == 18)) ||
3359 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3360 return BPP_INVALID;
3361 } else {
3362 return DesiredBPP;
3363 }
3364 }
3365 }
3366
dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib * mode_lib)3367 void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3368 {
3369 struct vba_vars_st *v = &mode_lib->vba;
3370 int MinPrefetchMode, MaxPrefetchMode;
3371 int i, start_state;
3372 unsigned int j, k, m;
3373 bool EnoughWritebackUnits = true;
3374 bool WritebackModeSupport = true;
3375 bool ViewportExceedsSurface = false;
3376 double MaxTotalVActiveRDBandwidth = 0;
3377 long ReorderingBytes = 0;
3378 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX] = { 0 };
3379
3380 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3381
3382 if (mode_lib->validate_max_state)
3383 start_state = v->soc.num_states - 1;
3384 else
3385 start_state = 0;
3386
3387 CalculateMinAndMaxPrefetchMode(
3388 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3389 &MinPrefetchMode, &MaxPrefetchMode);
3390
3391 /*Scale Ratio, taps Support Check*/
3392
3393 v->ScaleRatioAndTapsSupport = true;
3394 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3395 if (v->ScalerEnabled[k] == false
3396 && ((v->SourcePixelFormat[k] != dm_444_64
3397 && v->SourcePixelFormat[k] != dm_444_32
3398 && v->SourcePixelFormat[k] != dm_444_16
3399 && v->SourcePixelFormat[k] != dm_mono_16
3400 && v->SourcePixelFormat[k] != dm_mono_8
3401 && v->SourcePixelFormat[k] != dm_rgbe
3402 && v->SourcePixelFormat[k] != dm_rgbe_alpha)
3403 || v->HRatio[k] != 1.0
3404 || v->htaps[k] != 1.0
3405 || v->VRatio[k] != 1.0
3406 || v->vtaps[k] != 1.0)) {
3407 v->ScaleRatioAndTapsSupport = false;
3408 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0
3409 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3410 || (v->htaps[k] > 1.0
3411 && (v->htaps[k] % 2) == 1)
3412 || v->HRatio[k] > v->MaxHSCLRatio
3413 || v->VRatio[k] > v->MaxVSCLRatio
3414 || v->HRatio[k] > v->htaps[k]
3415 || v->VRatio[k] > v->vtaps[k]
3416 || (v->SourcePixelFormat[k] != dm_444_64
3417 && v->SourcePixelFormat[k] != dm_444_32
3418 && v->SourcePixelFormat[k] != dm_444_16
3419 && v->SourcePixelFormat[k] != dm_mono_16
3420 && v->SourcePixelFormat[k] != dm_mono_8
3421 && v->SourcePixelFormat[k] != dm_rgbe
3422 && (v->VTAPsChroma[k] < 1
3423 || v->VTAPsChroma[k] > 8
3424 || v->HTAPsChroma[k] < 1
3425 || v->HTAPsChroma[k] > 8
3426 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3427 || v->HRatioChroma[k] > v->MaxHSCLRatio
3428 || v->VRatioChroma[k] > v->MaxVSCLRatio
3429 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3430 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3431 v->ScaleRatioAndTapsSupport = false;
3432 }
3433 }
3434 /*Source Format, Pixel Format and Scan Support Check*/
3435
3436 v->SourceFormatPixelAndScanSupport = true;
3437 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3438 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
3439 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t || v->SurfaceTiling[k] == dm_sw_64kb_d_x)
3440 && !(v->SourcePixelFormat[k] == dm_444_64))) {
3441 v->SourceFormatPixelAndScanSupport = false;
3442 }
3443 }
3444 /*Bandwidth Support Check*/
3445
3446 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3447 dml30_CalculateBytePerPixelAnd256BBlockSizes(
3448 v->SourcePixelFormat[k],
3449 v->SurfaceTiling[k],
3450 &v->BytePerPixelY[k],
3451 &v->BytePerPixelC[k],
3452 &v->BytePerPixelInDETY[k],
3453 &v->BytePerPixelInDETC[k],
3454 &v->Read256BlockHeightY[k],
3455 &v->Read256BlockHeightC[k],
3456 &v->Read256BlockWidthY[k],
3457 &v->Read256BlockWidthC[k]);
3458 }
3459 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3460 if (v->SourceScan[k] != dm_vert) {
3461 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3462 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3463 } else {
3464 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3465 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3466 }
3467 }
3468 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3469 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0) / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3470 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0) / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3471 }
3472 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3473 if (v->WritebackEnable[k] == true
3474 && v->WritebackPixelFormat[k] == dm_444_64) {
3475 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k]
3476 * v->WritebackDestinationHeight[k]
3477 / (v->WritebackSourceHeight[k]
3478 * v->HTotal[k]
3479 / v->PixelClock[k]) * 8.0;
3480 } else if (v->WritebackEnable[k] == true) {
3481 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k]
3482 * v->WritebackDestinationHeight[k]
3483 / (v->WritebackSourceHeight[k]
3484 * v->HTotal[k]
3485 / v->PixelClock[k]) * 4.0;
3486 } else {
3487 v->WriteBandwidth[k] = 0.0;
3488 }
3489 }
3490
3491 /*Writeback Latency support check*/
3492
3493 v->WritebackLatencySupport = true;
3494 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3495 if (v->WritebackEnable[k] == true) {
3496 if (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave ||
3497 v->WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave) {
3498 if (v->WriteBandwidth[k]
3499 > 2.0 * v->WritebackInterfaceBufferSize * 1024
3500 / v->WritebackLatency) {
3501 v->WritebackLatencySupport = false;
3502 }
3503 } else {
3504 if (v->WriteBandwidth[k]
3505 > v->WritebackInterfaceBufferSize * 1024
3506 / v->WritebackLatency) {
3507 v->WritebackLatencySupport = false;
3508 }
3509 }
3510 }
3511 }
3512
3513 /*Writeback Mode Support Check*/
3514
3515 v->TotalNumberOfActiveWriteback = 0;
3516 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3517 if (v->WritebackEnable[k] == true) {
3518 v->TotalNumberOfActiveWriteback =
3519 v->TotalNumberOfActiveWriteback + 1;
3520 }
3521 }
3522
3523 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
3524 EnoughWritebackUnits = false;
3525 }
3526 if (!v->WritebackSupportInterleaveAndUsingWholeBufferForASingleStream
3527 && (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave
3528 || v->WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave)) {
3529
3530 WritebackModeSupport = false;
3531 }
3532 if (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave && v->TotalNumberOfActiveWriteback > 1) {
3533 WritebackModeSupport = false;
3534 }
3535
3536 /*Writeback Scale Ratio and Taps Support Check*/
3537
3538 v->WritebackScaleRatioAndTapsSupport = true;
3539 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3540 if (v->WritebackEnable[k] == true) {
3541 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio
3542 || v->WritebackVRatio[k]
3543 > v->WritebackMaxVSCLRatio
3544 || v->WritebackHRatio[k]
3545 < v->WritebackMinHSCLRatio
3546 || v->WritebackVRatio[k]
3547 < v->WritebackMinVSCLRatio
3548 || v->WritebackHTaps[k]
3549 > v->WritebackMaxHSCLTaps
3550 || v->WritebackVTaps[k]
3551 > v->WritebackMaxVSCLTaps
3552 || v->WritebackHRatio[k]
3553 > v->WritebackHTaps[k]
3554 || v->WritebackVRatio[k]
3555 > v->WritebackVTaps[k]
3556 || (v->WritebackHTaps[k] > 2.0
3557 && ((v->WritebackHTaps[k] % 2)
3558 == 1))) {
3559 v->WritebackScaleRatioAndTapsSupport = false;
3560 }
3561 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
3562 v->WritebackScaleRatioAndTapsSupport = false;
3563 }
3564 }
3565 }
3566 /*Maximum DISPCLK/DPPCLK Support check*/
3567
3568 v->WritebackRequiredDISPCLK = 0.0;
3569 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3570 if (v->WritebackEnable[k] == true) {
3571 v->WritebackRequiredDISPCLK = dml_max(v->WritebackRequiredDISPCLK,
3572 dml30_CalculateWriteBackDISPCLK(
3573 v->WritebackPixelFormat[k],
3574 v->PixelClock[k],
3575 v->WritebackHRatio[k],
3576 v->WritebackVRatio[k],
3577 v->WritebackHTaps[k],
3578 v->WritebackVTaps[k],
3579 v->WritebackSourceWidth[k],
3580 v->WritebackDestinationWidth[k],
3581 v->HTotal[k],
3582 v->WritebackLineBufferSize));
3583 }
3584 }
3585 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3586 if (v->HRatio[k] > 1.0) {
3587 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
3588 } else {
3589 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3590 }
3591 if (v->BytePerPixelC[k] == 0.0) {
3592 v->PSCL_FACTOR_CHROMA[k] = 0.0;
3593 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
3594 * dml_max3(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 1.0);
3595 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3596 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3597 }
3598 } else {
3599 if (v->HRatioChroma[k] > 1.0) {
3600 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
3601 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
3602 } else {
3603 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3604 }
3605 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] * dml_max5(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
3606 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
3607 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
3608 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
3609 1.0);
3610 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
3611 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3612 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3613 }
3614 }
3615 }
3616 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3617 int MaximumSwathWidthSupportLuma = 0;
3618 int MaximumSwathWidthSupportChroma = 0;
3619
3620 if (v->SurfaceTiling[k] == dm_sw_linear) {
3621 MaximumSwathWidthSupportLuma = 8192.0;
3622 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
3623 MaximumSwathWidthSupportLuma = 2880.0;
3624 } else {
3625 MaximumSwathWidthSupportLuma = 5760.0;
3626 }
3627
3628 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
3629 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
3630 } else {
3631 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
3632 }
3633 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
3634 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
3635 if (v->BytePerPixelC[k] == 0.0) {
3636 v->MaximumSwathWidthInLineBufferChroma = 0;
3637 } else {
3638 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
3639 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
3640 }
3641 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
3642 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
3643 }
3644
3645 CalculateSwathAndDETConfiguration(
3646 true,
3647 v->NumberOfActivePlanes,
3648 v->DETBufferSizeInKByte[0],
3649 v->MaximumSwathWidthLuma,
3650 v->MaximumSwathWidthChroma,
3651 v->SourceScan,
3652 v->SourcePixelFormat,
3653 v->SurfaceTiling,
3654 v->ViewportWidth,
3655 v->ViewportHeight,
3656 v->SurfaceWidthY,
3657 v->SurfaceWidthC,
3658 v->SurfaceHeightY,
3659 v->SurfaceHeightC,
3660 v->Read256BlockHeightY,
3661 v->Read256BlockHeightC,
3662 v->Read256BlockWidthY,
3663 v->Read256BlockWidthC,
3664 v->odm_combine_dummy,
3665 v->BlendingAndTiming,
3666 v->BytePerPixelY,
3667 v->BytePerPixelC,
3668 v->BytePerPixelInDETY,
3669 v->BytePerPixelInDETC,
3670 v->HActive,
3671 v->HRatio,
3672 v->HRatioChroma,
3673 v->DPPPerPlane,
3674 v->swath_width_luma_ub,
3675 v->swath_width_chroma_ub,
3676 v->SwathWidthY,
3677 v->SwathWidthC,
3678 v->SwathHeightY,
3679 v->SwathHeightC,
3680 v->DETBufferSizeY,
3681 v->DETBufferSizeC,
3682 v->SingleDPPViewportSizeSupportPerPlane,
3683 &v->ViewportSizeSupport[0][0]);
3684
3685 for (i = start_state; i < v->soc.num_states; i++) {
3686 for (j = 0; j < 2; j++) {
3687 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
3688 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
3689 v->RequiredDISPCLK[i][j] = 0.0;
3690 v->DISPCLK_DPPCLK_Support[i][j] = true;
3691 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3692 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3693 * (1.0 + v->DISPCLKRampingMargin / 100.0);
3694 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3695 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3696 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3697 }
3698 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3699 * (1 + v->DISPCLKRampingMargin / 100.0);
3700 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3701 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3702 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3703 }
3704 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3705 * (1 + v->DISPCLKRampingMargin / 100.0);
3706 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3707 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3708 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3709 }
3710
3711 if (v->ODMCombinePolicy == dm_odm_combine_policy_none) {
3712 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
3713 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
3714 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
3715 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3716 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3717 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
3718 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
3719 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
3720 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
3721 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
3722 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3723 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3724 } else {
3725 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
3726 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
3727 }
3728 if (v->DSCEnabled[k] && v->HActive[k] > DCN30_MAX_DSC_IMAGE_WIDTH
3729 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
3730 if (v->HActive[k] / 2 > DCN30_MAX_DSC_IMAGE_WIDTH) {
3731 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
3732 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
3733 } else {
3734 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3735 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3736 }
3737 }
3738 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN30_MAX_FMT_420_BUFFER_WIDTH
3739 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
3740 if (v->HActive[k] / 2 > DCN30_MAX_FMT_420_BUFFER_WIDTH) {
3741 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
3742 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
3743 } else {
3744 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3745 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3746 }
3747 }
3748 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
3749 v->MPCCombine[i][j][k] = false;
3750 v->NoOfDPP[i][j][k] = 4;
3751 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
3752 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
3753 v->MPCCombine[i][j][k] = false;
3754 v->NoOfDPP[i][j][k] = 2;
3755 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
3756 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
3757 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= v->MaxDppclkRoundedDownToDFSGranularity
3758 && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
3759 v->MPCCombine[i][j][k] = false;
3760 v->NoOfDPP[i][j][k] = 1;
3761 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3762 } else {
3763 v->MPCCombine[i][j][k] = true;
3764 v->NoOfDPP[i][j][k] = 2;
3765 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
3766 }
3767 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
3768 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3769 > v->MaxDppclkRoundedDownToDFSGranularity) || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
3770 v->DISPCLK_DPPCLK_Support[i][j] = false;
3771 }
3772 }
3773 v->TotalNumberOfActiveDPP[i][j] = 0;
3774 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
3775 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3776 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
3777 if (v->NoOfDPP[i][j][k] == 1)
3778 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
3779 }
3780 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never) {
3781 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
3782 double BWOfNonSplitPlaneOfMaximumBandwidth = 0;
3783 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
3784 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
3785 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
3786 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3787 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
3788 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
3789 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
3790 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
3791 }
3792 }
3793 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
3794 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
3795 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
3796 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
3797 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
3798 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
3799 }
3800 }
3801 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
3802 v->RequiredDISPCLK[i][j] = 0.0;
3803 v->DISPCLK_DPPCLK_Support[i][j] = true;
3804 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3805 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
3806 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
3807 v->MPCCombine[i][j][k] = true;
3808 v->NoOfDPP[i][j][k] = 2;
3809 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
3810 } else {
3811 v->MPCCombine[i][j][k] = false;
3812 v->NoOfDPP[i][j][k] = 1;
3813 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3814 }
3815 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
3816 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3817 * (1.0 + v->DISPCLKRampingMargin / 100.0);
3818 } else {
3819 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3820 }
3821 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
3822 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3823 > v->MaxDppclkRoundedDownToDFSGranularity) || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
3824 v->DISPCLK_DPPCLK_Support[i][j] = false;
3825 }
3826 }
3827 v->TotalNumberOfActiveDPP[i][j] = 0.0;
3828 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3829 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
3830 }
3831 }
3832 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
3833 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
3834 v->DISPCLK_DPPCLK_Support[i][j] = false;
3835 }
3836 }
3837 }
3838
3839 /*Total Available Pipes Support Check*/
3840
3841 for (i = start_state; i < v->soc.num_states; i++) {
3842 for (j = 0; j < 2; j++) {
3843 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
3844 v->TotalAvailablePipesSupport[i][j] = true;
3845 } else {
3846 v->TotalAvailablePipesSupport[i][j] = false;
3847 }
3848 }
3849 }
3850 /*Display IO and DSC Support Check*/
3851
3852 v->NonsupportedDSCInputBPC = false;
3853 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3854 if (!(v->DSCInputBitPerComponent[k] == 12.0
3855 || v->DSCInputBitPerComponent[k] == 10.0
3856 || v->DSCInputBitPerComponent[k] == 8.0)) {
3857 v->NonsupportedDSCInputBPC = true;
3858 }
3859 }
3860
3861 /*Number Of DSC Slices*/
3862 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3863 if (v->BlendingAndTiming[k] == k) {
3864 if (v->PixelClockBackEnd[k] > 3200) {
3865 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
3866 } else if (v->PixelClockBackEnd[k] > 1360) {
3867 v->NumberOfDSCSlices[k] = 8;
3868 } else if (v->PixelClockBackEnd[k] > 680) {
3869 v->NumberOfDSCSlices[k] = 4;
3870 } else if (v->PixelClockBackEnd[k] > 340) {
3871 v->NumberOfDSCSlices[k] = 2;
3872 } else {
3873 v->NumberOfDSCSlices[k] = 1;
3874 }
3875 } else {
3876 v->NumberOfDSCSlices[k] = 0;
3877 }
3878 }
3879
3880 for (i = start_state; i < v->soc.num_states; i++) {
3881 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3882 v->RequiresDSC[i][k] = false;
3883 v->RequiresFEC[i][k] = false;
3884 if (v->BlendingAndTiming[k] == k) {
3885 if (v->Output[k] == dm_hdmi) {
3886 v->RequiresDSC[i][k] = false;
3887 v->RequiresFEC[i][k] = false;
3888 v->OutputBppPerState[i][k] = TruncToValidBPP(
3889 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
3890 3,
3891 v->HTotal[k],
3892 v->HActive[k],
3893 v->PixelClockBackEnd[k],
3894 v->ForcedOutputLinkBPP[k],
3895 false,
3896 v->Output[k],
3897 v->OutputFormat[k],
3898 v->DSCInputBitPerComponent[k],
3899 v->NumberOfDSCSlices[k],
3900 v->AudioSampleRate[k],
3901 v->AudioSampleLayout[k],
3902 v->ODMCombineEnablePerState[i][k]);
3903 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) {
3904 if (v->DSCEnable[k] == true) {
3905 v->RequiresDSC[i][k] = true;
3906 v->LinkDSCEnable = true;
3907 if (v->Output[k] == dm_dp) {
3908 v->RequiresFEC[i][k] = true;
3909 } else {
3910 v->RequiresFEC[i][k] = false;
3911 }
3912 } else {
3913 v->RequiresDSC[i][k] = false;
3914 v->LinkDSCEnable = false;
3915 v->RequiresFEC[i][k] = false;
3916 }
3917
3918 v->Outbpp = BPP_INVALID;
3919 if (v->PHYCLKPerState[i] >= 270.0) {
3920 v->Outbpp = TruncToValidBPP(
3921 (1.0 - v->Downspreading / 100.0) * 2700,
3922 v->OutputLinkDPLanes[k],
3923 v->HTotal[k],
3924 v->HActive[k],
3925 v->PixelClockBackEnd[k],
3926 v->ForcedOutputLinkBPP[k],
3927 v->LinkDSCEnable,
3928 v->Output[k],
3929 v->OutputFormat[k],
3930 v->DSCInputBitPerComponent[k],
3931 v->NumberOfDSCSlices[k],
3932 v->AudioSampleRate[k],
3933 v->AudioSampleLayout[k],
3934 v->ODMCombineEnablePerState[i][k]);
3935 v->OutputBppPerState[i][k] = v->Outbpp;
3936 // TODO: Need some other way to handle this nonsense
3937 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
3938 }
3939 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
3940 v->Outbpp = TruncToValidBPP(
3941 (1.0 - v->Downspreading / 100.0) * 5400,
3942 v->OutputLinkDPLanes[k],
3943 v->HTotal[k],
3944 v->HActive[k],
3945 v->PixelClockBackEnd[k],
3946 v->ForcedOutputLinkBPP[k],
3947 v->LinkDSCEnable,
3948 v->Output[k],
3949 v->OutputFormat[k],
3950 v->DSCInputBitPerComponent[k],
3951 v->NumberOfDSCSlices[k],
3952 v->AudioSampleRate[k],
3953 v->AudioSampleLayout[k],
3954 v->ODMCombineEnablePerState[i][k]);
3955 v->OutputBppPerState[i][k] = v->Outbpp;
3956 // TODO: Need some other way to handle this nonsense
3957 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
3958 }
3959 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
3960 v->Outbpp = TruncToValidBPP(
3961 (1.0 - v->Downspreading / 100.0) * 8100,
3962 v->OutputLinkDPLanes[k],
3963 v->HTotal[k],
3964 v->HActive[k],
3965 v->PixelClockBackEnd[k],
3966 v->ForcedOutputLinkBPP[k],
3967 v->LinkDSCEnable,
3968 v->Output[k],
3969 v->OutputFormat[k],
3970 v->DSCInputBitPerComponent[k],
3971 v->NumberOfDSCSlices[k],
3972 v->AudioSampleRate[k],
3973 v->AudioSampleLayout[k],
3974 v->ODMCombineEnablePerState[i][k]);
3975 if (v->Outbpp == BPP_INVALID && v->ForcedOutputLinkBPP[k] == 0) {
3976 //if (v->Outbpp == BPP_INVALID && v->DSCEnabled[k] == dm_dsc_enable_only_if_necessary && v->ForcedOutputLinkBPP[k] == 0) {
3977 v->RequiresDSC[i][k] = true;
3978 v->LinkDSCEnable = true;
3979 if (v->Output[k] == dm_dp) {
3980 v->RequiresFEC[i][k] = true;
3981 }
3982 v->Outbpp = TruncToValidBPP(
3983 (1.0 - v->Downspreading / 100.0) * 8100,
3984 v->OutputLinkDPLanes[k],
3985 v->HTotal[k],
3986 v->HActive[k],
3987 v->PixelClockBackEnd[k],
3988 v->ForcedOutputLinkBPP[k],
3989 v->LinkDSCEnable,
3990 v->Output[k],
3991 v->OutputFormat[k],
3992 v->DSCInputBitPerComponent[k],
3993 v->NumberOfDSCSlices[k],
3994 v->AudioSampleRate[k],
3995 v->AudioSampleLayout[k],
3996 v->ODMCombineEnablePerState[i][k]);
3997 }
3998 v->OutputBppPerState[i][k] = v->Outbpp;
3999 // TODO: Need some other way to handle this nonsense
4000 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4001 }
4002 }
4003 } else {
4004 v->OutputBppPerState[i][k] = 0;
4005 }
4006 }
4007 }
4008 for (i = start_state; i < v->soc.num_states; i++) {
4009 v->DIOSupport[i] = true;
4010 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4011 if (!v->skip_dio_check[k] && v->BlendingAndTiming[k] == k && (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)
4012 && (v->OutputBppPerState[i][k] == 0
4013 || (v->OutputFormat[k] == dm_420 && v->Interlace[k] == true && v->ProgressiveToInterlaceUnitInOPP == true))) {
4014 v->DIOSupport[i] = false;
4015 }
4016 }
4017 }
4018
4019 for (i = start_state; i < v->soc.num_states; ++i) {
4020 v->ODMCombine4To1SupportCheckOK[i] = true;
4021 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4022 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4023 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)) {
4024 v->ODMCombine4To1SupportCheckOK[i] = false;
4025 }
4026 }
4027 }
4028
4029 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4030
4031 for (i = start_state; i < v->soc.num_states; i++) {
4032 v->NotEnoughDSCUnits[i] = false;
4033 v->TotalDSCUnitsRequired = 0.0;
4034 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4035 if (v->RequiresDSC[i][k] == true) {
4036 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4037 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4038 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4039 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4040 } else {
4041 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4042 }
4043 }
4044 }
4045 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4046 v->NotEnoughDSCUnits[i] = true;
4047 }
4048 }
4049 /*DSC Delay per state*/
4050
4051 for (i = start_state; i < v->soc.num_states; i++) {
4052 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4053 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4054 v->BPP = 0.0;
4055 } else {
4056 v->BPP = v->OutputBppPerState[i][k];
4057 }
4058 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4059 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4060 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4061 v->DSCInputBitPerComponent[k],
4062 v->BPP,
4063 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4064 v->NumberOfDSCSlices[k],
4065 v->OutputFormat[k],
4066 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4067 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4068 v->DSCDelayPerState[i][k] = 2.0
4069 * dscceComputeDelay(
4070 v->DSCInputBitPerComponent[k],
4071 v->BPP,
4072 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4073 v->NumberOfDSCSlices[k] / 2,
4074 v->OutputFormat[k],
4075 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4076 } else {
4077 v->DSCDelayPerState[i][k] = 4.0
4078 * (dscceComputeDelay(
4079 v->DSCInputBitPerComponent[k],
4080 v->BPP,
4081 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4082 v->NumberOfDSCSlices[k] / 4,
4083 v->OutputFormat[k],
4084 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4085 }
4086 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4087 } else {
4088 v->DSCDelayPerState[i][k] = 0.0;
4089 }
4090 }
4091 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4092 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4093 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4094 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4095 }
4096 }
4097 }
4098 }
4099
4100 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4101 //
4102 for (i = start_state; i < mode_lib->soc.num_states; ++i) {
4103 for (j = 0; j <= 1; ++j) {
4104 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4105 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4106 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4107 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4108 }
4109
4110 CalculateSwathAndDETConfiguration(
4111 false,
4112 v->NumberOfActivePlanes,
4113 v->DETBufferSizeInKByte[0],
4114 v->MaximumSwathWidthLuma,
4115 v->MaximumSwathWidthChroma,
4116 v->SourceScan,
4117 v->SourcePixelFormat,
4118 v->SurfaceTiling,
4119 v->ViewportWidth,
4120 v->ViewportHeight,
4121 v->SurfaceWidthY,
4122 v->SurfaceWidthC,
4123 v->SurfaceHeightY,
4124 v->SurfaceHeightC,
4125 v->Read256BlockHeightY,
4126 v->Read256BlockHeightC,
4127 v->Read256BlockWidthY,
4128 v->Read256BlockWidthC,
4129 v->ODMCombineEnableThisState,
4130 v->BlendingAndTiming,
4131 v->BytePerPixelY,
4132 v->BytePerPixelC,
4133 v->BytePerPixelInDETY,
4134 v->BytePerPixelInDETC,
4135 v->HActive,
4136 v->HRatio,
4137 v->HRatioChroma,
4138 v->NoOfDPPThisState,
4139 v->swath_width_luma_ub_this_state,
4140 v->swath_width_chroma_ub_this_state,
4141 v->SwathWidthYThisState,
4142 v->SwathWidthCThisState,
4143 v->SwathHeightYThisState,
4144 v->SwathHeightCThisState,
4145 v->DETBufferSizeYThisState,
4146 v->DETBufferSizeCThisState,
4147 v->dummystring,
4148 &v->ViewportSizeSupport[i][j]);
4149
4150 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4151 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4152 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4153 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4154 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4155 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4156 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4157 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4158 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4159 }
4160
4161 }
4162 }
4163 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4164 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4165 }
4166
4167 for (i = start_state; i < v->soc.num_states; i++) {
4168 for (j = 0; j < 2; j++) {
4169 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4170 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4171 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4172 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4173 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4174 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4175 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4176 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4177 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4178 }
4179
4180 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4181 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4182 if (v->DCCEnable[k] == true) {
4183 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4184 }
4185 }
4186
4187 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4188 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
4189 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4190
4191 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
4192 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
4193 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4194 } else {
4195 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4196 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4197 }
4198
4199 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4200 mode_lib,
4201 v->DCCEnable[k],
4202 v->Read256BlockHeightC[k],
4203 v->Read256BlockWidthY[k],
4204 v->SourcePixelFormat[k],
4205 v->SurfaceTiling[k],
4206 v->BytePerPixelC[k],
4207 v->SourceScan[k],
4208 v->SwathWidthCThisState[k],
4209 v->ViewportHeightChroma[k],
4210 v->GPUVMEnable,
4211 v->HostVMEnable,
4212 v->HostVMMaxNonCachedPageTableLevels,
4213 v->GPUVMMinPageSize,
4214 v->HostVMMinPageSize,
4215 v->PTEBufferSizeInRequestsForChroma,
4216 v->PitchC[k],
4217 0.0,
4218 &v->MacroTileWidthC[k],
4219 &v->MetaRowBytesC,
4220 &v->DPTEBytesPerRowC,
4221 &v->PTEBufferSizeNotExceededC[i][j][k],
4222 &v->dummyinteger7,
4223 &v->dpte_row_height_chroma[k],
4224 &v->dummyinteger28,
4225 &v->dummyinteger26,
4226 &v->dummyinteger23,
4227 &v->meta_row_height_chroma[k],
4228 &v->dummyinteger8,
4229 &v->dummyinteger9,
4230 &v->dummyinteger19,
4231 &v->dummyinteger20,
4232 &v->dummyinteger17,
4233 &v->dummyinteger10,
4234 &v->dummyinteger11);
4235
4236 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4237 mode_lib,
4238 v->VRatioChroma[k],
4239 v->VTAPsChroma[k],
4240 v->Interlace[k],
4241 v->ProgressiveToInterlaceUnitInOPP,
4242 v->SwathHeightCThisState[k],
4243 v->ViewportYStartC[k],
4244 &v->PrefillC[k],
4245 &v->MaxNumSwC[k]);
4246 } else {
4247 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4248 v->PTEBufferSizeInRequestsForChroma = 0;
4249 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4250 v->MetaRowBytesC = 0.0;
4251 v->DPTEBytesPerRowC = 0.0;
4252 v->PrefetchLinesC[i][j][k] = 0.0;
4253 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4254 }
4255 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4256 mode_lib,
4257 v->DCCEnable[k],
4258 v->Read256BlockHeightY[k],
4259 v->Read256BlockWidthY[k],
4260 v->SourcePixelFormat[k],
4261 v->SurfaceTiling[k],
4262 v->BytePerPixelY[k],
4263 v->SourceScan[k],
4264 v->SwathWidthYThisState[k],
4265 v->ViewportHeight[k],
4266 v->GPUVMEnable,
4267 v->HostVMEnable,
4268 v->HostVMMaxNonCachedPageTableLevels,
4269 v->GPUVMMinPageSize,
4270 v->HostVMMinPageSize,
4271 v->PTEBufferSizeInRequestsForLuma,
4272 v->PitchY[k],
4273 v->DCCMetaPitchY[k],
4274 &v->MacroTileWidthY[k],
4275 &v->MetaRowBytesY,
4276 &v->DPTEBytesPerRowY,
4277 &v->PTEBufferSizeNotExceededY[i][j][k],
4278 v->dummyinteger4,
4279 &v->dpte_row_height[k],
4280 &v->dummyinteger29,
4281 &v->dummyinteger27,
4282 &v->dummyinteger24,
4283 &v->meta_row_height[k],
4284 &v->dummyinteger25,
4285 &v->dpte_group_bytes[k],
4286 &v->dummyinteger21,
4287 &v->dummyinteger22,
4288 &v->dummyinteger18,
4289 &v->dummyinteger5,
4290 &v->dummyinteger6);
4291 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4292 mode_lib,
4293 v->VRatio[k],
4294 v->vtaps[k],
4295 v->Interlace[k],
4296 v->ProgressiveToInterlaceUnitInOPP,
4297 v->SwathHeightYThisState[k],
4298 v->ViewportYStartY[k],
4299 &v->PrefillY[k],
4300 &v->MaxNumSwY[k]);
4301 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4302 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4303 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4304
4305 CalculateRowBandwidth(
4306 v->GPUVMEnable,
4307 v->SourcePixelFormat[k],
4308 v->VRatio[k],
4309 v->VRatioChroma[k],
4310 v->DCCEnable[k],
4311 v->HTotal[k] / v->PixelClock[k],
4312 v->MetaRowBytesY,
4313 v->MetaRowBytesC,
4314 v->meta_row_height[k],
4315 v->meta_row_height_chroma[k],
4316 v->DPTEBytesPerRowY,
4317 v->DPTEBytesPerRowC,
4318 v->dpte_row_height[k],
4319 v->dpte_row_height_chroma[k],
4320 &v->meta_row_bandwidth[i][j][k],
4321 &v->dpte_row_bandwidth[i][j][k]);
4322 }
4323 v->UrgLatency[i] = CalculateUrgentLatency(
4324 v->UrgentLatencyPixelDataOnly,
4325 v->UrgentLatencyPixelMixedWithVMData,
4326 v->UrgentLatencyVMDataOnly,
4327 v->DoUrgentLatencyAdjustment,
4328 v->UrgentLatencyAdjustmentFabricClockComponent,
4329 v->UrgentLatencyAdjustmentFabricClockReference,
4330 v->FabricClockPerState[i]);
4331
4332 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4333 CalculateUrgentBurstFactor(
4334 v->swath_width_luma_ub_this_state[k],
4335 v->swath_width_chroma_ub_this_state[k],
4336 v->DETBufferSizeInKByte[0],
4337 v->SwathHeightYThisState[k],
4338 v->SwathHeightCThisState[k],
4339 v->HTotal[k] / v->PixelClock[k],
4340 v->UrgLatency[i],
4341 v->CursorBufferSize,
4342 v->CursorWidth[k][0],
4343 v->CursorBPP[k][0],
4344 v->VRatio[k],
4345 v->VRatioChroma[k],
4346 v->BytePerPixelInDETY[k],
4347 v->BytePerPixelInDETC[k],
4348 v->DETBufferSizeYThisState[k],
4349 v->DETBufferSizeCThisState[k],
4350 &v->UrgentBurstFactorCursor[k],
4351 &v->UrgentBurstFactorLuma[k],
4352 &v->UrgentBurstFactorChroma[k],
4353 &NotUrgentLatencyHiding[k]);
4354 }
4355
4356 v->NotUrgentLatencyHiding[i][j] = false;
4357 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4358 if (NotUrgentLatencyHiding[k]) {
4359 v->NotUrgentLatencyHiding[i][j] = true;
4360 }
4361 }
4362
4363 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4364 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
4365 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
4366 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
4367 }
4368
4369 v->TotalVActivePixelBandwidth[i][j] = 0;
4370 v->TotalVActiveCursorBandwidth[i][j] = 0;
4371 v->TotalMetaRowBandwidth[i][j] = 0;
4372 v->TotalDPTERowBandwidth[i][j] = 0;
4373 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4374 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
4375 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
4376 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
4377 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
4378 }
4379
4380 CalculateDCFCLKDeepSleep(
4381 mode_lib,
4382 v->NumberOfActivePlanes,
4383 v->BytePerPixelY,
4384 v->BytePerPixelC,
4385 v->VRatio,
4386 v->VRatioChroma,
4387 v->SwathWidthYThisState,
4388 v->SwathWidthCThisState,
4389 v->NoOfDPPThisState,
4390 v->HRatio,
4391 v->HRatioChroma,
4392 v->PixelClock,
4393 v->PSCL_FACTOR,
4394 v->PSCL_FACTOR_CHROMA,
4395 v->RequiredDPPCLKThisState,
4396 v->ReadBandwidthLuma,
4397 v->ReadBandwidthChroma,
4398 v->ReturnBusWidth,
4399 &v->ProjectedDCFCLKDeepSleep[i][j]);
4400 }
4401 }
4402
4403 //Calculate Return BW
4404
4405 for (i = start_state; i < mode_lib->soc.num_states; ++i) {
4406 for (j = 0; j <= 1; ++j) {
4407 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4408 if (v->BlendingAndTiming[k] == k) {
4409 if (v->WritebackEnable[k] == true) {
4410 v->WritebackDelayTime[k] = v->WritebackLatency
4411 + CalculateWriteBackDelay(
4412 v->WritebackPixelFormat[k],
4413 v->WritebackHRatio[k],
4414 v->WritebackVRatio[k],
4415 v->WritebackVTaps[k],
4416 v->WritebackDestinationWidth[k],
4417 v->WritebackDestinationHeight[k],
4418 v->WritebackSourceHeight[k],
4419 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
4420 } else {
4421 v->WritebackDelayTime[k] = 0.0;
4422 }
4423 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4424 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
4425 v->WritebackDelayTime[k] = dml_max(
4426 v->WritebackDelayTime[k],
4427 v->WritebackLatency
4428 + CalculateWriteBackDelay(
4429 v->WritebackPixelFormat[m],
4430 v->WritebackHRatio[m],
4431 v->WritebackVRatio[m],
4432 v->WritebackVTaps[m],
4433 v->WritebackDestinationWidth[m],
4434 v->WritebackDestinationHeight[m],
4435 v->WritebackSourceHeight[m],
4436 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
4437 }
4438 }
4439 }
4440 }
4441 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4442 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4443 if (v->BlendingAndTiming[k] == m) {
4444 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
4445 }
4446 }
4447 }
4448 v->MaxMaxVStartup[i][j] = 0;
4449 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4450 v->MaximumVStartup[i][j][k] = v->VTotal[k] - v->VActive[k]
4451 - dml_max(1.0, dml_ceil(1.0 * v->WritebackDelayTime[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0));
4452 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
4453 }
4454 }
4455 }
4456
4457 ReorderingBytes = v->NumberOfChannels
4458 * dml_max3(
4459 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
4460 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
4461 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
4462 v->FinalDRAMClockChangeLatency = (v->DRAMClockChangeLatencyOverride > 0 ? v->DRAMClockChangeLatencyOverride : v->DRAMClockChangeLatency);
4463
4464 for (i = start_state; i < mode_lib->soc.num_states; ++i) {
4465 for (j = 0; j <= 1; ++j) {
4466 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
4467 }
4468 }
4469
4470 if (v->UseMinimumRequiredDCFCLK == true) {
4471 UseMinimumDCFCLK(mode_lib, v, MaxPrefetchMode, ReorderingBytes);
4472
4473 if (v->ClampMinDCFCLK) {
4474 /* Clamp calculated values to actual minimum */
4475 for (i = start_state; i < mode_lib->soc.num_states; ++i) {
4476 for (j = 0; j <= 1; ++j) {
4477 if (v->DCFCLKState[i][j] < mode_lib->soc.min_dcfclk) {
4478 v->DCFCLKState[i][j] = mode_lib->soc.min_dcfclk;
4479 }
4480 }
4481 }
4482 }
4483 }
4484
4485 for (i = start_state; i < mode_lib->soc.num_states; ++i) {
4486 for (j = 0; j <= 1; ++j) {
4487 v->IdealSDPPortBandwidthPerState[i][j] = dml_min3(
4488 v->ReturnBusWidth * v->DCFCLKState[i][j],
4489 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth,
4490 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
4491 if (v->HostVMEnable != true) {
4492 v->ReturnBWPerState[i][j] = v->IdealSDPPortBandwidthPerState[i][j] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly
4493 / 100;
4494 } else {
4495 v->ReturnBWPerState[i][j] = v->IdealSDPPortBandwidthPerState[i][j]
4496 * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100;
4497 }
4498 }
4499 }
4500
4501 //Re-ordering Buffer Support Check
4502
4503 for (i = start_state; i < mode_lib->soc.num_states; ++i) {
4504 for (j = 0; j <= 1; ++j) {
4505 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
4506 > (v->RoundTripPingLatencyCycles + 32) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
4507 v->ROBSupport[i][j] = true;
4508 } else {
4509 v->ROBSupport[i][j] = false;
4510 }
4511 }
4512 }
4513
4514 //Vertical Active BW support check
4515
4516 MaxTotalVActiveRDBandwidth = 0;
4517 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4518 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4519 }
4520
4521 for (i = start_state; i < mode_lib->soc.num_states; ++i) {
4522 for (j = 0; j <= 1; ++j) {
4523 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
4524 v->IdealSDPPortBandwidthPerState[i][j] * v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
4525 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation
4526 / 100);
4527 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
4528 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
4529 } else {
4530 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
4531 }
4532 }
4533 }
4534
4535 //Prefetch Check
4536
4537 for (i = start_state; i < mode_lib->soc.num_states; ++i) {
4538 for (j = 0; j <= 1; ++j) {
4539 int NextPrefetchModeState = MinPrefetchMode;
4540
4541 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
4542
4543 v->BandwidthWithoutPrefetchSupported[i][j] = true;
4544 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j] + v->TotalDPTERowBandwidth[i][j]
4545 > v->ReturnBWPerState[i][j] || v->NotUrgentLatencyHiding[i][j]) {
4546 v->BandwidthWithoutPrefetchSupported[i][j] = false;
4547 }
4548
4549 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4550 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4551 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4552 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4553 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4554 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4555 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4556 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4557 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4558 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4559 v->ODMCombineEnabled[k] = v->ODMCombineEnablePerState[i][k];
4560 }
4561
4562 v->ExtraLatency = CalculateExtraLatency(
4563 v->RoundTripPingLatencyCycles,
4564 ReorderingBytes,
4565 v->DCFCLKState[i][j],
4566 v->TotalNumberOfActiveDPP[i][j],
4567 v->PixelChunkSizeInKByte,
4568 v->TotalNumberOfDCCActiveDPP[i][j],
4569 v->MetaChunkSize,
4570 v->ReturnBWPerState[i][j],
4571 v->GPUVMEnable,
4572 v->HostVMEnable,
4573 v->NumberOfActivePlanes,
4574 v->NoOfDPPThisState,
4575 v->dpte_group_bytes,
4576 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4577 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4578 v->HostVMMinPageSize,
4579 v->HostVMMaxNonCachedPageTableLevels);
4580
4581 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
4582 do {
4583 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
4584 v->MaxVStartup = v->NextMaxVStartup;
4585
4586 v->TWait = CalculateTWait(v->PrefetchModePerState[i][j], v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
4587
4588 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4589 Pipe myPipe = { 0 };
4590
4591 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
4592 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
4593 myPipe.PixelClock = v->PixelClock[k];
4594 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
4595 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
4596 myPipe.ScalerEnabled = v->ScalerEnabled[k];
4597 myPipe.SourceScan = v->SourceScan[k];
4598 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
4599 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
4600 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
4601 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
4602 myPipe.InterlaceEnable = v->Interlace[k];
4603 myPipe.NumberOfCursors = v->NumberOfCursors[k];
4604 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
4605 myPipe.HTotal = v->HTotal[k];
4606 myPipe.DCCEnable = v->DCCEnable[k];
4607 myPipe.ODMCombineEnabled = !!v->ODMCombineEnabled[k];
4608
4609 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
4610 mode_lib,
4611 k,
4612 &myPipe,
4613 v->DSCDelayPerState[i][k],
4614 v->SwathWidthYThisState[k] / v->HRatio[k],
4615 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
4616 v->MaximumVStartup[i][j][k],
4617 v->UrgLatency[i],
4618 v->ExtraLatency,
4619 v->TimeCalc,
4620 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
4621 v->MetaRowBytes[i][j][k],
4622 v->DPTEBytesPerRow[i][j][k],
4623 v->PrefetchLinesY[i][j][k],
4624 v->SwathWidthYThisState[k],
4625 v->BytePerPixelY[k],
4626 v->PrefillY[k],
4627 v->MaxNumSwY[k],
4628 v->PrefetchLinesC[i][j][k],
4629 v->SwathWidthCThisState[k],
4630 v->PrefillC[k],
4631 v->MaxNumSwC[k],
4632 v->swath_width_luma_ub_this_state[k],
4633 v->swath_width_chroma_ub_this_state[k],
4634 v->SwathHeightYThisState[k],
4635 v->SwathHeightCThisState[k],
4636 v->TWait,
4637 &v->LineTimesForPrefetch[k],
4638 &v->PrefetchBW[k],
4639 &v->LinesForMetaPTE[k],
4640 &v->LinesForMetaAndDPTERow[k],
4641 &v->VRatioPreY[i][j][k],
4642 &v->VRatioPreC[i][j][k],
4643 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
4644 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
4645 &v->NoTimeForDynamicMetadata[i][j][k]);
4646 }
4647
4648 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4649 CalculateUrgentBurstFactor(
4650 v->swath_width_luma_ub_this_state[k],
4651 v->swath_width_chroma_ub_this_state[k],
4652 v->DETBufferSizeInKByte[0],
4653 v->SwathHeightYThisState[k],
4654 v->SwathHeightCThisState[k],
4655 v->HTotal[k] / v->PixelClock[k],
4656 v->UrgLatency[i],
4657 v->CursorBufferSize,
4658 v->CursorWidth[k][0],
4659 v->CursorBPP[k][0],
4660 v->VRatioPreY[i][j][k],
4661 v->VRatioPreC[i][j][k],
4662 v->BytePerPixelInDETY[k],
4663 v->BytePerPixelInDETC[k],
4664 v->DETBufferSizeYThisState[k],
4665 v->DETBufferSizeCThisState[k],
4666 &v->UrgentBurstFactorCursorPre[k],
4667 &v->UrgentBurstFactorLumaPre[k],
4668 &v->UrgentBurstFactorChromaPre[k],
4669 &v->NoUrgentLatencyHidingPre[k]);
4670 }
4671
4672 v->MaximumReadBandwidthWithPrefetch = 0.0;
4673 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4674 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k])
4675 * v->VRatioPreY[i][j][k];
4676
4677 v->MaximumReadBandwidthWithPrefetch = v->MaximumReadBandwidthWithPrefetch
4678 + dml_max4(
4679 v->VActivePixelBandwidth[i][j][k],
4680 v->VActiveCursorBandwidth[i][j][k]
4681 + v->NoOfDPP[i][j][k] * (v->meta_row_bandwidth[i][j][k] + v->dpte_row_bandwidth[i][j][k]),
4682 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
4683 v->NoOfDPP[i][j][k]
4684 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] * v->UrgentBurstFactorLumaPre[k]
4685 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
4686 * v->UrgentBurstFactorChromaPre[k])
4687 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
4688 }
4689
4690 v->NotEnoughUrgentLatencyHidingPre = false;
4691 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4692 if (v->NoUrgentLatencyHidingPre[k] == true) {
4693 v->NotEnoughUrgentLatencyHidingPre = true;
4694 }
4695 }
4696
4697 v->PrefetchSupported[i][j] = true;
4698 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
4699 || v->NotEnoughUrgentLatencyHidingPre == 1) {
4700 v->PrefetchSupported[i][j] = false;
4701 }
4702 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4703 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
4704 || v->NoTimeForPrefetch[i][j][k] == true) {
4705 v->PrefetchSupported[i][j] = false;
4706 }
4707 }
4708
4709 v->DynamicMetadataSupported[i][j] = true;
4710 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4711 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
4712 v->DynamicMetadataSupported[i][j] = false;
4713 }
4714 }
4715
4716 v->VRatioInPrefetchSupported[i][j] = true;
4717 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4718 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
4719 v->VRatioInPrefetchSupported[i][j] = false;
4720 }
4721 }
4722 v->AnyLinesForVMOrRowTooLarge = false;
4723 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4724 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
4725 v->AnyLinesForVMOrRowTooLarge = true;
4726 }
4727 }
4728
4729 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
4730 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
4731 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4732 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
4733 - dml_max(
4734 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
4735 v->NoOfDPP[i][j][k]
4736 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] * v->UrgentBurstFactorLumaPre[k]
4737 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
4738 * v->UrgentBurstFactorChromaPre[k])
4739 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
4740 }
4741 v->TotImmediateFlipBytes = 0.0;
4742 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4743 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + v->NoOfDPP[i][j][k] * (v->PDEAndMetaPTEBytesPerFrame[i][j][k]
4744 + v->MetaRowBytes[i][j][k] + v->DPTEBytesPerRow[i][j][k]);
4745 }
4746
4747 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4748 CalculateFlipSchedule(
4749 mode_lib,
4750 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4751 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4752 v->ExtraLatency,
4753 v->UrgLatency[i],
4754 v->GPUVMMaxPageTableLevels,
4755 v->HostVMEnable,
4756 v->HostVMMaxNonCachedPageTableLevels,
4757 v->GPUVMEnable,
4758 v->HostVMMinPageSize,
4759 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
4760 v->MetaRowBytes[i][j][k],
4761 v->DPTEBytesPerRow[i][j][k],
4762 v->BandwidthAvailableForImmediateFlip,
4763 v->TotImmediateFlipBytes,
4764 v->SourcePixelFormat[k],
4765 v->HTotal[k] / v->PixelClock[k],
4766 v->VRatio[k],
4767 v->VRatioChroma[k],
4768 v->Tno_bw[k],
4769 v->DCCEnable[k],
4770 v->dpte_row_height[k],
4771 v->meta_row_height[k],
4772 v->dpte_row_height_chroma[k],
4773 v->meta_row_height_chroma[k],
4774 &v->DestinationLinesToRequestVMInImmediateFlip[k],
4775 &v->DestinationLinesToRequestRowInImmediateFlip[k],
4776 &v->final_flip_bw[k],
4777 &v->ImmediateFlipSupportedForPipe[k]);
4778 }
4779 v->total_dcn_read_bw_with_flip = 0.0;
4780 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4781 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
4782 + dml_max3(
4783 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
4784 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
4785 + v->VActiveCursorBandwidth[i][j][k],
4786 v->NoOfDPP[i][j][k]
4787 * (v->final_flip_bw[k]
4788 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
4789 * v->UrgentBurstFactorLumaPre[k]
4790 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
4791 * v->UrgentBurstFactorChromaPre[k])
4792 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
4793 }
4794 v->ImmediateFlipSupportedForState[i][j] = true;
4795 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
4796 v->ImmediateFlipSupportedForState[i][j] = false;
4797 }
4798 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4799 if (v->ImmediateFlipSupportedForPipe[k] == false) {
4800 v->ImmediateFlipSupportedForState[i][j] = false;
4801 }
4802 }
4803 } else {
4804 v->ImmediateFlipSupportedForState[i][j] = false;
4805 }
4806 if (v->MaxVStartup <= 13 || v->AnyLinesForVMOrRowTooLarge == false) {
4807 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
4808 NextPrefetchModeState = NextPrefetchModeState + 1;
4809 } else {
4810 v->NextMaxVStartup = v->NextMaxVStartup - 1;
4811 }
4812 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
4813 && ((v->HostVMEnable == false && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
4814 || v->ImmediateFlipSupportedForState[i][j] == true))
4815 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
4816
4817 CalculateWatermarksAndDRAMSpeedChangeSupport(
4818 mode_lib,
4819 v->PrefetchModePerState[i][j],
4820 v->DCFCLKState[i][j],
4821 v->ReturnBWPerState[i][j],
4822 v->UrgLatency[i],
4823 v->ExtraLatency,
4824 v->SOCCLKPerState[i],
4825 v->ProjectedDCFCLKDeepSleep[i][j],
4826 v->NoOfDPPThisState,
4827 v->RequiredDPPCLKThisState,
4828 v->DETBufferSizeYThisState,
4829 v->DETBufferSizeCThisState,
4830 v->SwathHeightYThisState,
4831 v->SwathHeightCThisState,
4832 v->SwathWidthYThisState,
4833 v->SwathWidthCThisState,
4834 v->BytePerPixelInDETY,
4835 v->BytePerPixelInDETC,
4836 &v->DRAMClockChangeSupport[i][j]);
4837 }
4838 }
4839
4840 /*PTE Buffer Size Check*/
4841
4842 for (i = start_state; i < v->soc.num_states; i++) {
4843 for (j = 0; j < 2; j++) {
4844 v->PTEBufferSizeNotExceeded[i][j] = true;
4845 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4846 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
4847 v->PTEBufferSizeNotExceeded[i][j] = false;
4848 }
4849 }
4850 }
4851 }
4852 /*Cursor Support Check*/
4853
4854 v->CursorSupport = true;
4855 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4856 if (v->CursorWidth[k][0] > 0.0) {
4857 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
4858 v->CursorSupport = false;
4859 }
4860 }
4861 }
4862 /*Valid Pitch Check*/
4863
4864 v->PitchSupport = true;
4865 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4866 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
4867 if (v->DCCEnable[k] == true) {
4868 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
4869 } else {
4870 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
4871 }
4872 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
4873 && v->SourcePixelFormat[k] != dm_rgbe && v->SourcePixelFormat[k] != dm_mono_8) {
4874 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
4875 if (v->DCCEnable[k] == true) {
4876 v->AlignedDCCMetaPitchC[k] = dml_ceil(dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]), 64.0 * v->Read256BlockWidthC[k]);
4877 } else {
4878 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
4879 }
4880 } else {
4881 v->AlignedCPitch[k] = v->PitchC[k];
4882 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
4883 }
4884 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k] || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k]
4885 || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
4886 v->PitchSupport = false;
4887 }
4888 }
4889
4890 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4891 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k])
4892 ViewportExceedsSurface = true;
4893
4894 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
4895 && v->SourcePixelFormat[k] != dm_444_8 && v->SourcePixelFormat[k] != dm_rgbe) {
4896 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
4897 ViewportExceedsSurface = true;
4898 }
4899 }
4900 }
4901 /*Mode Support, Voltage State and SOC Configuration*/
4902
4903 for (i = v->soc.num_states - 1; i >= start_state; i--) {
4904 for (j = 0; j < 2; j++) {
4905 if (v->ScaleRatioAndTapsSupport == 1 && v->SourceFormatPixelAndScanSupport == 1 && v->ViewportSizeSupport[i][j] == 1
4906 && v->DIOSupport[i] == 1 && v->ODMCombine4To1SupportCheckOK[i] == 1
4907 && v->NotEnoughDSCUnits[i] == 0
4908 && v->DTBCLKRequiredMoreThanSupported[i] == 0
4909 && v->ROBSupport[i][j] == 1 && v->DISPCLK_DPPCLK_Support[i][j] == 1 && v->TotalAvailablePipesSupport[i][j] == 1
4910 && EnoughWritebackUnits == 1 && WritebackModeSupport == 1
4911 && v->WritebackLatencySupport == 1 && v->WritebackScaleRatioAndTapsSupport == 1 && v->CursorSupport == 1 && v->PitchSupport == 1
4912 && ViewportExceedsSurface == 0 && v->PrefetchSupported[i][j] == 1 && v->DynamicMetadataSupported[i][j] == 1
4913 && v->TotalVerticalActiveBandwidthSupport[i][j] == 1 && v->VRatioInPrefetchSupported[i][j] == 1
4914 && v->PTEBufferSizeNotExceeded[i][j] == 1 && v->NonsupportedDSCInputBPC == 0
4915 && ((v->HostVMEnable == 0 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
4916 || v->ImmediateFlipSupportedForState[i][j] == true)) {
4917 v->ModeSupport[i][j] = true;
4918 } else {
4919 v->ModeSupport[i][j] = false;
4920 }
4921 }
4922 }
4923 {
4924 unsigned int MaximumMPCCombine = 0;
4925 for (i = v->soc.num_states; i >= start_state; i--) {
4926 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
4927 v->VoltageLevel = i;
4928 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
4929 if (v->ModeSupport[i][1] == true) {
4930 MaximumMPCCombine = 1;
4931 } else {
4932 MaximumMPCCombine = 0;
4933 }
4934 }
4935 }
4936 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
4937 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4938 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
4939 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
4940 }
4941 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
4942 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
4943 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
4944 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
4945 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
4946 v->maxMpcComb = MaximumMPCCombine;
4947 }
4948 }
4949
CalculateWatermarksAndDRAMSpeedChangeSupport(struct display_mode_lib * mode_lib,unsigned int PrefetchMode,double DCFCLK,double ReturnBW,double UrgentLatency,double ExtraLatency,double SOCCLK,double DCFCLKDeepSleep,unsigned int DPPPerPlane[],double DPPCLK[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],unsigned int SwathHeightY[],unsigned int SwathHeightC[],double SwathWidthY[],double SwathWidthC[],double BytePerPixelDETY[],double BytePerPixelDETC[],enum clock_change_support * DRAMClockChangeSupport)4950 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
4951 struct display_mode_lib *mode_lib,
4952 unsigned int PrefetchMode,
4953 double DCFCLK,
4954 double ReturnBW,
4955 double UrgentLatency,
4956 double ExtraLatency,
4957 double SOCCLK,
4958 double DCFCLKDeepSleep,
4959 unsigned int DPPPerPlane[],
4960 double DPPCLK[],
4961 unsigned int DETBufferSizeY[],
4962 unsigned int DETBufferSizeC[],
4963 unsigned int SwathHeightY[],
4964 unsigned int SwathHeightC[],
4965 double SwathWidthY[],
4966 double SwathWidthC[],
4967 double BytePerPixelDETY[],
4968 double BytePerPixelDETC[],
4969 enum clock_change_support *DRAMClockChangeSupport)
4970 {
4971 struct vba_vars_st *v = &mode_lib->vba;
4972 double EffectiveLBLatencyHidingY = 0;
4973 double EffectiveLBLatencyHidingC = 0;
4974 double LinesInDETY[DC__NUM_DPP__MAX] = { 0 };
4975 double LinesInDETC = 0;
4976 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX] = { 0 };
4977 unsigned int LinesInDETCRoundedDownToSwath = 0;
4978 double FullDETBufferingTimeY[DC__NUM_DPP__MAX] = { 0 };
4979 double FullDETBufferingTimeC = 0;
4980 double ActiveDRAMClockChangeLatencyMarginY = 0;
4981 double ActiveDRAMClockChangeLatencyMarginC = 0;
4982 double WritebackDRAMClockChangeLatencyMargin = 0;
4983 double PlaneWithMinActiveDRAMClockChangeMargin = 0;
4984 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 0;
4985 double FullDETBufferingTimeYStutterCriticalPlane = 0;
4986 double TimeToFinishSwathTransferStutterCriticalPlane = 0;
4987 double WritebackDRAMClockChangeLatencyHiding = 0;
4988 unsigned int k, j;
4989
4990 v->TotalActiveDPP = 0;
4991 v->TotalDCCActiveDPP = 0;
4992 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4993 v->TotalActiveDPP = v->TotalActiveDPP + DPPPerPlane[k];
4994 if (v->DCCEnable[k] == true) {
4995 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + DPPPerPlane[k];
4996 }
4997 }
4998
4999 v->UrgentWatermark = UrgentLatency + ExtraLatency;
5000
5001 v->DRAMClockChangeWatermark = v->FinalDRAMClockChangeLatency + v->UrgentWatermark;
5002
5003 v->TotalActiveWriteback = 0;
5004 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5005 if (v->WritebackEnable[k] == true) {
5006 v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
5007 }
5008 }
5009
5010 if (v->TotalActiveWriteback <= 1) {
5011 v->WritebackUrgentWatermark = v->WritebackLatency;
5012 } else {
5013 v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5014 }
5015
5016 if (v->TotalActiveWriteback <= 1) {
5017 v->WritebackDRAMClockChangeWatermark = v->FinalDRAMClockChangeLatency + v->WritebackLatency;
5018 } else {
5019 v->WritebackDRAMClockChangeWatermark = v->FinalDRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5020 }
5021
5022 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5023
5024 v->LBLatencyHidingSourceLinesY = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
5025
5026 v->LBLatencyHidingSourceLinesC = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
5027
5028 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
5029
5030 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
5031
5032 LinesInDETY[k] = (double) DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k];
5033 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5034 FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
5035 if (BytePerPixelDETC[k] > 0) {
5036 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5037 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5038 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k];
5039 } else {
5040 LinesInDETC = 0;
5041 FullDETBufferingTimeC = 999999;
5042 }
5043
5044 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY[k] - v->UrgentWatermark - (v->HTotal[k] / v->PixelClock[k]) * (v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) - v->DRAMClockChangeWatermark;
5045
5046 if (v->NumberOfActivePlanes > 1) {
5047 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k];
5048 }
5049
5050 if (BytePerPixelDETC[k] > 0) {
5051 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC - v->UrgentWatermark - (v->HTotal[k] / v->PixelClock[k]) * (v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) - v->DRAMClockChangeWatermark;
5052
5053 if (v->NumberOfActivePlanes > 1) {
5054 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k];
5055 }
5056 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5057 } else {
5058 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5059 }
5060
5061 if (v->WritebackEnable[k] == true) {
5062
5063 WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
5064 if (v->WritebackPixelFormat[k] == dm_444_64) {
5065 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5066 }
5067 if (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave) {
5068 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding * 2;
5069 }
5070 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
5071 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
5072 }
5073 }
5074
5075 v->MinActiveDRAMClockChangeMargin = 999999;
5076 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5077 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5078 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
5079 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
5080 if (v->BlendingAndTiming[k] == k) {
5081 PlaneWithMinActiveDRAMClockChangeMargin = k;
5082 } else {
5083 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
5084 if (v->BlendingAndTiming[k] == j) {
5085 PlaneWithMinActiveDRAMClockChangeMargin = j;
5086 }
5087 }
5088 }
5089 }
5090 }
5091
5092 v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->FinalDRAMClockChangeLatency;
5093
5094 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5095 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5096 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5097 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
5098 }
5099 }
5100
5101 v->TotalNumberOfActiveOTG = 0;
5102 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5103 if (v->BlendingAndTiming[k] == k) {
5104 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
5105 }
5106 }
5107
5108 if (v->MinActiveDRAMClockChangeMargin > 0) {
5109 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5110 } else if (((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0)) {
5111 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5112 } else {
5113 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5114 }
5115
5116 FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[0];
5117 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5118 if (FullDETBufferingTimeY[k] <= FullDETBufferingTimeYStutterCriticalPlane) {
5119 FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[k];
5120 TimeToFinishSwathTransferStutterCriticalPlane = (SwathHeightY[k] - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k])) * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
5121 }
5122 }
5123
5124 v->StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5125 v->StutterEnterPlusExitWatermark = dml_max(v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep, TimeToFinishSwathTransferStutterCriticalPlane);
5126
5127 }
5128
CalculateDCFCLKDeepSleep(struct display_mode_lib * mode_lib,unsigned int NumberOfActivePlanes,int BytePerPixelY[],int BytePerPixelC[],double VRatio[],double VRatioChroma[],double SwathWidthY[],double SwathWidthC[],unsigned int DPPPerPlane[],double HRatio[],double HRatioChroma[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double DPPCLK[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],int ReturnBusWidth,double * DCFCLKDeepSleep)5129 static void CalculateDCFCLKDeepSleep(
5130 struct display_mode_lib *mode_lib,
5131 unsigned int NumberOfActivePlanes,
5132 int BytePerPixelY[],
5133 int BytePerPixelC[],
5134 double VRatio[],
5135 double VRatioChroma[],
5136 double SwathWidthY[],
5137 double SwathWidthC[],
5138 unsigned int DPPPerPlane[],
5139 double HRatio[],
5140 double HRatioChroma[],
5141 double PixelClock[],
5142 double PSCL_THROUGHPUT[],
5143 double PSCL_THROUGHPUT_CHROMA[],
5144 double DPPCLK[],
5145 double ReadBandwidthLuma[],
5146 double ReadBandwidthChroma[],
5147 int ReturnBusWidth,
5148 double *DCFCLKDeepSleep)
5149 {
5150 double DisplayPipeLineDeliveryTimeLuma = 0;
5151 double DisplayPipeLineDeliveryTimeChroma = 0;
5152 unsigned int k;
5153 double ReadBandwidth = 0.0;
5154
5155 //double DCFCLKDeepSleepPerPlane[DC__NUM_DPP__MAX];
5156 for (k = 0; k < NumberOfActivePlanes; ++k) {
5157
5158 if (VRatio[k] <= 1) {
5159 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5160 } else {
5161 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5162 }
5163 if (BytePerPixelC[k] == 0) {
5164 DisplayPipeLineDeliveryTimeChroma = 0;
5165 } else {
5166 if (VRatioChroma[k] <= 1) {
5167 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5168 } else {
5169 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5170 }
5171 }
5172
5173 if (BytePerPixelC[k] > 0) {
5174 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(1.1 * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 1.1 * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5175 } else {
5176 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = 1.1 * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
5177 }
5178 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(mode_lib->vba.DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
5179
5180 }
5181
5182 for (k = 0; k < NumberOfActivePlanes; ++k) {
5183 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
5184 }
5185
5186 *DCFCLKDeepSleep = dml_max(8.0, ReadBandwidth / ReturnBusWidth);
5187
5188 for (k = 0; k < NumberOfActivePlanes; ++k) {
5189 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, mode_lib->vba.DCFCLKDeepSleepPerPlane[k]);
5190 }
5191 }
5192
/*
 * Compute the urgent burst factors for the cursor, luma and chroma of one
 * plane.
 *
 * For each of the three, the buffered time T is compared with UrgentLatency:
 *   - T - UrgentLatency <= 0: the factor is set to 0 and
 *     *NotEnoughUrgentLatencyHiding is set;
 *   - otherwise the factor is T / (T - UrgentLatency).
 * When the relevant vertical ratio is not positive the factor is 1.
 *
 * *NotEnoughUrgentLatencyHiding is always cleared on entry.
 * *UrgentBurstFactorCursor is left untouched when CursorWidth is 0, and
 * *UrgentBurstFactorChroma is left untouched when BytePerPixelInDETC is not
 * positive.  DETBufferSizeInKByte is not read here.
 */
static void CalculateUrgentBurstFactor(
		long swath_width_luma_ub,
		long swath_width_chroma_ub,
		unsigned int DETBufferSizeInKByte,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	double LinesInDETLuma = 0;
	double LinesInDETChroma = 0;
	unsigned int LinesInCursorBuffer = 0;
	double CursorBufferSizeInTime = 0;
	double DETBufferSizeInTimeLuma = 0;
	double DETBufferSizeInTimeChroma = 0;

	*NotEnoughUrgentLatencyHiding = 0;

	if (CursorWidth > 0) {
		/*
		 * Power-of-two line count derived from the buffer size over the
		 * size of one cursor line (presumably CursorBufferSize is in
		 * KByte and CursorBPP in bits - confirm).
		 */
		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
		if (VRatio > 0) {
			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorCursor = 0;
			} else {
				*UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorCursor = 1;
		}
	}

	LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (VRatio > 0) {
		/* Only whole swaths count towards the buffered time. */
		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
			*NotEnoughUrgentLatencyHiding = 1;
			*UrgentBurstFactorLuma = 0;
		} else {
			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
		}
	} else {
		*UrgentBurstFactorLuma = 1;
	}

	if (BytePerPixelInDETC > 0) {
		LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
		/*
		 * Chroma uses its own vertical ratio (VRatioC), mirroring the
		 * luma computation above which uses VRatio.
		 */
		if (VRatioC > 0) {
			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC;
			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorChroma = 0;
			} else {
				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorChroma = 1;
		}
	}
}
5267
CalculatePixelDeliveryTimes(unsigned int NumberOfActivePlanes,double VRatio[],double VRatioChroma[],double VRatioPrefetchY[],double VRatioPrefetchC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[],unsigned int DPPPerPlane[],double HRatio[],double HRatioChroma[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double DPPCLK[],int BytePerPixelC[],enum scan_direction_class SourceScan[],unsigned int NumberOfCursors[],unsigned int CursorWidth[][2],unsigned int CursorBPP[][2],unsigned int BlockWidth256BytesY[],unsigned int BlockHeight256BytesY[],unsigned int BlockWidth256BytesC[],unsigned int BlockHeight256BytesC[],double DisplayPipeLineDeliveryTimeLuma[],double DisplayPipeLineDeliveryTimeChroma[],double DisplayPipeLineDeliveryTimeLumaPrefetch[],double DisplayPipeLineDeliveryTimeChromaPrefetch[],double DisplayPipeRequestDeliveryTimeLuma[],double DisplayPipeRequestDeliveryTimeChroma[],double DisplayPipeRequestDeliveryTimeLumaPrefetch[],double DisplayPipeRequestDeliveryTimeChromaPrefetch[],double CursorRequestDeliveryTime[],double CursorRequestDeliveryTimePrefetch[])5268 static void CalculatePixelDeliveryTimes(
5269 unsigned int NumberOfActivePlanes,
5270 double VRatio[],
5271 double VRatioChroma[],
5272 double VRatioPrefetchY[],
5273 double VRatioPrefetchC[],
5274 unsigned int swath_width_luma_ub[],
5275 unsigned int swath_width_chroma_ub[],
5276 unsigned int DPPPerPlane[],
5277 double HRatio[],
5278 double HRatioChroma[],
5279 double PixelClock[],
5280 double PSCL_THROUGHPUT[],
5281 double PSCL_THROUGHPUT_CHROMA[],
5282 double DPPCLK[],
5283 int BytePerPixelC[],
5284 enum scan_direction_class SourceScan[],
5285 unsigned int NumberOfCursors[],
5286 unsigned int CursorWidth[][2],
5287 unsigned int CursorBPP[][2],
5288 unsigned int BlockWidth256BytesY[],
5289 unsigned int BlockHeight256BytesY[],
5290 unsigned int BlockWidth256BytesC[],
5291 unsigned int BlockHeight256BytesC[],
5292 double DisplayPipeLineDeliveryTimeLuma[],
5293 double DisplayPipeLineDeliveryTimeChroma[],
5294 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
5295 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
5296 double DisplayPipeRequestDeliveryTimeLuma[],
5297 double DisplayPipeRequestDeliveryTimeChroma[],
5298 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
5299 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
5300 double CursorRequestDeliveryTime[],
5301 double CursorRequestDeliveryTimePrefetch[])
5302 {
5303 double req_per_swath_ub = 0;
5304 unsigned int k;
5305
5306 for (k = 0; k < NumberOfActivePlanes; ++k) {
5307 if (VRatio[k] <= 1) {
5308 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5309 } else {
5310 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5311 }
5312
5313 if (BytePerPixelC[k] == 0) {
5314 DisplayPipeLineDeliveryTimeChroma[k] = 0;
5315 } else {
5316 if (VRatioChroma[k] <= 1) {
5317 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5318 } else {
5319 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5320 }
5321 }
5322
5323 if (VRatioPrefetchY[k] <= 1) {
5324 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5325 } else {
5326 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5327 }
5328
5329 if (BytePerPixelC[k] == 0) {
5330 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
5331 } else {
5332 if (VRatioPrefetchC[k] <= 1) {
5333 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5334 } else {
5335 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5336 }
5337 }
5338 }
5339
5340 for (k = 0; k < NumberOfActivePlanes; ++k) {
5341 if (SourceScan[k] != dm_vert) {
5342 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
5343 } else {
5344 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
5345 }
5346 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
5347 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
5348 if (BytePerPixelC[k] == 0) {
5349 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
5350 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
5351 } else {
5352 if (SourceScan[k] != dm_vert) {
5353 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
5354 } else {
5355 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
5356 }
5357 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
5358 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
5359 }
5360 }
5361
5362 for (k = 0; k < NumberOfActivePlanes; ++k) {
5363 int cursor_req_per_width = 0;
5364 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
5365 if (NumberOfCursors[k] > 0) {
5366 if (VRatio[k] <= 1) {
5367 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5368 } else {
5369 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
5370 }
5371 if (VRatioPrefetchY[k] <= 1) {
5372 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5373 } else {
5374 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
5375 }
5376 } else {
5377 CursorRequestDeliveryTime[k] = 0;
5378 CursorRequestDeliveryTimePrefetch[k] = 0;
5379 }
5380 }
5381 }
5382
CalculateMetaAndPTETimes(int NumberOfActivePlanes,bool GPUVMEnable,int MetaChunkSize,int MinMetaChunkSizeBytes,int HTotal[],double VRatio[],double VRatioChroma[],double DestinationLinesToRequestRowInVBlank[],double DestinationLinesToRequestRowInImmediateFlip[],bool DCCEnable[],double PixelClock[],int BytePerPixelY[],int BytePerPixelC[],enum scan_direction_class SourceScan[],int dpte_row_height[],int dpte_row_height_chroma[],int meta_row_width[],int meta_row_width_chroma[],int meta_row_height[],int meta_row_height_chroma[],int meta_req_width[],int meta_req_width_chroma[],int meta_req_height[],int meta_req_height_chroma[],int dpte_group_bytes[],int PTERequestSizeY[],int PTERequestSizeC[],int PixelPTEReqWidthY[],int PixelPTEReqHeightY[],int PixelPTEReqWidthC[],int PixelPTEReqHeightC[],int dpte_row_width_luma_ub[],int dpte_row_width_chroma_ub[],double DST_Y_PER_PTE_ROW_NOM_L[],double DST_Y_PER_PTE_ROW_NOM_C[],double DST_Y_PER_META_ROW_NOM_L[],double DST_Y_PER_META_ROW_NOM_C[],double TimePerMetaChunkNominal[],double TimePerChromaMetaChunkNominal[],double TimePerMetaChunkVBlank[],double TimePerChromaMetaChunkVBlank[],double TimePerMetaChunkFlip[],double TimePerChromaMetaChunkFlip[],double time_per_pte_group_nom_luma[],double time_per_pte_group_vblank_luma[],double time_per_pte_group_flip_luma[],double time_per_pte_group_nom_chroma[],double time_per_pte_group_vblank_chroma[],double time_per_pte_group_flip_chroma[])5383 static void CalculateMetaAndPTETimes(
5384 int NumberOfActivePlanes,
5385 bool GPUVMEnable,
5386 int MetaChunkSize,
5387 int MinMetaChunkSizeBytes,
5388 int HTotal[],
5389 double VRatio[],
5390 double VRatioChroma[],
5391 double DestinationLinesToRequestRowInVBlank[],
5392 double DestinationLinesToRequestRowInImmediateFlip[],
5393 bool DCCEnable[],
5394 double PixelClock[],
5395 int BytePerPixelY[],
5396 int BytePerPixelC[],
5397 enum scan_direction_class SourceScan[],
5398 int dpte_row_height[],
5399 int dpte_row_height_chroma[],
5400 int meta_row_width[],
5401 int meta_row_width_chroma[],
5402 int meta_row_height[],
5403 int meta_row_height_chroma[],
5404 int meta_req_width[],
5405 int meta_req_width_chroma[],
5406 int meta_req_height[],
5407 int meta_req_height_chroma[],
5408 int dpte_group_bytes[],
5409 int PTERequestSizeY[],
5410 int PTERequestSizeC[],
5411 int PixelPTEReqWidthY[],
5412 int PixelPTEReqHeightY[],
5413 int PixelPTEReqWidthC[],
5414 int PixelPTEReqHeightC[],
5415 int dpte_row_width_luma_ub[],
5416 int dpte_row_width_chroma_ub[],
5417 double DST_Y_PER_PTE_ROW_NOM_L[],
5418 double DST_Y_PER_PTE_ROW_NOM_C[],
5419 double DST_Y_PER_META_ROW_NOM_L[],
5420 double DST_Y_PER_META_ROW_NOM_C[],
5421 double TimePerMetaChunkNominal[],
5422 double TimePerChromaMetaChunkNominal[],
5423 double TimePerMetaChunkVBlank[],
5424 double TimePerChromaMetaChunkVBlank[],
5425 double TimePerMetaChunkFlip[],
5426 double TimePerChromaMetaChunkFlip[],
5427 double time_per_pte_group_nom_luma[],
5428 double time_per_pte_group_vblank_luma[],
5429 double time_per_pte_group_flip_luma[],
5430 double time_per_pte_group_nom_chroma[],
5431 double time_per_pte_group_vblank_chroma[],
5432 double time_per_pte_group_flip_chroma[])
5433 {
5434 unsigned int meta_chunk_width = 0;
5435 unsigned int min_meta_chunk_width = 0;
5436 unsigned int meta_chunk_per_row_int = 0;
5437 unsigned int meta_row_remainder = 0;
5438 unsigned int meta_chunk_threshold = 0;
5439 unsigned int meta_chunks_per_row_ub = 0;
5440 unsigned int meta_chunk_width_chroma = 0;
5441 unsigned int min_meta_chunk_width_chroma = 0;
5442 unsigned int meta_chunk_per_row_int_chroma = 0;
5443 unsigned int meta_row_remainder_chroma = 0;
5444 unsigned int meta_chunk_threshold_chroma = 0;
5445 unsigned int meta_chunks_per_row_ub_chroma = 0;
5446 unsigned int dpte_group_width_luma = 0;
5447 unsigned int dpte_groups_per_row_luma_ub = 0;
5448 unsigned int dpte_group_width_chroma = 0;
5449 unsigned int dpte_groups_per_row_chroma_ub = 0;
5450 unsigned int k;
5451
5452 for (k = 0; k < NumberOfActivePlanes; ++k) {
5453 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
5454 if (BytePerPixelC[k] == 0) {
5455 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
5456 } else {
5457 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
5458 }
5459 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
5460 if (BytePerPixelC[k] == 0) {
5461 DST_Y_PER_META_ROW_NOM_C[k] = 0;
5462 } else {
5463 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
5464 }
5465 }
5466
5467 for (k = 0; k < NumberOfActivePlanes; ++k) {
5468 if (DCCEnable[k] == true) {
5469 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
5470 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
5471 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
5472 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
5473 if (SourceScan[k] != dm_vert) {
5474 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
5475 } else {
5476 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
5477 }
5478 if (meta_row_remainder <= meta_chunk_threshold) {
5479 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
5480 } else {
5481 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
5482 }
5483 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5484 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5485 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5486 if (BytePerPixelC[k] == 0) {
5487 TimePerChromaMetaChunkNominal[k] = 0;
5488 TimePerChromaMetaChunkVBlank[k] = 0;
5489 TimePerChromaMetaChunkFlip[k] = 0;
5490 } else {
5491 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
5492 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
5493 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
5494 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
5495 if (SourceScan[k] != dm_vert) {
5496 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
5497 } else {
5498 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
5499 }
5500 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
5501 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
5502 } else {
5503 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
5504 }
5505 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5506 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5507 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5508 }
5509 } else {
5510 TimePerMetaChunkNominal[k] = 0;
5511 TimePerMetaChunkVBlank[k] = 0;
5512 TimePerMetaChunkFlip[k] = 0;
5513 TimePerChromaMetaChunkNominal[k] = 0;
5514 TimePerChromaMetaChunkVBlank[k] = 0;
5515 TimePerChromaMetaChunkFlip[k] = 0;
5516 }
5517 }
5518
5519 for (k = 0; k < NumberOfActivePlanes; ++k) {
5520 if (GPUVMEnable == true) {
5521 if (SourceScan[k] != dm_vert) {
5522 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
5523 } else {
5524 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
5525 }
5526 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
5527 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5528 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5529 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5530 if (BytePerPixelC[k] == 0) {
5531 time_per_pte_group_nom_chroma[k] = 0;
5532 time_per_pte_group_vblank_chroma[k] = 0;
5533 time_per_pte_group_flip_chroma[k] = 0;
5534 } else {
5535 if (SourceScan[k] != dm_vert) {
5536 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5537 } else {
5538 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5539 }
5540 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
5541 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5542 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5543 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5544 }
5545 } else {
5546 time_per_pte_group_nom_luma[k] = 0;
5547 time_per_pte_group_vblank_luma[k] = 0;
5548 time_per_pte_group_flip_luma[k] = 0;
5549 time_per_pte_group_nom_chroma[k] = 0;
5550 time_per_pte_group_vblank_chroma[k] = 0;
5551 time_per_pte_group_flip_chroma[k] = 0;
5552 }
5553 }
5554 }
5555
/*
 * CalculateVMGroupAndRequestTimes - per-plane time available for each VM
 * group and each VM request, for the vblank and immediate-flip cases.
 *
 * A plane only gets non-zero results when GPUVMEnable is set and either the
 * plane has DCC enabled or GPUVMMaxPageTableLevels is greater than 1; for
 * every other plane all four outputs are written as 0.
 *
 * For an eligible plane the group count and the 64-byte request count are
 * built from:
 *   - DCC off:                 the dpde0 bytes only
 *   - DCC on, 1 page level:    the meta PTE bytes only
 *   - DCC on, more levels:     dpde0 and meta PTE bytes; the group count
 *                              additionally gets a constant 2 (with chroma)
 *                              or 1 (without chroma)
 * Chroma terms are included only when BytePerPixelC[k] > 0.
 *
 * Each output is (lines * HTotal / PixelClock) divided by the matching count
 * and is halved when GPUVMMaxPageTableLevels > 2.
 *
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are accepted but not
 * read by this function.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	unsigned int k;

	for (k = 0; k < NumberOfActivePlanes; ++k) {
		int group_count = 0;
		int request_count = 0;
		double group_bytes;
		double vblank_window;
		double flip_window;
		bool has_chroma;

		/* Planes that do not walk the VM page tables get no budget. */
		if (!(GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1))) {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
			continue;
		}

		has_chroma = BytePerPixelC[k] > 0;
		group_bytes = (double) (vm_group_bytes[k]);

		if (DCCEnable[k] == false) {
			/* No DCC: only dpde0 traffic. */
			if (has_chroma) {
				group_count = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / group_bytes, 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / group_bytes, 1);
				request_count = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
			} else {
				group_count = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / group_bytes, 1);
				request_count = dpde0_bytes_per_frame_ub_l[k] / 64;
			}
		} else if (GPUVMMaxPageTableLevels == 1) {
			/* DCC with a single page table level: only meta PTE traffic. */
			if (has_chroma) {
				group_count = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / group_bytes, 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / group_bytes, 1);
				request_count = meta_pte_bytes_per_frame_ub_l[k] / 64
						+ meta_pte_bytes_per_frame_ub_c[k] / 64;
			} else {
				group_count = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / group_bytes, 1);
				request_count = meta_pte_bytes_per_frame_ub_l[k] / 64;
			}
		} else {
			/* DCC with several page table levels: dpde0 plus meta PTE traffic. */
			if (has_chroma) {
				group_count = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / group_bytes, 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / group_bytes, 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / group_bytes, 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / group_bytes, 1);
				request_count = dpde0_bytes_per_frame_ub_l[k] / 64
						+ dpde0_bytes_per_frame_ub_c[k] / 64 + meta_pte_bytes_per_frame_ub_l[k]
						/ 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
			} else {
				group_count = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / group_bytes, 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / group_bytes, 1);
				request_count = dpde0_bytes_per_frame_ub_l[k] / 64
						+ meta_pte_bytes_per_frame_ub_l[k] / 64;
			}
		}

		/* Common numerator of all four results: lines * HTotal / PixelClock. */
		vblank_window = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k];
		flip_window = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k];

		TimePerVMGroupVBlank[k] = vblank_window / group_count;
		TimePerVMGroupFlip[k] = flip_window / group_count;
		TimePerVMRequestVBlank[k] = vblank_window / request_count;
		TimePerVMRequestFlip[k] = flip_window / request_count;

		if (GPUVMMaxPageTableLevels > 2) {
			TimePerVMGroupVBlank[k] /= 2;
			TimePerVMGroupFlip[k] /= 2;
			TimePerVMRequestVBlank[k] /= 2;
			TimePerVMRequestFlip[k] /= 2;
		}
	}
}
5666
CalculateStutterEfficiency(int NumberOfActivePlanes,long ROBBufferSizeInKByte,double TotalDataReadBandwidth,double DCFCLK,double ReturnBW,double SRExitTime,bool SynchronizedVBlank,int DPPPerPlane[],unsigned int DETBufferSizeY[],int BytePerPixelY[],double BytePerPixelDETY[],double SwathWidthY[],int SwathHeightY[],int SwathHeightC[],double DCCRateLuma[],double DCCRateChroma[],int HTotal[],int VTotal[],double PixelClock[],double VRatio[],enum scan_direction_class SourceScan[],int BlockHeight256BytesY[],int BlockWidth256BytesY[],int BlockHeight256BytesC[],int BlockWidth256BytesC[],int DCCYMaxUncompressedBlock[],int DCCCMaxUncompressedBlock[],int VActive[],bool DCCEnable[],bool WritebackEnable[],double ReadBandwidthPlaneLuma[],double ReadBandwidthPlaneChroma[],double meta_row_bw[],double dpte_row_bw[],double * StutterEfficiencyNotIncludingVBlank,double * StutterEfficiency,double * StutterPeriodOut)5667 static void CalculateStutterEfficiency(
5668 int NumberOfActivePlanes,
5669 long ROBBufferSizeInKByte,
5670 double TotalDataReadBandwidth,
5671 double DCFCLK,
5672 double ReturnBW,
5673 double SRExitTime,
5674 bool SynchronizedVBlank,
5675 int DPPPerPlane[],
5676 unsigned int DETBufferSizeY[],
5677 int BytePerPixelY[],
5678 double BytePerPixelDETY[],
5679 double SwathWidthY[],
5680 int SwathHeightY[],
5681 int SwathHeightC[],
5682 double DCCRateLuma[],
5683 double DCCRateChroma[],
5684 int HTotal[],
5685 int VTotal[],
5686 double PixelClock[],
5687 double VRatio[],
5688 enum scan_direction_class SourceScan[],
5689 int BlockHeight256BytesY[],
5690 int BlockWidth256BytesY[],
5691 int BlockHeight256BytesC[],
5692 int BlockWidth256BytesC[],
5693 int DCCYMaxUncompressedBlock[],
5694 int DCCCMaxUncompressedBlock[],
5695 int VActive[],
5696 bool DCCEnable[],
5697 bool WritebackEnable[],
5698 double ReadBandwidthPlaneLuma[],
5699 double ReadBandwidthPlaneChroma[],
5700 double meta_row_bw[],
5701 double dpte_row_bw[],
5702 double *StutterEfficiencyNotIncludingVBlank,
5703 double *StutterEfficiency,
5704 double *StutterPeriodOut)
5705 {
5706 double FullDETBufferingTimeY[DC__NUM_DPP__MAX] = { 0 };
5707 double FrameTimeForMinFullDETBufferingTime = 0;
5708 double StutterPeriod = 0;
5709 double AverageReadBandwidth = 0;
5710 double TotalRowReadBandwidth = 0;
5711 double AverageDCCCompressionRate = 0;
5712 double PartOfBurstThatFitsInROB = 0;
5713 double StutterBurstTime = 0;
5714 int TotalActiveWriteback = 0;
5715 double VBlankTime = 0;
5716 double SmallestVBlank = 0;
5717 int BytePerPixelYCriticalPlane = 0;
5718 double SwathWidthYCriticalPlane = 0;
5719 double LinesInDETY[DC__NUM_DPP__MAX] = { 0 };
5720 double LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX] = { 0 };
5721 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
5722 double MaximumEffectiveCompressionLuma = 0;
5723 double MaximumEffectiveCompressionChroma = 0;
5724 unsigned int k;
5725
5726 for (k = 0; k < NumberOfActivePlanes; ++k) {
5727 LinesInDETY[k] = DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k];
5728 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5729 FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
5730 }
5731
5732 StutterPeriod = FullDETBufferingTimeY[0];
5733 FrameTimeForMinFullDETBufferingTime = VTotal[0] * HTotal[0] / PixelClock[0];
5734 BytePerPixelYCriticalPlane = BytePerPixelY[0];
5735 SwathWidthYCriticalPlane = SwathWidthY[0];
5736 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[0]
5737 - (LinesInDETY[0] - LinesInDETYRoundedDownToSwath[0]);
5738
5739 for (k = 0; k < NumberOfActivePlanes; ++k) {
5740 if (FullDETBufferingTimeY[k] < StutterPeriod) {
5741 StutterPeriod = FullDETBufferingTimeY[k];
5742 FrameTimeForMinFullDETBufferingTime = VTotal[k] * HTotal[k] / PixelClock[k];
5743 BytePerPixelYCriticalPlane = BytePerPixelY[k];
5744 SwathWidthYCriticalPlane = SwathWidthY[k];
5745 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k]
5746 - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k]);
5747 }
5748 }
5749
5750 AverageReadBandwidth = 0;
5751 TotalRowReadBandwidth = 0;
5752 for (k = 0; k < NumberOfActivePlanes; ++k) {
5753 if (DCCEnable[k] == true) {
5754 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k])
5755 || (SourceScan[k] != dm_vert
5756 && BlockHeight256BytesY[k] > SwathHeightY[k])
5757 || DCCYMaxUncompressedBlock[k] < 256) {
5758 MaximumEffectiveCompressionLuma = 2;
5759 } else {
5760 MaximumEffectiveCompressionLuma = 4;
5761 }
5762 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(DCCRateLuma[k], MaximumEffectiveCompressionLuma);
5763
5764 if (ReadBandwidthPlaneChroma[k] > 0) {
5765 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
5766 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k])
5767 || DCCCMaxUncompressedBlock[k] < 256) {
5768 MaximumEffectiveCompressionChroma = 2;
5769 } else {
5770 MaximumEffectiveCompressionChroma = 4;
5771 }
5772 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneChroma[k] / dml_min(DCCRateChroma[k], MaximumEffectiveCompressionChroma);
5773 }
5774 } else {
5775 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
5776 }
5777 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
5778 }
5779
5780 AverageDCCCompressionRate = TotalDataReadBandwidth / AverageReadBandwidth;
5781 PartOfBurstThatFitsInROB = dml_min(StutterPeriod * TotalDataReadBandwidth, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
5782 StutterBurstTime = PartOfBurstThatFitsInROB / AverageDCCCompressionRate / ReturnBW + (StutterPeriod * TotalDataReadBandwidth
5783 - PartOfBurstThatFitsInROB) / (DCFCLK * 64) + StutterPeriod * TotalRowReadBandwidth / ReturnBW;
5784 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
5785
5786 TotalActiveWriteback = 0;
5787 for (k = 0; k < NumberOfActivePlanes; ++k) {
5788 if (WritebackEnable[k] == true) {
5789 TotalActiveWriteback = TotalActiveWriteback + 1;
5790 }
5791 }
5792
5793 if (TotalActiveWriteback == 0) {
5794 *StutterEfficiencyNotIncludingVBlank = (1
5795 - (SRExitTime + StutterBurstTime) / StutterPeriod) * 100;
5796 } else {
5797 *StutterEfficiencyNotIncludingVBlank = 0;
5798 }
5799
5800 if (SynchronizedVBlank == true || NumberOfActivePlanes == 1) {
5801 SmallestVBlank = (VTotal[0] - VActive[0]) * HTotal[0] / PixelClock[0];
5802 } else {
5803 SmallestVBlank = 0;
5804 }
5805 for (k = 0; k < NumberOfActivePlanes; ++k) {
5806 if (SynchronizedVBlank == true || NumberOfActivePlanes == 1) {
5807 VBlankTime = (VTotal[k] - VActive[k]) * HTotal[k] / PixelClock[k];
5808 } else {
5809 VBlankTime = 0;
5810 }
5811 SmallestVBlank = dml_min(SmallestVBlank, VBlankTime);
5812 }
5813
5814 *StutterEfficiency = (*StutterEfficiencyNotIncludingVBlank / 100.0 * (FrameTimeForMinFullDETBufferingTime - SmallestVBlank) + SmallestVBlank) / FrameTimeForMinFullDETBufferingTime * 100;
5815
5816 if (StutterPeriodOut)
5817 *StutterPeriodOut = StutterPeriod;
5818 }
5819
CalculateSwathAndDETConfiguration(bool ForceSingleDPP,int NumberOfActivePlanes,unsigned int DETBufferSizeInKByte,double MaximumSwathWidthLuma[],double MaximumSwathWidthChroma[],enum scan_direction_class SourceScan[],enum source_format_class SourcePixelFormat[],enum dm_swizzle_mode SurfaceTiling[],int ViewportWidth[],int ViewportHeight[],int SurfaceWidthY[],int SurfaceWidthC[],int SurfaceHeightY[],int SurfaceHeightC[],int Read256BytesBlockHeightY[],int Read256BytesBlockHeightC[],int Read256BytesBlockWidthY[],int Read256BytesBlockWidthC[],enum odm_combine_mode ODMCombineEnabled[],int BlendingAndTiming[],int BytePerPixY[],int BytePerPixC[],double BytePerPixDETY[],double BytePerPixDETC[],int HActive[],double HRatio[],double HRatioChroma[],int DPPPerPlane[],int swath_width_luma_ub[],int swath_width_chroma_ub[],double SwathWidth[],double SwathWidthChroma[],int SwathHeightY[],int SwathHeightC[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],bool ViewportSizeSupportPerPlane[],bool * ViewportSizeSupport)5820 static void CalculateSwathAndDETConfiguration(
5821 bool ForceSingleDPP,
5822 int NumberOfActivePlanes,
5823 unsigned int DETBufferSizeInKByte,
5824 double MaximumSwathWidthLuma[],
5825 double MaximumSwathWidthChroma[],
5826 enum scan_direction_class SourceScan[],
5827 enum source_format_class SourcePixelFormat[],
5828 enum dm_swizzle_mode SurfaceTiling[],
5829 int ViewportWidth[],
5830 int ViewportHeight[],
5831 int SurfaceWidthY[],
5832 int SurfaceWidthC[],
5833 int SurfaceHeightY[],
5834 int SurfaceHeightC[],
5835 int Read256BytesBlockHeightY[],
5836 int Read256BytesBlockHeightC[],
5837 int Read256BytesBlockWidthY[],
5838 int Read256BytesBlockWidthC[],
5839 enum odm_combine_mode ODMCombineEnabled[],
5840 int BlendingAndTiming[],
5841 int BytePerPixY[],
5842 int BytePerPixC[],
5843 double BytePerPixDETY[],
5844 double BytePerPixDETC[],
5845 int HActive[],
5846 double HRatio[],
5847 double HRatioChroma[],
5848 int DPPPerPlane[],
5849 int swath_width_luma_ub[],
5850 int swath_width_chroma_ub[],
5851 double SwathWidth[],
5852 double SwathWidthChroma[],
5853 int SwathHeightY[],
5854 int SwathHeightC[],
5855 unsigned int DETBufferSizeY[],
5856 unsigned int DETBufferSizeC[],
5857 bool ViewportSizeSupportPerPlane[],
5858 bool *ViewportSizeSupport)
5859 {
5860 int MaximumSwathHeightY[DC__NUM_DPP__MAX] = { 0 };
5861 int MaximumSwathHeightC[DC__NUM_DPP__MAX] = { 0 };
5862 int MinimumSwathHeightY = 0;
5863 int MinimumSwathHeightC = 0;
5864 long RoundedUpMaxSwathSizeBytesY = 0;
5865 long RoundedUpMaxSwathSizeBytesC = 0;
5866 long RoundedUpMinSwathSizeBytesY = 0;
5867 long RoundedUpMinSwathSizeBytesC = 0;
5868 long RoundedUpSwathSizeBytesY = 0;
5869 long RoundedUpSwathSizeBytesC = 0;
5870 double SwathWidthSingleDPP[DC__NUM_DPP__MAX] = { 0 };
5871 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX] = { 0 };
5872 int k;
5873
5874 CalculateSwathWidth(
5875 ForceSingleDPP,
5876 NumberOfActivePlanes,
5877 SourcePixelFormat,
5878 SourceScan,
5879 ViewportWidth,
5880 ViewportHeight,
5881 SurfaceWidthY,
5882 SurfaceWidthC,
5883 SurfaceHeightY,
5884 SurfaceHeightC,
5885 ODMCombineEnabled,
5886 BytePerPixY,
5887 BytePerPixC,
5888 Read256BytesBlockHeightY,
5889 Read256BytesBlockHeightC,
5890 Read256BytesBlockWidthY,
5891 Read256BytesBlockWidthC,
5892 BlendingAndTiming,
5893 HActive,
5894 HRatio,
5895 DPPPerPlane,
5896 SwathWidthSingleDPP,
5897 SwathWidthSingleDPPChroma,
5898 SwathWidth,
5899 SwathWidthChroma,
5900 MaximumSwathHeightY,
5901 MaximumSwathHeightC,
5902 swath_width_luma_ub,
5903 swath_width_chroma_ub);
5904
5905 *ViewportSizeSupport = true;
5906 for (k = 0; k < NumberOfActivePlanes; ++k) {
5907 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32
5908 || SourcePixelFormat[k] == dm_444_16
5909 || SourcePixelFormat[k] == dm_mono_16
5910 || SourcePixelFormat[k] == dm_mono_8
5911 || SourcePixelFormat[k] == dm_rgbe)) {
5912 if (SurfaceTiling[k] == dm_sw_linear
5913 || (SourcePixelFormat[k] == dm_444_64
5914 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
5915 && SourceScan[k] != dm_vert)) {
5916 MinimumSwathHeightY = MaximumSwathHeightY[k];
5917 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
5918 MinimumSwathHeightY = MaximumSwathHeightY[k];
5919 } else {
5920 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
5921 }
5922 MinimumSwathHeightC = MaximumSwathHeightC[k];
5923 } else {
5924 if (SurfaceTiling[k] == dm_sw_linear) {
5925 MinimumSwathHeightY = MaximumSwathHeightY[k];
5926 MinimumSwathHeightC = MaximumSwathHeightC[k];
5927 } else if (SourcePixelFormat[k] == dm_rgbe_alpha
5928 && SourceScan[k] == dm_vert) {
5929 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
5930 MinimumSwathHeightC = MaximumSwathHeightC[k];
5931 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
5932 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
5933 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
5934 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
5935 MinimumSwathHeightY = MaximumSwathHeightY[k];
5936 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
5937 } else {
5938 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
5939 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
5940 }
5941 }
5942
5943 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k]
5944 * MaximumSwathHeightY[k];
5945 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k]
5946 * MinimumSwathHeightY;
5947 if (SourcePixelFormat[k] == dm_420_10) {
5948 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
5949 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
5950 }
5951 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k]
5952 * MaximumSwathHeightC[k];
5953 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k]
5954 * MinimumSwathHeightC;
5955 if (SourcePixelFormat[k] == dm_420_10) {
5956 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
5957 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
5958 }
5959
5960 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
5961 <= DETBufferSizeInKByte * 1024 / 2) {
5962 SwathHeightY[k] = MaximumSwathHeightY[k];
5963 SwathHeightC[k] = MaximumSwathHeightC[k];
5964 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
5965 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
5966 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
5967 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
5968 <= DETBufferSizeInKByte * 1024 / 2) {
5969 SwathHeightY[k] = MinimumSwathHeightY;
5970 SwathHeightC[k] = MaximumSwathHeightC[k];
5971 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
5972 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
5973 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
5974 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC
5975 <= DETBufferSizeInKByte * 1024 / 2) {
5976 SwathHeightY[k] = MaximumSwathHeightY[k];
5977 SwathHeightC[k] = MinimumSwathHeightC;
5978 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
5979 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
5980 } else {
5981 SwathHeightY[k] = MinimumSwathHeightY;
5982 SwathHeightC[k] = MinimumSwathHeightC;
5983 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
5984 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
5985 }
5986
5987 if (SwathHeightC[k] == 0) {
5988 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024;
5989 DETBufferSizeC[k] = 0;
5990 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
5991 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024 / 2;
5992 DETBufferSizeC[k] = DETBufferSizeInKByte * 1024 / 2;
5993 } else {
5994 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024 * 2 / 3;
5995 DETBufferSizeC[k] = DETBufferSizeInKByte * 1024 / 3;
5996 }
5997
5998 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC
5999 > DETBufferSizeInKByte * 1024 / 2
6000 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6001 || (SwathHeightC[k] > 0
6002 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6003 *ViewportSizeSupport = false;
6004 ViewportSizeSupportPerPlane[k] = false;
6005 } else {
6006 ViewportSizeSupportPerPlane[k] = true;
6007 }
6008 }
6009 }
6010
CalculateSwathWidth(bool ForceSingleDPP,int NumberOfActivePlanes,enum source_format_class SourcePixelFormat[],enum scan_direction_class SourceScan[],unsigned int ViewportWidth[],unsigned int ViewportHeight[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],enum odm_combine_mode ODMCombineEnabled[],int BytePerPixY[],int BytePerPixC[],int Read256BytesBlockHeightY[],int Read256BytesBlockHeightC[],int Read256BytesBlockWidthY[],int Read256BytesBlockWidthC[],int BlendingAndTiming[],unsigned int HActive[],double HRatio[],int DPPPerPlane[],double SwathWidthSingleDPPY[],double SwathWidthSingleDPPC[],double SwathWidthY[],double SwathWidthC[],int MaximumSwathHeightY[],int MaximumSwathHeightC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[])6011 static void CalculateSwathWidth(
6012 bool ForceSingleDPP,
6013 int NumberOfActivePlanes,
6014 enum source_format_class SourcePixelFormat[],
6015 enum scan_direction_class SourceScan[],
6016 unsigned int ViewportWidth[],
6017 unsigned int ViewportHeight[],
6018 unsigned int SurfaceWidthY[],
6019 unsigned int SurfaceWidthC[],
6020 unsigned int SurfaceHeightY[],
6021 unsigned int SurfaceHeightC[],
6022 enum odm_combine_mode ODMCombineEnabled[],
6023 int BytePerPixY[],
6024 int BytePerPixC[],
6025 int Read256BytesBlockHeightY[],
6026 int Read256BytesBlockHeightC[],
6027 int Read256BytesBlockWidthY[],
6028 int Read256BytesBlockWidthC[],
6029 int BlendingAndTiming[],
6030 unsigned int HActive[],
6031 double HRatio[],
6032 int DPPPerPlane[],
6033 double SwathWidthSingleDPPY[],
6034 double SwathWidthSingleDPPC[],
6035 double SwathWidthY[],
6036 double SwathWidthC[],
6037 int MaximumSwathHeightY[],
6038 int MaximumSwathHeightC[],
6039 unsigned int swath_width_luma_ub[],
6040 unsigned int swath_width_chroma_ub[])
6041 {
6042 unsigned int k, j;
6043 long surface_width_ub_l;
6044 long surface_height_ub_l;
6045 long surface_width_ub_c;
6046 long surface_height_ub_c;
6047
6048 for (k = 0; k < NumberOfActivePlanes; ++k) {
6049 enum odm_combine_mode MainPlaneODMCombine = 0;
6050
6051 if (SourceScan[k] != dm_vert) {
6052 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6053 } else {
6054 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6055 }
6056
6057 MainPlaneODMCombine = ODMCombineEnabled[k];
6058 for (j = 0; j < NumberOfActivePlanes; ++j) {
6059 if (BlendingAndTiming[k] == j) {
6060 MainPlaneODMCombine = ODMCombineEnabled[j];
6061 }
6062 }
6063
6064 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
6065 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6066 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
6067 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6068 } else if (DPPPerPlane[k] == 2) {
6069 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6070 } else {
6071 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6072 }
6073
6074 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6075 SwathWidthC[k] = SwathWidthY[k] / 2;
6076 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6077 } else {
6078 SwathWidthC[k] = SwathWidthY[k];
6079 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
6080 }
6081
6082 if (ForceSingleDPP == true) {
6083 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6084 SwathWidthC[k] = SwathWidthSingleDPPC[k];
6085 }
6086
6087 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6088 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6089
6090 if (SourceScan[k] != dm_vert) {
6091 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6092 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6093 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
6094 Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6095 if (BytePerPixC[k] > 0) {
6096 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6097 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
6098 Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
6099 } else {
6100 swath_width_chroma_ub[k] = 0;
6101 }
6102 } else {
6103 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
6104 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
6105 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
6106 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
6107 if (BytePerPixC[k] > 0) {
6108 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
6109 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
6110 Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
6111 } else {
6112 swath_width_chroma_ub[k] = 0;
6113 }
6114 }
6115 }
6116 }
6117
/*
 * CalculateExtraLatency - extra latency as the sum of two terms:
 *   (RoundTripPingLatencyCycles + 32) / DCFCLK
 *   CalculateExtraLatencyBytes(...) / ReturnBW
 *
 * All parameters other than RoundTripPingLatencyCycles, DCFCLK and ReturnBW
 * are forwarded unchanged to CalculateExtraLatencyBytes().
 */
static double CalculateExtraLatency(
		long RoundTripPingLatencyCycles,
		long ReorderingBytes,
		double DCFCLK,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		double ReturnBW,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	const double LatencyBytes = CalculateExtraLatencyBytes(
			ReorderingBytes,
			TotalNumberOfActiveDPP,
			PixelChunkSizeInKByte,
			TotalNumberOfDCCActiveDPP,
			MetaChunkSize,
			GPUVMEnable,
			HostVMEnable,
			NumberOfActivePlanes,
			NumberOfDPP,
			dpte_group_bytes,
			PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
			PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
			HostVMMinPageSize,
			HostVMMaxNonCachedPageTableLevels);
	const double RoundTripTerm = (RoundTripPingLatencyCycles + 32) / DCFCLK;
	const double TransferTerm = LatencyBytes / ReturnBW;

	return RoundTripTerm + TransferTerm;
}
6156
/*
 * CalculateExtraLatencyBytes - number of bytes that contribute to the extra
 * latency.
 *
 * The base amount is
 *   ReorderingBytes
 *   + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *      + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.
 *
 * When GPUVMEnable is set, every active plane additionally contributes
 *   NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels) * factor
 * where, if HostVMEnable is also set, "factor" is the ratio of the two
 * Percent...PixelMixedWithVMData / Percent...VMDataOnly parameters and
 * "levels" is HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2
 * (not below 0) according to HostVMMinPageSize thresholds of 2048 and
 * 1048576. Without host VM, factor is 1 and levels is 0.
 */
static double CalculateExtraLatencyBytes(
		long ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	/* Defaults describe the "no host VM translation" case. */
	double InefficiencyFactor = 1;
	int DynamicLevels = 0;
	int LevelMultiplier;
	double TotalBytes;
	unsigned int k;

	if (GPUVMEnable == true && HostVMEnable == true) {
		InefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
				/ PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;

		if (HostVMMinPageSize < 2048) {
			DynamicLevels = HostVMMaxNonCachedPageTableLevels;
		} else if (HostVMMinPageSize < 1048576) {
			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		} else {
			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
		}
	}

	TotalBytes = ReorderingBytes
			+ (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	if (GPUVMEnable != true) {
		return TotalBytes;
	}

	LevelMultiplier = 1 + 8 * DynamicLevels;
	for (k = 0; k < NumberOfActivePlanes; ++k) {
		TotalBytes = TotalBytes
				+ NumberOfDPP[k] * dpte_group_bytes[k] * LevelMultiplier * InefficiencyFactor;
	}

	return TotalBytes;
}
6201
6202
/*
 * CalculateUrgentLatency - the largest of the three urgent latency inputs
 * (via dml_max3), optionally adjusted for the fabric clock.
 *
 * When DoUrgentLatencyAdjustment is set, the result is increased by
 *   UrgentLatencyAdjustmentFabricClockComponent
 *   * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 */
static double CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double WorstCase = dml_max3(UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData,
			UrgentLatencyVMDataOnly);

	if (DoUrgentLatencyAdjustment != true) {
		return WorstCase;
	}

	return WorstCase + UrgentLatencyAdjustmentFabricClockComponent
			* (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
6220
UseMinimumDCFCLK(struct display_mode_lib * mode_lib,struct vba_vars_st * v,int MaxPrefetchMode,int ReorderingBytes)6221 static noinline_for_stack void UseMinimumDCFCLK(
6222 struct display_mode_lib *mode_lib,
6223 struct vba_vars_st *v,
6224 int MaxPrefetchMode,
6225 int ReorderingBytes)
6226 {
6227 double NormalEfficiency = 0;
6228 double PTEEfficiency = 0;
6229 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2] = { { 0 } };
6230 unsigned int i, j, k;
6231
6232 NormalEfficiency = (v->HostVMEnable == true ? v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
6233 : v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly) / 100.0;
6234 PTEEfficiency = (v->HostVMEnable == true ? v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly
6235 / v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData : 1.0);
6236 for (i = 0; i < mode_lib->soc.num_states; ++i) {
6237 for (j = 0; j <= 1; ++j) {
6238 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX] = { 0 };
6239 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX] = { 0 };
6240 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX] = { 0 };
6241 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX] = { 0 };
6242 double MinimumTWait = 0;
6243 double NonDPTEBandwidth = 0;
6244 double DPTEBandwidth = 0;
6245 double DCFCLKRequiredForAverageBandwidth = 0;
6246 double ExtraLatencyBytes = 0;
6247 double ExtraLatencyCycles = 0;
6248 double DCFCLKRequiredForPeakBandwidth = 0;
6249 int NoOfDPPState[DC__NUM_DPP__MAX] = { 0 };
6250 double MinimumTvmPlus2Tr0 = 0;
6251
6252 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
6253 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
6254 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
6255 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
6256 }
6257
6258 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
6259 NoOfDPPState[k] = v->NoOfDPP[i][j][k];
6260 }
6261
6262 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
6263 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
6264 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
6265 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
6266 DCFCLKRequiredForAverageBandwidth = dml_max3(v->ProjectedDCFCLKDeepSleep[i][j],
6267 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth / (v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
6268 (NonDPTEBandwidth + DPTEBandwidth / PTEEfficiency) / NormalEfficiency / v->ReturnBusWidth);
6269
6270 ExtraLatencyBytes = CalculateExtraLatencyBytes(ReorderingBytes, v->TotalNumberOfActiveDPP[i][j], v->PixelChunkSizeInKByte, v->TotalNumberOfDCCActiveDPP[i][j],
6271 v->MetaChunkSize, v->GPUVMEnable, v->HostVMEnable, v->NumberOfActivePlanes, NoOfDPPState, v->dpte_group_bytes,
6272 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6273 v->HostVMMinPageSize, v->HostVMMaxNonCachedPageTableLevels);
6274 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + 32 + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
6275 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
6276 double DCFCLKCyclesRequiredInPrefetch = { 0 };
6277 double ExpectedPrefetchBWAcceleration = { 0 };
6278 double PrefetchTime = { 0 };
6279
6280 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
6281 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
6282 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / PTEEfficiency
6283 / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0) + 2 * v->DPTEBytesPerRow[i][j][k] / PTEEfficiency
6284 / NormalEfficiency / v->ReturnBusWidth + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
6285 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
6286 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k]) / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
6287 DynamicMetadataVMExtraLatency[k] = (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
6288 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
6289 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - v->UrgLatency[i] * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels
6290 : v->GPUVMMaxPageTableLevels - 2) * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - DynamicMetadataVMExtraLatency[k];
6291
6292 if (PrefetchTime > 0) {
6293 double ExpectedVRatioPrefetch = { 0 };
6294 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
6295 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
6296 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
6297 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
6298 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
6299 + NoOfDPPState[k] * DPTEBandwidth / PTEEfficiency / NormalEfficiency / v->ReturnBusWidth;
6300 }
6301 } else {
6302 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
6303 }
6304 if (v->DynamicMetadataEnable[k] == true) {
6305 double TsetupPipe = { 0 };
6306 double TdmbfPipe = { 0 };
6307 double TdmsksPipe = { 0 };
6308 double TdmecPipe = { 0 };
6309 double AllowedTimeForUrgentExtraLatency = { 0 };
6310
6311 CalculateDynamicMetadataParameters(
6312 v->MaxInterDCNTileRepeaters,
6313 v->RequiredDPPCLK[i][j][k],
6314 v->RequiredDISPCLK[i][j],
6315 v->ProjectedDCFCLKDeepSleep[i][j],
6316 v->PixelClock[k],
6317 v->HTotal[k],
6318 v->VTotal[k] - v->VActive[k],
6319 v->DynamicMetadataTransmittedBytes[k],
6320 v->DynamicMetadataLinesBeforeActiveRequired[k],
6321 v->Interlace[k],
6322 v->ProgressiveToInterlaceUnitInOPP,
6323 &TsetupPipe,
6324 &TdmbfPipe,
6325 &TdmecPipe,
6326 &TdmsksPipe);
6327 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TsetupPipe
6328 - TdmbfPipe - TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
6329 if (AllowedTimeForUrgentExtraLatency > 0) {
6330 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(DCFCLKRequiredForPeakBandwidthPerPlane[k],
6331 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
6332 } else {
6333 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
6334 }
6335 }
6336 }
6337 DCFCLKRequiredForPeakBandwidth = 0;
6338 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
6339 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
6340 }
6341 MinimumTvmPlus2Tr0 = v->UrgLatency[i] * (v->GPUVMEnable == true ? (v->HostVMEnable == true ?
6342 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) : 0);
6343 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
6344 double MaximumTvmPlus2Tr0PlusTsw = { 0 };
6345 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
6346 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
6347 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
6348 } else {
6349 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth, 2 * ExtraLatencyCycles
6350 / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
6351 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
6352 }
6353 }
6354 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * (1 + mode_lib->vba.PercentMarginOverMinimumRequiredDCFCLK / 100)
6355 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
6356 }
6357 }
6358 }
6359
6360