1 /*
2 * Copyright 2017 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25
26 #include "dc.h"
27 #include "../display_mode_lib.h"
28 #include "../dcn30/display_mode_vba_30.h"
29 #include "display_mode_vba_31.h"
30 #include "../dml_inline_defs.h"
31
32 /*
33 * NOTE:
34 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
35 *
36 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
37 * ways. Unless there is something clearly wrong with it the code should
38 * remain as-is as it provides us with a guarantee from HW that it is correct.
39 */
40
41 #define BPP_INVALID 0
42 #define BPP_BLENDED_PIPE 0xffffffff
43 #define DCN31_MAX_DSC_IMAGE_WIDTH 5184
44 #define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096
45 #define DCN3_15_MIN_COMPBUF_SIZE_KB 128
46 #define DCN3_15_MAX_DET_SIZE 384
47
48 // For DML-C changes that haven't been propagated to VBA yet
49 //#define __DML_VBA_ALLOW_DELTA__
50
51 // Move these to ip parameters/constant
52
53 // The vstartup value at which DML starts trying to see if the mode can be supported
54 #define __DML_VBA_MIN_VSTARTUP__ 9
55
56 // Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
57 #define __DML_ARB_TO_RET_DELAY__ (7 + 95)
58
59 // fudge factor for min dcfclk calculation
60 #define __DML_MIN_DCFCLK_FACTOR__ 1.15
61
62 typedef struct {
63 double DPPCLK;
64 double DISPCLK;
65 double PixelClock;
66 double DCFCLKDeepSleep;
67 unsigned int DPPPerPlane;
68 bool ScalerEnabled;
69 double VRatio;
70 double VRatioChroma;
71 enum scan_direction_class SourceScan;
72 unsigned int BlockWidth256BytesY;
73 unsigned int BlockHeight256BytesY;
74 unsigned int BlockWidth256BytesC;
75 unsigned int BlockHeight256BytesC;
76 unsigned int InterlaceEnable;
77 unsigned int NumberOfCursors;
78 unsigned int VBlank;
79 unsigned int HTotal;
80 unsigned int DCCEnable;
81 bool ODMCombineIsEnabled;
82 enum source_format_class SourcePixelFormat;
83 int BytePerPixelY;
84 int BytePerPixelC;
85 bool ProgressiveToInterlaceUnitInOPP;
86 } Pipe;
87
88 #define BPP_INVALID 0
89 #define BPP_BLENDED_PIPE 0xffffffff
90
91 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
92 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
93 static unsigned int dscceComputeDelay(
94 unsigned int bpc,
95 double BPP,
96 unsigned int sliceWidth,
97 unsigned int numSlices,
98 enum output_format_class pixelFormat,
99 enum output_encoder_class Output);
100 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
101 static bool CalculatePrefetchSchedule(
102 struct display_mode_lib *mode_lib,
103 double HostVMInefficiencyFactor,
104 Pipe *myPipe,
105 unsigned int DSCDelay,
106 double DPPCLKDelaySubtotalPlusCNVCFormater,
107 double DPPCLKDelaySCL,
108 double DPPCLKDelaySCLLBOnly,
109 double DPPCLKDelayCNVCCursor,
110 double DISPCLKDelaySubtotal,
111 unsigned int DPP_RECOUT_WIDTH,
112 enum output_format_class OutputFormat,
113 unsigned int MaxInterDCNTileRepeaters,
114 unsigned int VStartup,
115 unsigned int MaxVStartup,
116 unsigned int GPUVMPageTableLevels,
117 bool GPUVMEnable,
118 bool HostVMEnable,
119 unsigned int HostVMMaxNonCachedPageTableLevels,
120 double HostVMMinPageSize,
121 bool DynamicMetadataEnable,
122 bool DynamicMetadataVMEnabled,
123 int DynamicMetadataLinesBeforeActiveRequired,
124 unsigned int DynamicMetadataTransmittedBytes,
125 double UrgentLatency,
126 double UrgentExtraLatency,
127 double TCalc,
128 unsigned int PDEAndMetaPTEBytesFrame,
129 unsigned int MetaRowByte,
130 unsigned int PixelPTEBytesPerRow,
131 double PrefetchSourceLinesY,
132 unsigned int SwathWidthY,
133 double VInitPreFillY,
134 unsigned int MaxNumSwathY,
135 double PrefetchSourceLinesC,
136 unsigned int SwathWidthC,
137 double VInitPreFillC,
138 unsigned int MaxNumSwathC,
139 int swath_width_luma_ub,
140 int swath_width_chroma_ub,
141 unsigned int SwathHeightY,
142 unsigned int SwathHeightC,
143 double TWait,
144 double *DSTXAfterScaler,
145 double *DSTYAfterScaler,
146 double *DestinationLinesForPrefetch,
147 double *PrefetchBandwidth,
148 double *DestinationLinesToRequestVMInVBlank,
149 double *DestinationLinesToRequestRowInVBlank,
150 double *VRatioPrefetchY,
151 double *VRatioPrefetchC,
152 double *RequiredPrefetchPixDataBWLuma,
153 double *RequiredPrefetchPixDataBWChroma,
154 bool *NotEnoughTimeForDynamicMetadata,
155 double *Tno_bw,
156 double *prefetch_vmrow_bw,
157 double *Tdmdl_vm,
158 double *Tdmdl,
159 double *TSetup,
160 int *VUpdateOffsetPix,
161 double *VUpdateWidthPix,
162 double *VReadyOffsetPix);
163 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
164 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
165 static void CalculateDCCConfiguration(
166 bool DCCEnabled,
167 bool DCCProgrammingAssumesScanDirectionUnknown,
168 enum source_format_class SourcePixelFormat,
169 unsigned int SurfaceWidthLuma,
170 unsigned int SurfaceWidthChroma,
171 unsigned int SurfaceHeightLuma,
172 unsigned int SurfaceHeightChroma,
173 double DETBufferSize,
174 unsigned int RequestHeight256ByteLuma,
175 unsigned int RequestHeight256ByteChroma,
176 enum dm_swizzle_mode TilingFormat,
177 unsigned int BytePerPixelY,
178 unsigned int BytePerPixelC,
179 double BytePerPixelDETY,
180 double BytePerPixelDETC,
181 enum scan_direction_class ScanOrientation,
182 unsigned int *MaxUncompressedBlockLuma,
183 unsigned int *MaxUncompressedBlockChroma,
184 unsigned int *MaxCompressedBlockLuma,
185 unsigned int *MaxCompressedBlockChroma,
186 unsigned int *IndependentBlockLuma,
187 unsigned int *IndependentBlockChroma);
188 static double CalculatePrefetchSourceLines(
189 struct display_mode_lib *mode_lib,
190 double VRatio,
191 double vtaps,
192 bool Interlace,
193 bool ProgressiveToInterlaceUnitInOPP,
194 unsigned int SwathHeight,
195 unsigned int ViewportYStart,
196 double *VInitPreFill,
197 unsigned int *MaxNumSwath);
198 static unsigned int CalculateVMAndRowBytes(
199 struct display_mode_lib *mode_lib,
200 bool DCCEnable,
201 unsigned int BlockHeight256Bytes,
202 unsigned int BlockWidth256Bytes,
203 enum source_format_class SourcePixelFormat,
204 unsigned int SurfaceTiling,
205 unsigned int BytePerPixel,
206 enum scan_direction_class ScanDirection,
207 unsigned int SwathWidth,
208 unsigned int ViewportHeight,
209 bool GPUVMEnable,
210 bool HostVMEnable,
211 unsigned int HostVMMaxNonCachedPageTableLevels,
212 unsigned int GPUVMMinPageSize,
213 unsigned int HostVMMinPageSize,
214 unsigned int PTEBufferSizeInRequests,
215 unsigned int Pitch,
216 unsigned int DCCMetaPitch,
217 unsigned int *MacroTileWidth,
218 unsigned int *MetaRowByte,
219 unsigned int *PixelPTEBytesPerRow,
220 bool *PTEBufferSizeNotExceeded,
221 int *dpte_row_width_ub,
222 unsigned int *dpte_row_height,
223 unsigned int *MetaRequestWidth,
224 unsigned int *MetaRequestHeight,
225 unsigned int *meta_row_width,
226 unsigned int *meta_row_height,
227 int *vm_group_bytes,
228 unsigned int *dpte_group_bytes,
229 unsigned int *PixelPTEReqWidth,
230 unsigned int *PixelPTEReqHeight,
231 unsigned int *PTERequestSize,
232 int *DPDE0BytesFrame,
233 int *MetaPTEBytesFrame);
234 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
235 static void CalculateRowBandwidth(
236 bool GPUVMEnable,
237 enum source_format_class SourcePixelFormat,
238 double VRatio,
239 double VRatioChroma,
240 bool DCCEnable,
241 double LineTime,
242 unsigned int MetaRowByteLuma,
243 unsigned int MetaRowByteChroma,
244 unsigned int meta_row_height_luma,
245 unsigned int meta_row_height_chroma,
246 unsigned int PixelPTEBytesPerRowLuma,
247 unsigned int PixelPTEBytesPerRowChroma,
248 unsigned int dpte_row_height_luma,
249 unsigned int dpte_row_height_chroma,
250 double *meta_row_bw,
251 double *dpte_row_bw);
252
253 static void CalculateFlipSchedule(
254 struct display_mode_lib *mode_lib,
255 unsigned int k,
256 double HostVMInefficiencyFactor,
257 double UrgentExtraLatency,
258 double UrgentLatency,
259 double PDEAndMetaPTEBytesPerFrame,
260 double MetaRowBytes,
261 double DPTEBytesPerRow);
262 static double CalculateWriteBackDelay(
263 enum source_format_class WritebackPixelFormat,
264 double WritebackHRatio,
265 double WritebackVRatio,
266 unsigned int WritebackVTaps,
267 int WritebackDestinationWidth,
268 int WritebackDestinationHeight,
269 int WritebackSourceHeight,
270 unsigned int HTotal);
271
272 static void CalculateVupdateAndDynamicMetadataParameters(
273 int MaxInterDCNTileRepeaters,
274 double DPPCLK,
275 double DISPCLK,
276 double DCFClkDeepSleep,
277 double PixelClock,
278 int HTotal,
279 int VBlank,
280 int DynamicMetadataTransmittedBytes,
281 int DynamicMetadataLinesBeforeActiveRequired,
282 int InterlaceEnable,
283 bool ProgressiveToInterlaceUnitInOPP,
284 double *TSetup,
285 double *Tdmbf,
286 double *Tdmec,
287 double *Tdmsks,
288 int *VUpdateOffsetPix,
289 double *VUpdateWidthPix,
290 double *VReadyOffsetPix);
291
292 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
293 struct display_mode_lib *mode_lib,
294 unsigned int PrefetchMode,
295 double DCFCLK,
296 double ReturnBW,
297 double UrgentLatency,
298 double ExtraLatency,
299 double SOCCLK,
300 double DCFCLKDeepSleep,
301 unsigned int DETBufferSizeY[],
302 unsigned int DETBufferSizeC[],
303 unsigned int SwathHeightY[],
304 unsigned int SwathHeightC[],
305 double SwathWidthY[],
306 double SwathWidthC[],
307 unsigned int DPPPerPlane[],
308 double BytePerPixelDETY[],
309 double BytePerPixelDETC[],
310 bool UnboundedRequestEnabled,
311 int unsigned CompressedBufferSizeInkByte,
312 enum clock_change_support *DRAMClockChangeSupport,
313 double *StutterExitWatermark,
314 double *StutterEnterPlusExitWatermark,
315 double *Z8StutterExitWatermark,
316 double *Z8StutterEnterPlusExitWatermark);
317
318 static void CalculateDCFCLKDeepSleep(
319 struct display_mode_lib *mode_lib,
320 unsigned int NumberOfActivePlanes,
321 int BytePerPixelY[],
322 int BytePerPixelC[],
323 double VRatio[],
324 double VRatioChroma[],
325 double SwathWidthY[],
326 double SwathWidthC[],
327 unsigned int DPPPerPlane[],
328 double HRatio[],
329 double HRatioChroma[],
330 double PixelClock[],
331 double PSCL_THROUGHPUT[],
332 double PSCL_THROUGHPUT_CHROMA[],
333 double DPPCLK[],
334 double ReadBandwidthLuma[],
335 double ReadBandwidthChroma[],
336 int ReturnBusWidth,
337 double *DCFCLKDeepSleep);
338
339 static void CalculateUrgentBurstFactor(
340 int swath_width_luma_ub,
341 int swath_width_chroma_ub,
342 unsigned int SwathHeightY,
343 unsigned int SwathHeightC,
344 double LineTime,
345 double UrgentLatency,
346 double CursorBufferSize,
347 unsigned int CursorWidth,
348 unsigned int CursorBPP,
349 double VRatio,
350 double VRatioC,
351 double BytePerPixelInDETY,
352 double BytePerPixelInDETC,
353 double DETBufferSizeY,
354 double DETBufferSizeC,
355 double *UrgentBurstFactorCursor,
356 double *UrgentBurstFactorLuma,
357 double *UrgentBurstFactorChroma,
358 bool *NotEnoughUrgentLatencyHiding);
359
360 static void UseMinimumDCFCLK(
361 struct display_mode_lib *mode_lib,
362 int MaxPrefetchMode,
363 int ReorderingBytes);
364
365 static void CalculatePixelDeliveryTimes(
366 unsigned int NumberOfActivePlanes,
367 double VRatio[],
368 double VRatioChroma[],
369 double VRatioPrefetchY[],
370 double VRatioPrefetchC[],
371 unsigned int swath_width_luma_ub[],
372 unsigned int swath_width_chroma_ub[],
373 unsigned int DPPPerPlane[],
374 double HRatio[],
375 double HRatioChroma[],
376 double PixelClock[],
377 double PSCL_THROUGHPUT[],
378 double PSCL_THROUGHPUT_CHROMA[],
379 double DPPCLK[],
380 int BytePerPixelC[],
381 enum scan_direction_class SourceScan[],
382 unsigned int NumberOfCursors[],
383 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
384 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
385 unsigned int BlockWidth256BytesY[],
386 unsigned int BlockHeight256BytesY[],
387 unsigned int BlockWidth256BytesC[],
388 unsigned int BlockHeight256BytesC[],
389 double DisplayPipeLineDeliveryTimeLuma[],
390 double DisplayPipeLineDeliveryTimeChroma[],
391 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
392 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
393 double DisplayPipeRequestDeliveryTimeLuma[],
394 double DisplayPipeRequestDeliveryTimeChroma[],
395 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
396 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
397 double CursorRequestDeliveryTime[],
398 double CursorRequestDeliveryTimePrefetch[]);
399
400 static void CalculateMetaAndPTETimes(
401 int NumberOfActivePlanes,
402 bool GPUVMEnable,
403 int MetaChunkSize,
404 int MinMetaChunkSizeBytes,
405 int HTotal[],
406 double VRatio[],
407 double VRatioChroma[],
408 double DestinationLinesToRequestRowInVBlank[],
409 double DestinationLinesToRequestRowInImmediateFlip[],
410 bool DCCEnable[],
411 double PixelClock[],
412 int BytePerPixelY[],
413 int BytePerPixelC[],
414 enum scan_direction_class SourceScan[],
415 int dpte_row_height[],
416 int dpte_row_height_chroma[],
417 int meta_row_width[],
418 int meta_row_width_chroma[],
419 int meta_row_height[],
420 int meta_row_height_chroma[],
421 int meta_req_width[],
422 int meta_req_width_chroma[],
423 int meta_req_height[],
424 int meta_req_height_chroma[],
425 int dpte_group_bytes[],
426 int PTERequestSizeY[],
427 int PTERequestSizeC[],
428 int PixelPTEReqWidthY[],
429 int PixelPTEReqHeightY[],
430 int PixelPTEReqWidthC[],
431 int PixelPTEReqHeightC[],
432 int dpte_row_width_luma_ub[],
433 int dpte_row_width_chroma_ub[],
434 double DST_Y_PER_PTE_ROW_NOM_L[],
435 double DST_Y_PER_PTE_ROW_NOM_C[],
436 double DST_Y_PER_META_ROW_NOM_L[],
437 double DST_Y_PER_META_ROW_NOM_C[],
438 double TimePerMetaChunkNominal[],
439 double TimePerChromaMetaChunkNominal[],
440 double TimePerMetaChunkVBlank[],
441 double TimePerChromaMetaChunkVBlank[],
442 double TimePerMetaChunkFlip[],
443 double TimePerChromaMetaChunkFlip[],
444 double time_per_pte_group_nom_luma[],
445 double time_per_pte_group_vblank_luma[],
446 double time_per_pte_group_flip_luma[],
447 double time_per_pte_group_nom_chroma[],
448 double time_per_pte_group_vblank_chroma[],
449 double time_per_pte_group_flip_chroma[]);
450
451 static void CalculateVMGroupAndRequestTimes(
452 unsigned int NumberOfActivePlanes,
453 bool GPUVMEnable,
454 unsigned int GPUVMMaxPageTableLevels,
455 unsigned int HTotal[],
456 int BytePerPixelC[],
457 double DestinationLinesToRequestVMInVBlank[],
458 double DestinationLinesToRequestVMInImmediateFlip[],
459 bool DCCEnable[],
460 double PixelClock[],
461 int dpte_row_width_luma_ub[],
462 int dpte_row_width_chroma_ub[],
463 int vm_group_bytes[],
464 unsigned int dpde0_bytes_per_frame_ub_l[],
465 unsigned int dpde0_bytes_per_frame_ub_c[],
466 int meta_pte_bytes_per_frame_ub_l[],
467 int meta_pte_bytes_per_frame_ub_c[],
468 double TimePerVMGroupVBlank[],
469 double TimePerVMGroupFlip[],
470 double TimePerVMRequestVBlank[],
471 double TimePerVMRequestFlip[]);
472
473 static void CalculateStutterEfficiency(
474 struct display_mode_lib *mode_lib,
475 int CompressedBufferSizeInkByte,
476 bool UnboundedRequestEnabled,
477 int ConfigReturnBufferSizeInKByte,
478 int MetaFIFOSizeInKEntries,
479 int ZeroSizeBufferEntries,
480 int NumberOfActivePlanes,
481 int ROBBufferSizeInKByte,
482 double TotalDataReadBandwidth,
483 double DCFCLK,
484 double ReturnBW,
485 double COMPBUF_RESERVED_SPACE_64B,
486 double COMPBUF_RESERVED_SPACE_ZS,
487 double SRExitTime,
488 double SRExitZ8Time,
489 bool SynchronizedVBlank,
490 double Z8StutterEnterPlusExitWatermark,
491 double StutterEnterPlusExitWatermark,
492 bool ProgressiveToInterlaceUnitInOPP,
493 bool Interlace[],
494 double MinTTUVBlank[],
495 int DPPPerPlane[],
496 unsigned int DETBufferSizeY[],
497 int BytePerPixelY[],
498 double BytePerPixelDETY[],
499 double SwathWidthY[],
500 int SwathHeightY[],
501 int SwathHeightC[],
502 double NetDCCRateLuma[],
503 double NetDCCRateChroma[],
504 double DCCFractionOfZeroSizeRequestsLuma[],
505 double DCCFractionOfZeroSizeRequestsChroma[],
506 int HTotal[],
507 int VTotal[],
508 double PixelClock[],
509 double VRatio[],
510 enum scan_direction_class SourceScan[],
511 int BlockHeight256BytesY[],
512 int BlockWidth256BytesY[],
513 int BlockHeight256BytesC[],
514 int BlockWidth256BytesC[],
515 int DCCYMaxUncompressedBlock[],
516 int DCCCMaxUncompressedBlock[],
517 int VActive[],
518 bool DCCEnable[],
519 bool WritebackEnable[],
520 double ReadBandwidthPlaneLuma[],
521 double ReadBandwidthPlaneChroma[],
522 double meta_row_bw[],
523 double dpte_row_bw[],
524 double *StutterEfficiencyNotIncludingVBlank,
525 double *StutterEfficiency,
526 int *NumberOfStutterBurstsPerFrame,
527 double *Z8StutterEfficiencyNotIncludingVBlank,
528 double *Z8StutterEfficiency,
529 int *Z8NumberOfStutterBurstsPerFrame,
530 double *StutterPeriod);
531
532 static void CalculateSwathAndDETConfiguration(
533 bool ForceSingleDPP,
534 int NumberOfActivePlanes,
535 bool DETSharedByAllDPP,
536 unsigned int DETBufferSizeInKByte[],
537 double MaximumSwathWidthLuma[],
538 double MaximumSwathWidthChroma[],
539 enum scan_direction_class SourceScan[],
540 enum source_format_class SourcePixelFormat[],
541 enum dm_swizzle_mode SurfaceTiling[],
542 int ViewportWidth[],
543 int ViewportHeight[],
544 int SurfaceWidthY[],
545 int SurfaceWidthC[],
546 int SurfaceHeightY[],
547 int SurfaceHeightC[],
548 int Read256BytesBlockHeightY[],
549 int Read256BytesBlockHeightC[],
550 int Read256BytesBlockWidthY[],
551 int Read256BytesBlockWidthC[],
552 enum odm_combine_mode ODMCombineEnabled[],
553 int BlendingAndTiming[],
554 int BytePerPixY[],
555 int BytePerPixC[],
556 double BytePerPixDETY[],
557 double BytePerPixDETC[],
558 int HActive[],
559 double HRatio[],
560 double HRatioChroma[],
561 int DPPPerPlane[],
562 int swath_width_luma_ub[],
563 int swath_width_chroma_ub[],
564 double SwathWidth[],
565 double SwathWidthChroma[],
566 int SwathHeightY[],
567 int SwathHeightC[],
568 unsigned int DETBufferSizeY[],
569 unsigned int DETBufferSizeC[],
570 bool ViewportSizeSupportPerPlane[],
571 bool *ViewportSizeSupport);
572 static void CalculateSwathWidth(
573 bool ForceSingleDPP,
574 int NumberOfActivePlanes,
575 enum source_format_class SourcePixelFormat[],
576 enum scan_direction_class SourceScan[],
577 int ViewportWidth[],
578 int ViewportHeight[],
579 int SurfaceWidthY[],
580 int SurfaceWidthC[],
581 int SurfaceHeightY[],
582 int SurfaceHeightC[],
583 enum odm_combine_mode ODMCombineEnabled[],
584 int BytePerPixY[],
585 int BytePerPixC[],
586 int Read256BytesBlockHeightY[],
587 int Read256BytesBlockHeightC[],
588 int Read256BytesBlockWidthY[],
589 int Read256BytesBlockWidthC[],
590 int BlendingAndTiming[],
591 int HActive[],
592 double HRatio[],
593 int DPPPerPlane[],
594 double SwathWidthSingleDPPY[],
595 double SwathWidthSingleDPPC[],
596 double SwathWidthY[],
597 double SwathWidthC[],
598 int MaximumSwathHeightY[],
599 int MaximumSwathHeightC[],
600 int swath_width_luma_ub[],
601 int swath_width_chroma_ub[]);
602
603 static double CalculateExtraLatency(
604 int RoundTripPingLatencyCycles,
605 int ReorderingBytes,
606 double DCFCLK,
607 int TotalNumberOfActiveDPP,
608 int PixelChunkSizeInKByte,
609 int TotalNumberOfDCCActiveDPP,
610 int MetaChunkSize,
611 double ReturnBW,
612 bool GPUVMEnable,
613 bool HostVMEnable,
614 int NumberOfActivePlanes,
615 int NumberOfDPP[],
616 int dpte_group_bytes[],
617 double HostVMInefficiencyFactor,
618 double HostVMMinPageSize,
619 int HostVMMaxNonCachedPageTableLevels);
620
621 static double CalculateExtraLatencyBytes(
622 int ReorderingBytes,
623 int TotalNumberOfActiveDPP,
624 int PixelChunkSizeInKByte,
625 int TotalNumberOfDCCActiveDPP,
626 int MetaChunkSize,
627 bool GPUVMEnable,
628 bool HostVMEnable,
629 int NumberOfActivePlanes,
630 int NumberOfDPP[],
631 int dpte_group_bytes[],
632 double HostVMInefficiencyFactor,
633 double HostVMMinPageSize,
634 int HostVMMaxNonCachedPageTableLevels);
635
636 static double CalculateUrgentLatency(
637 double UrgentLatencyPixelDataOnly,
638 double UrgentLatencyPixelMixedWithVMData,
639 double UrgentLatencyVMDataOnly,
640 bool DoUrgentLatencyAdjustment,
641 double UrgentLatencyAdjustmentFabricClockComponent,
642 double UrgentLatencyAdjustmentFabricClockReference,
643 double FabricClockSingle);
644
645 static void CalculateUnboundedRequestAndCompressedBufferSize(
646 unsigned int DETBufferSizeInKByte,
647 int ConfigReturnBufferSizeInKByte,
648 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
649 int TotalActiveDPP,
650 bool NoChromaPlanes,
651 int MaxNumDPP,
652 int CompressedBufferSegmentSizeInkByteFinal,
653 enum output_encoder_class *Output,
654 bool *UnboundedRequestEnabled,
655 int *CompressedBufferSizeInkByte);
656
657 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
658
/*
 * dml31_recalculate - non-static entry point of this file.
 *
 * Runs the calculation stages on mode_lib in a fixed order:
 *   1. ModeSupportAndSystemConfiguration()
 *   2. PixelClockAdjustmentForProgressiveToInterlaceUnit()
 *   3. DisplayPipeConfiguration()
 *   4. DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation()
 *
 * When __DML_VBA_DEBUG__ is defined, a trace line is printed just before the
 * last stage.
 */
void dml31_recalculate(struct display_mode_lib *mode_lib)
{
	ModeSupportAndSystemConfiguration(mode_lib);
	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
	DisplayPipeConfiguration(mode_lib);

#if defined(__DML_VBA_DEBUG__)
	dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
#endif

	DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
669
dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)670 static unsigned int dscceComputeDelay(
671 unsigned int bpc,
672 double BPP,
673 unsigned int sliceWidth,
674 unsigned int numSlices,
675 enum output_format_class pixelFormat,
676 enum output_encoder_class Output)
677 {
678 // valid bpc = source bits per component in the set of {8, 10, 12}
679 // valid bpp = increments of 1/16 of a bit
680 // min = 6/7/8 in N420/N422/444, respectively
681 // max = such that compression is 1:1
682 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
683 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
684 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
685
686 // fixed value
687 unsigned int rcModelSize = 8192;
688
689 // N422/N420 operate at 2 pixels per clock
690 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
691
692 if (pixelFormat == dm_420)
693 pixelsPerClock = 2;
694 else if (pixelFormat == dm_444)
695 pixelsPerClock = 1;
696 else if (pixelFormat == dm_n422)
697 pixelsPerClock = 2;
698 // #all other modes operate at 1 pixel per clock
699 else
700 pixelsPerClock = 1;
701
702 //initial transmit delay as per PPS
703 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
704
705 //compute ssm delay
706 if (bpc == 8)
707 D = 81;
708 else if (bpc == 10)
709 D = 89;
710 else
711 D = 113;
712
713 //divide by pixel per cycle to compute slice width as seen by DSC
714 w = sliceWidth / pixelsPerClock;
715
716 //422 mode has an additional cycle of delay
717 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
718 s = 0;
719 else
720 s = 1;
721
722 //main calculation for the dscce
723 ix = initalXmitDelay + 45;
724 wx = (w + 2) / 3;
725 P = 3 * wx - w;
726 l0 = ix / w;
727 a = ix + P * l0;
728 ax = (a + 2) / 3 + D + 6 + 1;
729 L = (ax + wx - 1) / wx;
730 if ((ix % w) == 0 && P != 0)
731 lstall = 1;
732 else
733 lstall = 0;
734 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
735
736 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
737 pixels = Delay * 3 * pixelsPerClock;
738 return pixels;
739 }
740
dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)741 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
742 {
743 unsigned int Delay = 0;
744
745 if (pixelFormat == dm_420) {
746 // sfr
747 Delay = Delay + 2;
748 // dsccif
749 Delay = Delay + 0;
750 // dscc - input deserializer
751 Delay = Delay + 3;
752 // dscc gets pixels every other cycle
753 Delay = Delay + 2;
754 // dscc - input cdc fifo
755 Delay = Delay + 12;
756 // dscc gets pixels every other cycle
757 Delay = Delay + 13;
758 // dscc - cdc uncertainty
759 Delay = Delay + 2;
760 // dscc - output cdc fifo
761 Delay = Delay + 7;
762 // dscc gets pixels every other cycle
763 Delay = Delay + 3;
764 // dscc - cdc uncertainty
765 Delay = Delay + 2;
766 // dscc - output serializer
767 Delay = Delay + 1;
768 // sft
769 Delay = Delay + 1;
770 } else if (pixelFormat == dm_n422) {
771 // sfr
772 Delay = Delay + 2;
773 // dsccif
774 Delay = Delay + 1;
775 // dscc - input deserializer
776 Delay = Delay + 5;
777 // dscc - input cdc fifo
778 Delay = Delay + 25;
779 // dscc - cdc uncertainty
780 Delay = Delay + 2;
781 // dscc - output cdc fifo
782 Delay = Delay + 10;
783 // dscc - cdc uncertainty
784 Delay = Delay + 2;
785 // dscc - output serializer
786 Delay = Delay + 1;
787 // sft
788 Delay = Delay + 1;
789 } else {
790 // sfr
791 Delay = Delay + 2;
792 // dsccif
793 Delay = Delay + 0;
794 // dscc - input deserializer
795 Delay = Delay + 3;
796 // dscc - input cdc fifo
797 Delay = Delay + 12;
798 // dscc - cdc uncertainty
799 Delay = Delay + 2;
800 // dscc - output cdc fifo
801 Delay = Delay + 7;
802 // dscc - output serializer
803 Delay = Delay + 1;
804 // dscc - cdc uncertainty
805 Delay = Delay + 2;
806 // sft
807 Delay = Delay + 1;
808 }
809
810 return Delay;
811 }
812
CalculatePrefetchSchedule(struct display_mode_lib * mode_lib,double HostVMInefficiencyFactor,Pipe * myPipe,unsigned int DSCDelay,double DPPCLKDelaySubtotalPlusCNVCFormater,double DPPCLKDelaySCL,double DPPCLKDelaySCLLBOnly,double DPPCLKDelayCNVCCursor,double DISPCLKDelaySubtotal,unsigned int DPP_RECOUT_WIDTH,enum output_format_class OutputFormat,unsigned int MaxInterDCNTileRepeaters,unsigned int VStartup,unsigned int MaxVStartup,unsigned int GPUVMPageTableLevels,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,double HostVMMinPageSize,bool DynamicMetadataEnable,bool DynamicMetadataVMEnabled,int DynamicMetadataLinesBeforeActiveRequired,unsigned int DynamicMetadataTransmittedBytes,double UrgentLatency,double UrgentExtraLatency,double TCalc,unsigned int PDEAndMetaPTEBytesFrame,unsigned int MetaRowByte,unsigned int PixelPTEBytesPerRow,double PrefetchSourceLinesY,unsigned int SwathWidthY,double VInitPreFillY,unsigned int MaxNumSwathY,double PrefetchSourceLinesC,unsigned int SwathWidthC,double VInitPreFillC,unsigned int MaxNumSwathC,int swath_width_luma_ub,int swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double TWait,double * DSTXAfterScaler,double * DSTYAfterScaler,double * DestinationLinesForPrefetch,double * PrefetchBandwidth,double * DestinationLinesToRequestVMInVBlank,double * DestinationLinesToRequestRowInVBlank,double * VRatioPrefetchY,double * VRatioPrefetchC,double * RequiredPrefetchPixDataBWLuma,double * RequiredPrefetchPixDataBWChroma,bool * NotEnoughTimeForDynamicMetadata,double * Tno_bw,double * prefetch_vmrow_bw,double * Tdmdl_vm,double * Tdmdl,double * TSetup,int * VUpdateOffsetPix,double * VUpdateWidthPix,double * VReadyOffsetPix)813 static bool CalculatePrefetchSchedule(
814 struct display_mode_lib *mode_lib,
815 double HostVMInefficiencyFactor,
816 Pipe *myPipe,
817 unsigned int DSCDelay,
818 double DPPCLKDelaySubtotalPlusCNVCFormater,
819 double DPPCLKDelaySCL,
820 double DPPCLKDelaySCLLBOnly,
821 double DPPCLKDelayCNVCCursor,
822 double DISPCLKDelaySubtotal,
823 unsigned int DPP_RECOUT_WIDTH,
824 enum output_format_class OutputFormat,
825 unsigned int MaxInterDCNTileRepeaters,
826 unsigned int VStartup,
827 unsigned int MaxVStartup,
828 unsigned int GPUVMPageTableLevels,
829 bool GPUVMEnable,
830 bool HostVMEnable,
831 unsigned int HostVMMaxNonCachedPageTableLevels,
832 double HostVMMinPageSize,
833 bool DynamicMetadataEnable,
834 bool DynamicMetadataVMEnabled,
835 int DynamicMetadataLinesBeforeActiveRequired,
836 unsigned int DynamicMetadataTransmittedBytes,
837 double UrgentLatency,
838 double UrgentExtraLatency,
839 double TCalc,
840 unsigned int PDEAndMetaPTEBytesFrame,
841 unsigned int MetaRowByte,
842 unsigned int PixelPTEBytesPerRow,
843 double PrefetchSourceLinesY,
844 unsigned int SwathWidthY,
845 double VInitPreFillY,
846 unsigned int MaxNumSwathY,
847 double PrefetchSourceLinesC,
848 unsigned int SwathWidthC,
849 double VInitPreFillC,
850 unsigned int MaxNumSwathC,
851 int swath_width_luma_ub,
852 int swath_width_chroma_ub,
853 unsigned int SwathHeightY,
854 unsigned int SwathHeightC,
855 double TWait,
856 double *DSTXAfterScaler,
857 double *DSTYAfterScaler,
858 double *DestinationLinesForPrefetch,
859 double *PrefetchBandwidth,
860 double *DestinationLinesToRequestVMInVBlank,
861 double *DestinationLinesToRequestRowInVBlank,
862 double *VRatioPrefetchY,
863 double *VRatioPrefetchC,
864 double *RequiredPrefetchPixDataBWLuma,
865 double *RequiredPrefetchPixDataBWChroma,
866 bool *NotEnoughTimeForDynamicMetadata,
867 double *Tno_bw,
868 double *prefetch_vmrow_bw,
869 double *Tdmdl_vm,
870 double *Tdmdl,
871 double *TSetup,
872 int *VUpdateOffsetPix,
873 double *VUpdateWidthPix,
874 double *VReadyOffsetPix)
875 {
876 bool MyError = false;
877 unsigned int DPPCycles, DISPCLKCycles;
878 double DSTTotalPixelsAfterScaler;
879 double LineTime;
880 double dst_y_prefetch_equ;
881 #ifdef __DML_VBA_DEBUG__
882 double Tsw_oto;
883 #endif
884 double prefetch_bw_oto;
885 double prefetch_bw_pr;
886 double Tvm_oto;
887 double Tr0_oto;
888 double Tvm_oto_lines;
889 double Tr0_oto_lines;
890 double dst_y_prefetch_oto;
891 double TimeForFetchingMetaPTE = 0;
892 double TimeForFetchingRowInVBlank = 0;
893 double LinesToRequestPrefetchPixelData = 0;
894 unsigned int HostVMDynamicLevelsTrips;
895 double trip_to_mem;
896 double Tvm_trips;
897 double Tr0_trips;
898 double Tvm_trips_rounded;
899 double Tr0_trips_rounded;
900 double Lsw_oto;
901 double Tpre_rounded;
902 double prefetch_bw_equ;
903 double Tvm_equ;
904 double Tr0_equ;
905 double Tdmbf;
906 double Tdmec;
907 double Tdmsks;
908 double prefetch_sw_bytes;
909 double bytes_pp;
910 double dep_bytes;
911 int max_vratio_pre = 4;
912 double min_Lsw;
913 double Tsw_est1 = 0;
914 double Tsw_est3 = 0;
915 double max_Tsw = 0;
916
917 if (GPUVMEnable == true && HostVMEnable == true) {
918 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
919 } else {
920 HostVMDynamicLevelsTrips = 0;
921 }
922 #ifdef __DML_VBA_DEBUG__
923 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
924 #endif
925 CalculateVupdateAndDynamicMetadataParameters(
926 MaxInterDCNTileRepeaters,
927 myPipe->DPPCLK,
928 myPipe->DISPCLK,
929 myPipe->DCFCLKDeepSleep,
930 myPipe->PixelClock,
931 myPipe->HTotal,
932 myPipe->VBlank,
933 DynamicMetadataTransmittedBytes,
934 DynamicMetadataLinesBeforeActiveRequired,
935 myPipe->InterlaceEnable,
936 myPipe->ProgressiveToInterlaceUnitInOPP,
937 TSetup,
938 &Tdmbf,
939 &Tdmec,
940 &Tdmsks,
941 VUpdateOffsetPix,
942 VUpdateWidthPix,
943 VReadyOffsetPix);
944
945 LineTime = myPipe->HTotal / myPipe->PixelClock;
946 trip_to_mem = UrgentLatency;
947 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
948
949 #ifdef __DML_VBA_ALLOW_DELTA__
950 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
951 #else
952 if (DynamicMetadataVMEnabled == true) {
953 #endif
954 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
955 } else {
956 *Tdmdl = TWait + UrgentExtraLatency;
957 }
958
959 #ifdef __DML_VBA_ALLOW_DELTA__
960 if (DynamicMetadataEnable == false) {
961 *Tdmdl = 0.0;
962 }
963 #endif
964
965 if (DynamicMetadataEnable == true) {
966 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
967 *NotEnoughTimeForDynamicMetadata = true;
968 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
969 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
970 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
971 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
972 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *Tdmdl);
973 } else {
974 *NotEnoughTimeForDynamicMetadata = false;
975 }
976 } else {
977 *NotEnoughTimeForDynamicMetadata = false;
978 }
979
980 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
981
982 if (myPipe->ScalerEnabled)
983 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
984 else
985 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
986
987 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
988
989 DISPCLKCycles = DISPCLKDelaySubtotal;
990
991 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
992 return true;
993
994 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
995
996 #ifdef __DML_VBA_DEBUG__
997 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
998 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
999 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
1000 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
1001 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
1002 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
1003 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
1004 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
1005 #endif
1006
1007 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
1008
1009 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
1010 *DSTYAfterScaler = 1;
1011 else
1012 *DSTYAfterScaler = 0;
1013
1014 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
1015 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
1016 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
1017
1018 #ifdef __DML_VBA_DEBUG__
1019 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
1020 #endif
1021
1022 MyError = false;
1023
1024 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
1025 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
1026 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
1027
1028 #ifdef __DML_VBA_ALLOW_DELTA__
1029 if (!myPipe->DCCEnable) {
1030 Tr0_trips = 0.0;
1031 Tr0_trips_rounded = 0.0;
1032 }
1033 #endif
1034
1035 if (!GPUVMEnable) {
1036 Tvm_trips = 0.0;
1037 Tvm_trips_rounded = 0.0;
1038 }
1039
1040 if (GPUVMEnable) {
1041 if (GPUVMPageTableLevels >= 3) {
1042 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
1043 } else {
1044 *Tno_bw = 0;
1045 }
1046 } else if (!myPipe->DCCEnable) {
1047 *Tno_bw = LineTime;
1048 } else {
1049 *Tno_bw = LineTime / 4;
1050 }
1051
1052 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
1053 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
1054 else
1055 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
1056 /*rev 99*/
1057 prefetch_bw_pr = bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane;
1058 prefetch_bw_pr = dml_min(1, myPipe->VRatio) * prefetch_bw_pr;
1059 max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
1060 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
1061 prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw);
1062
1063 min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre);
1064 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
1065 #ifdef __DML_VBA_DEBUG__
1066 Tsw_oto = Lsw_oto * LineTime;
1067 #endif
1068
1069
1070 #ifdef __DML_VBA_DEBUG__
1071 dml_print("DML: HTotal: %d\n", myPipe->HTotal);
1072 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
1073 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
1074 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
1075 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
1076 dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
1077 #endif
1078
1079 if (GPUVMEnable == true)
1080 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0);
1081 else
1082 Tvm_oto = LineTime / 4.0;
1083
1084 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1085 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term)
1086 LineTime - Tvm_oto,
1087 LineTime / 4);
1088 } else {
1089 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1090 }
1091
1092 #ifdef __DML_VBA_DEBUG__
1093 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
1094 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
1095 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte);
1096 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1097 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1098 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1099 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
1100 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
1101 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
1102 #endif
1103
1104 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1105 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1106 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1107 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1108 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1109 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1110
1111 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
1112
1113 if (prefetch_sw_bytes < dep_bytes)
1114 prefetch_sw_bytes = 2 * dep_bytes;
1115
1116 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1117 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
1118 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
1119 dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
1120 dml_print("DML: LineTime: %f\n", LineTime);
1121 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);
1122
1123 dml_print("DML: LineTime: %f\n", LineTime);
1124 dml_print("DML: VStartup: %d\n", VStartup);
1125 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1126 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
1127 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1128 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1129 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1130 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1131 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1132 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm);
1133 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
1134 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler);
1135 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler \n", *DSTYAfterScaler);
1136
1137 *PrefetchBandwidth = 0;
1138 *DestinationLinesToRequestVMInVBlank = 0;
1139 *DestinationLinesToRequestRowInVBlank = 0;
1140 *VRatioPrefetchY = 0;
1141 *VRatioPrefetchC = 0;
1142 *RequiredPrefetchPixDataBWLuma = 0;
1143 if (dst_y_prefetch_equ > 1) {
1144 double PrefetchBandwidth1;
1145 double PrefetchBandwidth2;
1146 double PrefetchBandwidth3;
1147 double PrefetchBandwidth4;
1148
1149 if (Tpre_rounded - *Tno_bw > 0) {
1150 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1151 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
1152 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
1153 } else {
1154 PrefetchBandwidth1 = 0;
1155 }
1156
1157 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
1158 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1159 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
1160 }
1161
1162 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1163 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1164 else
1165 PrefetchBandwidth2 = 0;
1166
1167 if (Tpre_rounded - Tvm_trips_rounded > 0) {
1168 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1169 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
1170 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
1171 } else {
1172 PrefetchBandwidth3 = 0;
1173 }
1174
1175 #ifdef __DML_VBA_DEBUG__
1176 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
1177 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
1178 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
1179 #endif
1180 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1181 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1182 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
1183 }
1184
1185 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1186 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1187 else
1188 PrefetchBandwidth4 = 0;
1189
1190 {
1191 bool Case1OK;
1192 bool Case2OK;
1193 bool Case3OK;
1194
1195 if (PrefetchBandwidth1 > 0) {
1196 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
1197 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1198 Case1OK = true;
1199 } else {
1200 Case1OK = false;
1201 }
1202 } else {
1203 Case1OK = false;
1204 }
1205
1206 if (PrefetchBandwidth2 > 0) {
1207 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
1208 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1209 Case2OK = true;
1210 } else {
1211 Case2OK = false;
1212 }
1213 } else {
1214 Case2OK = false;
1215 }
1216
1217 if (PrefetchBandwidth3 > 0) {
1218 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
1219 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1220 Case3OK = true;
1221 } else {
1222 Case3OK = false;
1223 }
1224 } else {
1225 Case3OK = false;
1226 }
1227
1228 if (Case1OK) {
1229 prefetch_bw_equ = PrefetchBandwidth1;
1230 } else if (Case2OK) {
1231 prefetch_bw_equ = PrefetchBandwidth2;
1232 } else if (Case3OK) {
1233 prefetch_bw_equ = PrefetchBandwidth3;
1234 } else {
1235 prefetch_bw_equ = PrefetchBandwidth4;
1236 }
1237
1238 #ifdef __DML_VBA_DEBUG__
1239 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
1240 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
1241 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
1242 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
1243 #endif
1244
1245 if (prefetch_bw_equ > 0) {
1246 if (GPUVMEnable == true) {
1247 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1248 } else {
1249 Tvm_equ = LineTime / 4;
1250 }
1251
1252 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1253 Tr0_equ = dml_max4(
1254 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1255 Tr0_trips,
1256 (LineTime - Tvm_equ) / 2,
1257 LineTime / 4);
1258 } else {
1259 Tr0_equ = (LineTime - Tvm_equ) / 2;
1260 }
1261 } else {
1262 Tvm_equ = 0;
1263 Tr0_equ = 0;
1264 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1265 }
1266 }
1267
1268 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1269 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1270 TimeForFetchingMetaPTE = Tvm_oto;
1271 TimeForFetchingRowInVBlank = Tr0_oto;
1272 *PrefetchBandwidth = prefetch_bw_oto;
1273 } else {
1274 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1275 TimeForFetchingMetaPTE = Tvm_equ;
1276 TimeForFetchingRowInVBlank = Tr0_equ;
1277 *PrefetchBandwidth = prefetch_bw_equ;
1278 }
1279
1280 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1281
1282 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1283
1284 #ifdef __DML_VBA_ALLOW_DELTA__
1285 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
1286 // See note above dated 5/30/2018
1287 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
1288 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
1289 #else
1290 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
1291 #endif
1292
1293 #ifdef __DML_VBA_DEBUG__
1294 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
1295 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1296 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
1297 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1298 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1299 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1300 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
1301 #endif
1302
1303 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1304
1305 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
1306 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1307 #ifdef __DML_VBA_DEBUG__
1308 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1309 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
1310 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
1311 #endif
1312 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1313 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1314 *VRatioPrefetchY = dml_max(
1315 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1316 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1317 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1318 } else {
1319 MyError = true;
1320 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1321 *VRatioPrefetchY = 0;
1322 }
1323 #ifdef __DML_VBA_DEBUG__
1324 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1325 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1326 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
1327 #endif
1328 }
1329
1330 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1331 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1332
1333 #ifdef __DML_VBA_DEBUG__
1334 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1335 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
1336 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
1337 #endif
1338 if ((SwathHeightC > 4) || VInitPreFillC > 3) {
1339 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1340 *VRatioPrefetchC = dml_max(
1341 *VRatioPrefetchC,
1342 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1343 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1344 } else {
1345 MyError = true;
1346 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1347 *VRatioPrefetchC = 0;
1348 }
1349 #ifdef __DML_VBA_DEBUG__
1350 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1351 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
1352 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
1353 #endif
1354 }
1355
1356 #ifdef __DML_VBA_DEBUG__
1357 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
1358 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
1359 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1360 #endif
1361
1362 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;
1363
1364 #ifdef __DML_VBA_DEBUG__
1365 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
1366 #endif
1367
1368 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
1369 / LineTime;
1370 } else {
1371 MyError = true;
1372 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1373 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1374 *VRatioPrefetchY = 0;
1375 *VRatioPrefetchC = 0;
1376 *RequiredPrefetchPixDataBWLuma = 0;
1377 *RequiredPrefetchPixDataBWChroma = 0;
1378 }
1379
1380 dml_print(
1381 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
1382 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1383 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1384 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1385 dml_print(
1386 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
1387 (double) LinesToRequestPrefetchPixelData * LineTime);
1388 dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
1389 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) /
1390 (double) myPipe->HTotal)) * LineTime);
1391 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
1392 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n",
1393 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
1394 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
1395 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1396
1397 } else {
1398 MyError = true;
1399 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1400 }
1401
1402 {
1403 double prefetch_vm_bw;
1404 double prefetch_row_bw;
1405
1406 if (PDEAndMetaPTEBytesFrame == 0) {
1407 prefetch_vm_bw = 0;
1408 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1409 #ifdef __DML_VBA_DEBUG__
1410 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1411 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1412 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1413 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1414 #endif
1415 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1416 #ifdef __DML_VBA_DEBUG__
1417 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
1418 #endif
1419 } else {
1420 prefetch_vm_bw = 0;
1421 MyError = true;
1422 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1423 }
1424
1425 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1426 prefetch_row_bw = 0;
1427 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1428 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1429
1430 #ifdef __DML_VBA_DEBUG__
1431 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1432 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1433 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1434 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
1435 #endif
1436 } else {
1437 prefetch_row_bw = 0;
1438 MyError = true;
1439 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1440 }
1441
1442 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1443 }
1444
1445 if (MyError) {
1446 *PrefetchBandwidth = 0;
1447 *DestinationLinesToRequestVMInVBlank = 0;
1448 *DestinationLinesToRequestRowInVBlank = 0;
1449 *DestinationLinesForPrefetch = 0;
1450 *VRatioPrefetchY = 0;
1451 *VRatioPrefetchC = 0;
1452 *RequiredPrefetchPixDataBWLuma = 0;
1453 *RequiredPrefetchPixDataBWChroma = 0;
1454 }
1455
1456 return MyError;
1457 }
1458
/*
 * Snap Clock upward onto the DFS grid, i.e. onto a value of the form
 * (4 * VCOSpeed) / N where N is the divider returned by dml_floor().
 *
 * Rounding the divider down makes the quotient no smaller than Clock
 * (for positive inputs).
 *
 * NOTE(review): if Clock is larger than 4 * VCOSpeed the divider becomes 0
 * and the result is not finite; presumably callers never pass such a
 * value - confirm against call sites.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double ScaledVCO = VCOSpeed * 4;
	const double Divider = dml_floor(ScaledVCO / Clock, 1);

	return ScaledVCO / Divider;
}
1463
/*
 * Snap Clock downward onto the DFS grid, i.e. onto a value of the form
 * (4 * VCOSpeed) / N where N is the divider returned by dml_ceil().
 *
 * Rounding the divider up makes the quotient no larger than Clock
 * (for positive inputs).
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double ScaledVCO = VCOSpeed * 4.0;
	const double Divider = dml_ceil(ScaledVCO / Clock, 1);

	return ScaledVCO / Divider;
}
1468
1469 static void CalculateDCCConfiguration(
1470 bool DCCEnabled,
1471 bool DCCProgrammingAssumesScanDirectionUnknown,
1472 enum source_format_class SourcePixelFormat,
1473 unsigned int SurfaceWidthLuma,
1474 unsigned int SurfaceWidthChroma,
1475 unsigned int SurfaceHeightLuma,
1476 unsigned int SurfaceHeightChroma,
1477 double DETBufferSize,
1478 unsigned int RequestHeight256ByteLuma,
1479 unsigned int RequestHeight256ByteChroma,
1480 enum dm_swizzle_mode TilingFormat,
1481 unsigned int BytePerPixelY,
1482 unsigned int BytePerPixelC,
1483 double BytePerPixelDETY,
1484 double BytePerPixelDETC,
1485 enum scan_direction_class ScanOrientation,
1486 unsigned int *MaxUncompressedBlockLuma,
1487 unsigned int *MaxUncompressedBlockChroma,
1488 unsigned int *MaxCompressedBlockLuma,
1489 unsigned int *MaxCompressedBlockChroma,
1490 unsigned int *IndependentBlockLuma,
1491 unsigned int *IndependentBlockChroma)
1492 {
1493 int yuv420;
1494 int horz_div_l;
1495 int horz_div_c;
1496 int vert_div_l;
1497 int vert_div_c;
1498
1499 int swath_buf_size;
1500 double detile_buf_vp_horz_limit;
1501 double detile_buf_vp_vert_limit;
1502
1503 int MAS_vp_horz_limit;
1504 int MAS_vp_vert_limit;
1505 int max_vp_horz_width;
1506 int max_vp_vert_height;
1507 int eff_surf_width_l;
1508 int eff_surf_width_c;
1509 int eff_surf_height_l;
1510 int eff_surf_height_c;
1511
1512 int full_swath_bytes_horz_wc_l;
1513 int full_swath_bytes_horz_wc_c;
1514 int full_swath_bytes_vert_wc_l;
1515 int full_swath_bytes_vert_wc_c;
1516 int req128_horz_wc_l;
1517 int req128_horz_wc_c;
1518 int req128_vert_wc_l;
1519 int req128_vert_wc_c;
1520 int segment_order_horz_contiguous_luma;
1521 int segment_order_horz_contiguous_chroma;
1522 int segment_order_vert_contiguous_luma;
1523 int segment_order_vert_contiguous_chroma;
1524
1525 typedef enum {
1526 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
1527 } RequestType;
1528 RequestType RequestLuma;
1529 RequestType RequestChroma;
1530
1531 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
1532 horz_div_l = 1;
1533 horz_div_c = 1;
1534 vert_div_l = 1;
1535 vert_div_c = 1;
1536
1537 if (BytePerPixelY == 1)
1538 vert_div_l = 0;
1539 if (BytePerPixelC == 1)
1540 vert_div_c = 0;
1541 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1542 horz_div_l = 0;
1543 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1544 horz_div_c = 0;
1545
1546 if (BytePerPixelC == 0) {
1547 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1548 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
1549 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1550 } else {
1551 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1552 detile_buf_vp_horz_limit = (double) swath_buf_size
1553 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
1554 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
1555 detile_buf_vp_vert_limit = (double) swath_buf_size
1556 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
1557 }
1558
1559 if (SourcePixelFormat == dm_420_10) {
1560 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1561 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
1562 }
1563
1564 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1565 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
1566
1567 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
1568 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1569 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1570 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
1571 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1572 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1573 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
1574 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
1575
1576 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1577 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1578 if (BytePerPixelC > 0) {
1579 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
1580 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1581 } else {
1582 full_swath_bytes_horz_wc_c = 0;
1583 full_swath_bytes_vert_wc_c = 0;
1584 }
1585
1586 if (SourcePixelFormat == dm_420_10) {
1587 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1588 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1589 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1590 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
1591 }
1592
1593 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1594 req128_horz_wc_l = 0;
1595 req128_horz_wc_c = 0;
1596 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
1597 req128_horz_wc_l = 0;
1598 req128_horz_wc_c = 1;
1599 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1600 req128_horz_wc_l = 1;
1601 req128_horz_wc_c = 0;
1602 } else {
1603 req128_horz_wc_l = 1;
1604 req128_horz_wc_c = 1;
1605 }
1606
1607 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1608 req128_vert_wc_l = 0;
1609 req128_vert_wc_c = 0;
1610 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
1611 req128_vert_wc_l = 0;
1612 req128_vert_wc_c = 1;
1613 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1614 req128_vert_wc_l = 1;
1615 req128_vert_wc_c = 0;
1616 } else {
1617 req128_vert_wc_l = 1;
1618 req128_vert_wc_c = 1;
1619 }
1620
1621 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1622 segment_order_horz_contiguous_luma = 0;
1623 } else {
1624 segment_order_horz_contiguous_luma = 1;
1625 }
1626 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1627 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1628 segment_order_vert_contiguous_luma = 0;
1629 } else {
1630 segment_order_vert_contiguous_luma = 1;
1631 }
1632 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1633 segment_order_horz_contiguous_chroma = 0;
1634 } else {
1635 segment_order_horz_contiguous_chroma = 1;
1636 }
1637 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1638 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1639 segment_order_vert_contiguous_chroma = 0;
1640 } else {
1641 segment_order_vert_contiguous_chroma = 1;
1642 }
1643
1644 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1645 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1646 RequestLuma = REQ_256Bytes;
1647 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1648 RequestLuma = REQ_128BytesNonContiguous;
1649 } else {
1650 RequestLuma = REQ_128BytesContiguous;
1651 }
1652 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1653 RequestChroma = REQ_256Bytes;
1654 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
1655 RequestChroma = REQ_128BytesNonContiguous;
1656 } else {
1657 RequestChroma = REQ_128BytesContiguous;
1658 }
1659 } else if (ScanOrientation != dm_vert) {
1660 if (req128_horz_wc_l == 0) {
1661 RequestLuma = REQ_256Bytes;
1662 } else if (segment_order_horz_contiguous_luma == 0) {
1663 RequestLuma = REQ_128BytesNonContiguous;
1664 } else {
1665 RequestLuma = REQ_128BytesContiguous;
1666 }
1667 if (req128_horz_wc_c == 0) {
1668 RequestChroma = REQ_256Bytes;
1669 } else if (segment_order_horz_contiguous_chroma == 0) {
1670 RequestChroma = REQ_128BytesNonContiguous;
1671 } else {
1672 RequestChroma = REQ_128BytesContiguous;
1673 }
1674 } else {
1675 if (req128_vert_wc_l == 0) {
1676 RequestLuma = REQ_256Bytes;
1677 } else if (segment_order_vert_contiguous_luma == 0) {
1678 RequestLuma = REQ_128BytesNonContiguous;
1679 } else {
1680 RequestLuma = REQ_128BytesContiguous;
1681 }
1682 if (req128_vert_wc_c == 0) {
1683 RequestChroma = REQ_256Bytes;
1684 } else if (segment_order_vert_contiguous_chroma == 0) {
1685 RequestChroma = REQ_128BytesNonContiguous;
1686 } else {
1687 RequestChroma = REQ_128BytesContiguous;
1688 }
1689 }
1690
1691 if (RequestLuma == REQ_256Bytes) {
1692 *MaxUncompressedBlockLuma = 256;
1693 *MaxCompressedBlockLuma = 256;
1694 *IndependentBlockLuma = 0;
1695 } else if (RequestLuma == REQ_128BytesContiguous) {
1696 *MaxUncompressedBlockLuma = 256;
1697 *MaxCompressedBlockLuma = 128;
1698 *IndependentBlockLuma = 128;
1699 } else {
1700 *MaxUncompressedBlockLuma = 256;
1701 *MaxCompressedBlockLuma = 64;
1702 *IndependentBlockLuma = 64;
1703 }
1704
1705 if (RequestChroma == REQ_256Bytes) {
1706 *MaxUncompressedBlockChroma = 256;
1707 *MaxCompressedBlockChroma = 256;
1708 *IndependentBlockChroma = 0;
1709 } else if (RequestChroma == REQ_128BytesContiguous) {
1710 *MaxUncompressedBlockChroma = 256;
1711 *MaxCompressedBlockChroma = 128;
1712 *IndependentBlockChroma = 128;
1713 } else {
1714 *MaxUncompressedBlockChroma = 256;
1715 *MaxCompressedBlockChroma = 64;
1716 *IndependentBlockChroma = 64;
1717 }
1718
1719 if (DCCEnabled != true || BytePerPixelC == 0) {
1720 *MaxUncompressedBlockChroma = 0;
1721 *MaxCompressedBlockChroma = 0;
1722 *IndependentBlockChroma = 0;
1723 }
1724
1725 if (DCCEnabled != true) {
1726 *MaxUncompressedBlockLuma = 0;
1727 *MaxCompressedBlockLuma = 0;
1728 *IndependentBlockLuma = 0;
1729 }
1730 }
1731
1732 static double CalculatePrefetchSourceLines(
1733 struct display_mode_lib *mode_lib,
1734 double VRatio,
1735 double vtaps,
1736 bool Interlace,
1737 bool ProgressiveToInterlaceUnitInOPP,
1738 unsigned int SwathHeight,
1739 unsigned int ViewportYStart,
1740 double *VInitPreFill,
1741 unsigned int *MaxNumSwath)
1742 {
1743 struct vba_vars_st *v = &mode_lib->vba;
1744 unsigned int MaxPartialSwath;
1745
1746 if (ProgressiveToInterlaceUnitInOPP)
1747 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1748 else
1749 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
1750
1751 if (!v->IgnoreViewportPositioning) {
1752
1753 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1754
1755 if (*VInitPreFill > 1.0)
1756 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1757 else
1758 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
1759 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
1760
1761 } else {
1762
1763 if (ViewportYStart != 0)
1764 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1765
1766 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1767
1768 if (*VInitPreFill > 1.0)
1769 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1770 else
1771 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
1772 }
1773
1774 #ifdef __DML_VBA_DEBUG__
1775 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
1776 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
1777 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
1778 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
1779 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
1780 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
1781 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
1782 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
1783 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
1784 #endif
1785 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1786 }
1787
1788 static unsigned int CalculateVMAndRowBytes(
1789 struct display_mode_lib *mode_lib,
1790 bool DCCEnable,
1791 unsigned int BlockHeight256Bytes,
1792 unsigned int BlockWidth256Bytes,
1793 enum source_format_class SourcePixelFormat,
1794 unsigned int SurfaceTiling,
1795 unsigned int BytePerPixel,
1796 enum scan_direction_class ScanDirection,
1797 unsigned int SwathWidth,
1798 unsigned int ViewportHeight,
1799 bool GPUVMEnable,
1800 bool HostVMEnable,
1801 unsigned int HostVMMaxNonCachedPageTableLevels,
1802 unsigned int GPUVMMinPageSize,
1803 unsigned int HostVMMinPageSize,
1804 unsigned int PTEBufferSizeInRequests,
1805 unsigned int Pitch,
1806 unsigned int DCCMetaPitch,
1807 unsigned int *MacroTileWidth,
1808 unsigned int *MetaRowByte,
1809 unsigned int *PixelPTEBytesPerRow,
1810 bool *PTEBufferSizeNotExceeded,
1811 int *dpte_row_width_ub,
1812 unsigned int *dpte_row_height,
1813 unsigned int *MetaRequestWidth,
1814 unsigned int *MetaRequestHeight,
1815 unsigned int *meta_row_width,
1816 unsigned int *meta_row_height,
1817 int *vm_group_bytes,
1818 unsigned int *dpte_group_bytes,
1819 unsigned int *PixelPTEReqWidth,
1820 unsigned int *PixelPTEReqHeight,
1821 unsigned int *PTERequestSize,
1822 int *DPDE0BytesFrame,
1823 int *MetaPTEBytesFrame)
1824 {
1825 struct vba_vars_st *v = &mode_lib->vba;
1826 unsigned int MPDEBytesFrame;
1827 unsigned int DCCMetaSurfaceBytes;
1828 unsigned int MacroTileSizeBytes;
1829 unsigned int MacroTileHeight;
1830 unsigned int ExtraDPDEBytesFrame;
1831 unsigned int PDEAndMetaPTEBytesFrame;
1832 unsigned int PixelPTEReqHeightPTEs = 0;
1833 unsigned int HostVMDynamicLevels = 0;
1834 double FractionOfPTEReturnDrop;
1835
1836 if (GPUVMEnable == true && HostVMEnable == true) {
1837 if (HostVMMinPageSize < 2048) {
1838 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1839 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1840 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1841 } else {
1842 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1843 }
1844 }
1845
1846 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1847 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1848 if (ScanDirection != dm_vert) {
1849 *meta_row_height = *MetaRequestHeight;
1850 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
1851 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1852 } else {
1853 *meta_row_height = *MetaRequestWidth;
1854 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
1855 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1856 }
1857 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1858 if (GPUVMEnable == true) {
1859 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1860 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
1861 } else {
1862 *MetaPTEBytesFrame = 0;
1863 MPDEBytesFrame = 0;
1864 }
1865
1866 if (DCCEnable != true) {
1867 *MetaPTEBytesFrame = 0;
1868 MPDEBytesFrame = 0;
1869 *MetaRowByte = 0;
1870 }
1871
1872 if (SurfaceTiling == dm_sw_linear) {
1873 MacroTileSizeBytes = 256;
1874 MacroTileHeight = BlockHeight256Bytes;
1875 } else {
1876 MacroTileSizeBytes = 65536;
1877 MacroTileHeight = 16 * BlockHeight256Bytes;
1878 }
1879 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1880
1881 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
1882 if (ScanDirection != dm_vert) {
1883 *DPDE0BytesFrame = 64
1884 * (dml_ceil(
1885 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1886 / (8 * 2097152),
1887 1) + 1);
1888 } else {
1889 *DPDE0BytesFrame = 64
1890 * (dml_ceil(
1891 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1892 / (8 * 2097152),
1893 1) + 1);
1894 }
1895 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
1896 } else {
1897 *DPDE0BytesFrame = 0;
1898 ExtraDPDEBytesFrame = 0;
1899 }
1900
1901 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
1902
1903 #ifdef __DML_VBA_DEBUG__
1904 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
1905 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
1906 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
1907 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
1908 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1909 #endif
1910
1911 if (HostVMEnable == true) {
1912 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1913 }
1914 #ifdef __DML_VBA_DEBUG__
1915 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1916 #endif
1917
1918 if (SurfaceTiling == dm_sw_linear) {
1919 PixelPTEReqHeightPTEs = 1;
1920 *PixelPTEReqHeight = 1;
1921 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1922 *PTERequestSize = 64;
1923 FractionOfPTEReturnDrop = 0;
1924 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1925 PixelPTEReqHeightPTEs = 16;
1926 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1927 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1928 *PTERequestSize = 128;
1929 FractionOfPTEReturnDrop = 0;
1930 } else {
1931 PixelPTEReqHeightPTEs = 1;
1932 *PixelPTEReqHeight = MacroTileHeight;
1933 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1934 *PTERequestSize = 64;
1935 FractionOfPTEReturnDrop = 0;
1936 }
1937
1938 if (SurfaceTiling == dm_sw_linear) {
1939 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
1940 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1941 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1942 } else if (ScanDirection != dm_vert) {
1943 *dpte_row_height = *PixelPTEReqHeight;
1944 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1945 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1946 } else {
1947 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1948 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1949 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1950 }
1951
1952 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
1953 *PTEBufferSizeNotExceeded = true;
1954 } else {
1955 *PTEBufferSizeNotExceeded = false;
1956 }
1957
1958 if (GPUVMEnable != true) {
1959 *PixelPTEBytesPerRow = 0;
1960 *PTEBufferSizeNotExceeded = true;
1961 }
1962
1963 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
1964
1965 if (HostVMEnable == true) {
1966 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
1967 }
1968
1969 if (HostVMEnable == true) {
1970 *vm_group_bytes = 512;
1971 *dpte_group_bytes = 512;
1972 } else if (GPUVMEnable == true) {
1973 *vm_group_bytes = 2048;
1974 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
1975 *dpte_group_bytes = 512;
1976 } else {
1977 *dpte_group_bytes = 2048;
1978 }
1979 } else {
1980 *vm_group_bytes = 0;
1981 *dpte_group_bytes = 0;
1982 }
1983 return PDEAndMetaPTEBytesFrame;
1984 }
1985
1986 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
1987 {
1988 struct vba_vars_st *v = &mode_lib->vba;
1989 unsigned int j, k;
1990 double HostVMInefficiencyFactor = 1.0;
1991 bool NoChromaPlanes = true;
1992 int ReorderBytes;
1993 double VMDataOnlyReturnBW;
1994 double MaxTotalRDBandwidth = 0;
1995 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
1996
1997 v->WritebackDISPCLK = 0.0;
1998 v->DISPCLKWithRamping = 0;
1999 v->DISPCLKWithoutRamping = 0;
2000 v->GlobalDPPCLK = 0.0;
	/* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
2002 {
2003 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
2004 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
2005 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
2006 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
2007 if (v->HostVMEnable != true) {
2008 v->ReturnBW = dml_min(
2009 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2010 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
2011 } else {
2012 v->ReturnBW = dml_min(
2013 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2014 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
2015 }
2016 }
2017 /* End DAL custom code */
2018
2019 // DISPCLK and DPPCLK Calculation
2020 //
2021 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2022 if (v->WritebackEnable[k]) {
2023 v->WritebackDISPCLK = dml_max(
2024 v->WritebackDISPCLK,
2025 dml31_CalculateWriteBackDISPCLK(
2026 v->WritebackPixelFormat[k],
2027 v->PixelClock[k],
2028 v->WritebackHRatio[k],
2029 v->WritebackVRatio[k],
2030 v->WritebackHTaps[k],
2031 v->WritebackVTaps[k],
2032 v->WritebackSourceWidth[k],
2033 v->WritebackDestinationWidth[k],
2034 v->HTotal[k],
2035 v->WritebackLineBufferSize));
2036 }
2037 }
2038
2039 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2040 if (v->HRatio[k] > 1) {
2041 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
2042 v->MaxDCHUBToPSCLThroughput,
2043 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
2044 } else {
2045 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2046 }
2047
2048 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
2049 * dml_max(
2050 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
2051 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
2052
2053 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
2054 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
2055 }
2056
2057 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
2058 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
2059 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
2060 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
2061 } else {
2062 if (v->HRatioChroma[k] > 1) {
2063 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
2064 v->MaxDCHUBToPSCLThroughput,
2065 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
2066 } else {
2067 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2068 }
2069 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2070 * dml_max3(
2071 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
2072 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
2073 1.0);
2074
2075 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
2076 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
2077 }
2078
2079 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma);
2080 }
2081 }
2082
2083 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2084 if (v->BlendingAndTiming[k] != k)
2085 continue;
2086 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2087 v->DISPCLKWithRamping = dml_max(
2088 v->DISPCLKWithRamping,
2089 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2090 * (1 + v->DISPCLKRampingMargin / 100));
2091 v->DISPCLKWithoutRamping = dml_max(
2092 v->DISPCLKWithoutRamping,
2093 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2094 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2095 v->DISPCLKWithRamping = dml_max(
2096 v->DISPCLKWithRamping,
2097 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2098 * (1 + v->DISPCLKRampingMargin / 100));
2099 v->DISPCLKWithoutRamping = dml_max(
2100 v->DISPCLKWithoutRamping,
2101 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2102 } else {
2103 v->DISPCLKWithRamping = dml_max(
2104 v->DISPCLKWithRamping,
2105 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
2106 v->DISPCLKWithoutRamping = dml_max(
2107 v->DISPCLKWithoutRamping,
2108 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2109 }
2110 }
2111
2112 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK);
2113 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK);
2114
2115 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2116 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed);
2117 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed);
2118 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2119 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
2120 v->DISPCLKDPPCLKVCOSpeed);
2121 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2122 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2123 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2124 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2125 } else {
2126 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
2127 }
2128 v->DISPCLK = v->DISPCLK_calculated;
2129 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2130
2131 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2132 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2133 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]);
2134 }
2135 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed);
2136 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2137 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1);
2138 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2139 }
2140
2141 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2142 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2143 }
2144
2145 // Urgent and B P-State/DRAM Clock Change Watermark
2146 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2147 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2148
2149 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2150 dml30_CalculateBytePerPixelAnd256BBlockSizes(
2151 v->SourcePixelFormat[k],
2152 v->SurfaceTiling[k],
2153 &v->BytePerPixelY[k],
2154 &v->BytePerPixelC[k],
2155 &v->BytePerPixelDETY[k],
2156 &v->BytePerPixelDETC[k],
2157 &v->BlockHeight256BytesY[k],
2158 &v->BlockHeight256BytesC[k],
2159 &v->BlockWidth256BytesY[k],
2160 &v->BlockWidth256BytesC[k]);
2161 }
2162
2163 CalculateSwathWidth(
2164 false,
2165 v->NumberOfActivePlanes,
2166 v->SourcePixelFormat,
2167 v->SourceScan,
2168 v->ViewportWidth,
2169 v->ViewportHeight,
2170 v->SurfaceWidthY,
2171 v->SurfaceWidthC,
2172 v->SurfaceHeightY,
2173 v->SurfaceHeightC,
2174 v->ODMCombineEnabled,
2175 v->BytePerPixelY,
2176 v->BytePerPixelC,
2177 v->BlockHeight256BytesY,
2178 v->BlockHeight256BytesC,
2179 v->BlockWidth256BytesY,
2180 v->BlockWidth256BytesC,
2181 v->BlendingAndTiming,
2182 v->HActive,
2183 v->HRatio,
2184 v->DPPPerPlane,
2185 v->SwathWidthSingleDPPY,
2186 v->SwathWidthSingleDPPC,
2187 v->SwathWidthY,
2188 v->SwathWidthC,
2189 v->dummyinteger3,
2190 v->dummyinteger4,
2191 v->swath_width_luma_ub,
2192 v->swath_width_chroma_ub);
2193
2194 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2195 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
2196 * v->VRatio[k];
2197 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
2198 * v->VRatioChroma[k];
2199 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2200 }
2201
2202 // DCFCLK Deep Sleep
2203 CalculateDCFCLKDeepSleep(
2204 mode_lib,
2205 v->NumberOfActivePlanes,
2206 v->BytePerPixelY,
2207 v->BytePerPixelC,
2208 v->VRatio,
2209 v->VRatioChroma,
2210 v->SwathWidthY,
2211 v->SwathWidthC,
2212 v->DPPPerPlane,
2213 v->HRatio,
2214 v->HRatioChroma,
2215 v->PixelClock,
2216 v->PSCL_THROUGHPUT_LUMA,
2217 v->PSCL_THROUGHPUT_CHROMA,
2218 v->DPPCLK,
2219 v->ReadBandwidthPlaneLuma,
2220 v->ReadBandwidthPlaneChroma,
2221 v->ReturnBusWidth,
2222 &v->DCFCLKDeepSleep);
2223
2224 // DSCCLK
2225 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2226 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2227 v->DSCCLK_calculated[k] = 0.0;
2228 } else {
2229 if (v->OutputFormat[k] == dm_420)
2230 v->DSCFormatFactor = 2;
2231 else if (v->OutputFormat[k] == dm_444)
2232 v->DSCFormatFactor = 1;
2233 else if (v->OutputFormat[k] == dm_n422)
2234 v->DSCFormatFactor = 2;
2235 else
2236 v->DSCFormatFactor = 1;
2237 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2238 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
2239 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2240 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2241 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
2242 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2243 else
2244 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
2245 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2246 }
2247 }
2248
2249 // DSC Delay
2250 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2251 double BPP = v->OutputBpp[k];
2252
2253 if (v->DSCEnabled[k] && BPP != 0) {
2254 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2255 v->DSCDelay[k] = dscceComputeDelay(
2256 v->DSCInputBitPerComponent[k],
2257 BPP,
2258 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2259 v->NumberOfDSCSlices[k],
2260 v->OutputFormat[k],
2261 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2262 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2263 v->DSCDelay[k] = 2
2264 * (dscceComputeDelay(
2265 v->DSCInputBitPerComponent[k],
2266 BPP,
2267 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2268 v->NumberOfDSCSlices[k] / 2.0,
2269 v->OutputFormat[k],
2270 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2271 } else {
2272 v->DSCDelay[k] = 4
2273 * (dscceComputeDelay(
2274 v->DSCInputBitPerComponent[k],
2275 BPP,
2276 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2277 v->NumberOfDSCSlices[k] / 4.0,
2278 v->OutputFormat[k],
2279 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2280 }
2281 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2282 } else {
2283 v->DSCDelay[k] = 0;
2284 }
2285 }
2286
2287 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2288 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2289 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
2290 v->DSCDelay[k] = v->DSCDelay[j];
2291
2292 // Prefetch
2293 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2294 unsigned int PDEAndMetaPTEBytesFrameY;
2295 unsigned int PixelPTEBytesPerRowY;
2296 unsigned int MetaRowByteY;
2297 unsigned int MetaRowByteC;
2298 unsigned int PDEAndMetaPTEBytesFrameC;
2299 unsigned int PixelPTEBytesPerRowC;
2300 bool PTEBufferSizeNotExceededY;
2301 bool PTEBufferSizeNotExceededC;
2302
2303 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2304 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2305 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2306 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2307 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2308 } else {
2309 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2310 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2311 }
2312
2313 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2314 mode_lib,
2315 v->DCCEnable[k],
2316 v->BlockHeight256BytesC[k],
2317 v->BlockWidth256BytesC[k],
2318 v->SourcePixelFormat[k],
2319 v->SurfaceTiling[k],
2320 v->BytePerPixelC[k],
2321 v->SourceScan[k],
2322 v->SwathWidthC[k],
2323 v->ViewportHeightChroma[k],
2324 v->GPUVMEnable,
2325 v->HostVMEnable,
2326 v->HostVMMaxNonCachedPageTableLevels,
2327 v->GPUVMMinPageSize,
2328 v->HostVMMinPageSize,
2329 v->PTEBufferSizeInRequestsForChroma,
2330 v->PitchC[k],
2331 v->DCCMetaPitchC[k],
2332 &v->MacroTileWidthC[k],
2333 &MetaRowByteC,
2334 &PixelPTEBytesPerRowC,
2335 &PTEBufferSizeNotExceededC,
2336 &v->dpte_row_width_chroma_ub[k],
2337 &v->dpte_row_height_chroma[k],
2338 &v->meta_req_width_chroma[k],
2339 &v->meta_req_height_chroma[k],
2340 &v->meta_row_width_chroma[k],
2341 &v->meta_row_height_chroma[k],
2342 &v->dummyinteger1,
2343 &v->dummyinteger2,
2344 &v->PixelPTEReqWidthC[k],
2345 &v->PixelPTEReqHeightC[k],
2346 &v->PTERequestSizeC[k],
2347 &v->dpde0_bytes_per_frame_ub_c[k],
2348 &v->meta_pte_bytes_per_frame_ub_c[k]);
2349
2350 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2351 mode_lib,
2352 v->VRatioChroma[k],
2353 v->VTAPsChroma[k],
2354 v->Interlace[k],
2355 v->ProgressiveToInterlaceUnitInOPP,
2356 v->SwathHeightC[k],
2357 v->ViewportYStartC[k],
2358 &v->VInitPreFillC[k],
2359 &v->MaxNumSwathC[k]);
2360 } else {
2361 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2362 v->PTEBufferSizeInRequestsForChroma = 0;
2363 PixelPTEBytesPerRowC = 0;
2364 PDEAndMetaPTEBytesFrameC = 0;
2365 MetaRowByteC = 0;
2366 v->MaxNumSwathC[k] = 0;
2367 v->PrefetchSourceLinesC[k] = 0;
2368 }
2369
2370 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2371 mode_lib,
2372 v->DCCEnable[k],
2373 v->BlockHeight256BytesY[k],
2374 v->BlockWidth256BytesY[k],
2375 v->SourcePixelFormat[k],
2376 v->SurfaceTiling[k],
2377 v->BytePerPixelY[k],
2378 v->SourceScan[k],
2379 v->SwathWidthY[k],
2380 v->ViewportHeight[k],
2381 v->GPUVMEnable,
2382 v->HostVMEnable,
2383 v->HostVMMaxNonCachedPageTableLevels,
2384 v->GPUVMMinPageSize,
2385 v->HostVMMinPageSize,
2386 v->PTEBufferSizeInRequestsForLuma,
2387 v->PitchY[k],
2388 v->DCCMetaPitchY[k],
2389 &v->MacroTileWidthY[k],
2390 &MetaRowByteY,
2391 &PixelPTEBytesPerRowY,
2392 &PTEBufferSizeNotExceededY,
2393 &v->dpte_row_width_luma_ub[k],
2394 &v->dpte_row_height[k],
2395 &v->meta_req_width[k],
2396 &v->meta_req_height[k],
2397 &v->meta_row_width[k],
2398 &v->meta_row_height[k],
2399 &v->vm_group_bytes[k],
2400 &v->dpte_group_bytes[k],
2401 &v->PixelPTEReqWidthY[k],
2402 &v->PixelPTEReqHeightY[k],
2403 &v->PTERequestSizeY[k],
2404 &v->dpde0_bytes_per_frame_ub_l[k],
2405 &v->meta_pte_bytes_per_frame_ub_l[k]);
2406
2407 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2408 mode_lib,
2409 v->VRatio[k],
2410 v->vtaps[k],
2411 v->Interlace[k],
2412 v->ProgressiveToInterlaceUnitInOPP,
2413 v->SwathHeightY[k],
2414 v->ViewportYStartY[k],
2415 &v->VInitPreFillY[k],
2416 &v->MaxNumSwathY[k]);
2417 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2418 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2419 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2420
2421 CalculateRowBandwidth(
2422 v->GPUVMEnable,
2423 v->SourcePixelFormat[k],
2424 v->VRatio[k],
2425 v->VRatioChroma[k],
2426 v->DCCEnable[k],
2427 v->HTotal[k] / v->PixelClock[k],
2428 MetaRowByteY,
2429 MetaRowByteC,
2430 v->meta_row_height[k],
2431 v->meta_row_height_chroma[k],
2432 PixelPTEBytesPerRowY,
2433 PixelPTEBytesPerRowC,
2434 v->dpte_row_height[k],
2435 v->dpte_row_height_chroma[k],
2436 &v->meta_row_bw[k],
2437 &v->dpte_row_bw[k]);
2438 }
2439
2440 v->TotalDCCActiveDPP = 0;
2441 v->TotalActiveDPP = 0;
2442 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2443 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
2444 if (v->DCCEnable[k])
2445 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
2446 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2447 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
2448 NoChromaPlanes = false;
2449 }
2450
2451 ReorderBytes = v->NumberOfChannels
2452 * dml_max3(
2453 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2454 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2455 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2456
2457 VMDataOnlyReturnBW = dml_min(
2458 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn)
2459 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2460 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
2461 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
2462
2463 #ifdef __DML_VBA_DEBUG__
2464 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
2465 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
2466 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
2467 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
2468 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
2469 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
2470 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
2471 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
2472 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
2473 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
2474 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
2475 #endif
2476
2477 if (v->GPUVMEnable && v->HostVMEnable)
2478 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
2479
2480 v->UrgentExtraLatency = CalculateExtraLatency(
2481 v->RoundTripPingLatencyCycles,
2482 ReorderBytes,
2483 v->DCFCLK,
2484 v->TotalActiveDPP,
2485 v->PixelChunkSizeInKByte,
2486 v->TotalDCCActiveDPP,
2487 v->MetaChunkSize,
2488 v->ReturnBW,
2489 v->GPUVMEnable,
2490 v->HostVMEnable,
2491 v->NumberOfActivePlanes,
2492 v->DPPPerPlane,
2493 v->dpte_group_bytes,
2494 HostVMInefficiencyFactor,
2495 v->HostVMMinPageSize,
2496 v->HostVMMaxNonCachedPageTableLevels);
2497
2498 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2499
2500 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2501 if (v->BlendingAndTiming[k] == k) {
2502 if (v->WritebackEnable[k] == true) {
2503 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
2504 + CalculateWriteBackDelay(
2505 v->WritebackPixelFormat[k],
2506 v->WritebackHRatio[k],
2507 v->WritebackVRatio[k],
2508 v->WritebackVTaps[k],
2509 v->WritebackDestinationWidth[k],
2510 v->WritebackDestinationHeight[k],
2511 v->WritebackSourceHeight[k],
2512 v->HTotal[k]) / v->DISPCLK;
2513 } else
2514 v->WritebackDelay[v->VoltageLevel][k] = 0;
2515 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2516 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
2517 v->WritebackDelay[v->VoltageLevel][k] = dml_max(
2518 v->WritebackDelay[v->VoltageLevel][k],
2519 v->WritebackLatency
2520 + CalculateWriteBackDelay(
2521 v->WritebackPixelFormat[j],
2522 v->WritebackHRatio[j],
2523 v->WritebackVRatio[j],
2524 v->WritebackVTaps[j],
2525 v->WritebackDestinationWidth[j],
2526 v->WritebackDestinationHeight[j],
2527 v->WritebackSourceHeight[j],
2528 v->HTotal[k]) / v->DISPCLK);
2529 }
2530 }
2531 }
2532 }
2533
2534 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2535 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2536 if (v->BlendingAndTiming[k] == j)
2537 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2538
2539 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2540 v->MaxVStartupLines[k] =
2541 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
2542 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
2543 v->VTotal[k] - v->VActive[k]
2544 - dml_max(
2545 1.0,
2546 dml_ceil(
2547 (double) v->WritebackDelay[v->VoltageLevel][k]
2548 / (v->HTotal[k] / v->PixelClock[k]),
2549 1));
2550 if (v->MaxVStartupLines[k] > 1023)
2551 v->MaxVStartupLines[k] = 1023;
2552
2553 #ifdef __DML_VBA_DEBUG__
2554 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
2555 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
2556 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
2557 #endif
2558 }
2559
2560 v->MaximumMaxVStartupLines = 0;
2561 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2562 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2563
2564 // VBA_DELTA
2565 // We don't really care to iterate between the various prefetch modes
2566 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
2567
2568 v->UrgentLatency = CalculateUrgentLatency(
2569 v->UrgentLatencyPixelDataOnly,
2570 v->UrgentLatencyPixelMixedWithVMData,
2571 v->UrgentLatencyVMDataOnly,
2572 v->DoUrgentLatencyAdjustment,
2573 v->UrgentLatencyAdjustmentFabricClockComponent,
2574 v->UrgentLatencyAdjustmentFabricClockReference,
2575 v->FabricClock);
2576
2577 v->FractionOfUrgentBandwidth = 0.0;
2578 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2579
2580 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
2581
2582 do {
2583 double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
2584 bool DestinationLineTimesForPrefetchLessThan2 = false;
2585 bool VRatioPrefetchMoreThan4 = false;
2586 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime);
2587 MaxTotalRDBandwidth = 0;
2588
2589 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
2590
2591 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2592 Pipe myPipe;
2593
2594 myPipe.DPPCLK = v->DPPCLK[k];
2595 myPipe.DISPCLK = v->DISPCLK;
2596 myPipe.PixelClock = v->PixelClock[k];
2597 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2598 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2599 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2600 myPipe.VRatio = v->VRatio[k];
2601 myPipe.VRatioChroma = v->VRatioChroma[k];
2602 myPipe.SourceScan = v->SourceScan[k];
2603 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2604 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2605 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2606 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2607 myPipe.InterlaceEnable = v->Interlace[k];
2608 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2609 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2610 myPipe.HTotal = v->HTotal[k];
2611 myPipe.DCCEnable = v->DCCEnable[k];
2612 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
2613 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
2614 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
2615 myPipe.BytePerPixelY = v->BytePerPixelY[k];
2616 myPipe.BytePerPixelC = v->BytePerPixelC[k];
2617 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
2618 v->ErrorResult[k] = CalculatePrefetchSchedule(
2619 mode_lib,
2620 HostVMInefficiencyFactor,
2621 &myPipe,
2622 v->DSCDelay[k],
2623 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
2624 v->DPPCLKDelaySCL,
2625 v->DPPCLKDelaySCLLBOnly,
2626 v->DPPCLKDelayCNVCCursor,
2627 v->DISPCLKDelaySubtotal,
2628 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2629 v->OutputFormat[k],
2630 v->MaxInterDCNTileRepeaters,
2631 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2632 v->MaxVStartupLines[k],
2633 v->GPUVMMaxPageTableLevels,
2634 v->GPUVMEnable,
2635 v->HostVMEnable,
2636 v->HostVMMaxNonCachedPageTableLevels,
2637 v->HostVMMinPageSize,
2638 v->DynamicMetadataEnable[k],
2639 v->DynamicMetadataVMEnabled,
2640 v->DynamicMetadataLinesBeforeActiveRequired[k],
2641 v->DynamicMetadataTransmittedBytes[k],
2642 v->UrgentLatency,
2643 v->UrgentExtraLatency,
2644 v->TCalc,
2645 v->PDEAndMetaPTEBytesFrame[k],
2646 v->MetaRowByte[k],
2647 v->PixelPTEBytesPerRow[k],
2648 v->PrefetchSourceLinesY[k],
2649 v->SwathWidthY[k],
2650 v->VInitPreFillY[k],
2651 v->MaxNumSwathY[k],
2652 v->PrefetchSourceLinesC[k],
2653 v->SwathWidthC[k],
2654 v->VInitPreFillC[k],
2655 v->MaxNumSwathC[k],
2656 v->swath_width_luma_ub[k],
2657 v->swath_width_chroma_ub[k],
2658 v->SwathHeightY[k],
2659 v->SwathHeightC[k],
2660 TWait,
2661 &v->DSTXAfterScaler[k],
2662 &v->DSTYAfterScaler[k],
2663 &v->DestinationLinesForPrefetch[k],
2664 &v->PrefetchBandwidth[k],
2665 &v->DestinationLinesToRequestVMInVBlank[k],
2666 &v->DestinationLinesToRequestRowInVBlank[k],
2667 &v->VRatioPrefetchY[k],
2668 &v->VRatioPrefetchC[k],
2669 &v->RequiredPrefetchPixDataBWLuma[k],
2670 &v->RequiredPrefetchPixDataBWChroma[k],
2671 &v->NotEnoughTimeForDynamicMetadata[k],
2672 &v->Tno_bw[k],
2673 &v->prefetch_vmrow_bw[k],
2674 &v->Tdmdl_vm[k],
2675 &v->Tdmdl[k],
2676 &v->TSetup[k],
2677 &v->VUpdateOffsetPix[k],
2678 &v->VUpdateWidthPix[k],
2679 &v->VReadyOffsetPix[k]);
2680
2681 #ifdef __DML_VBA_DEBUG__
2682 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
2683 #endif
2684 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2685 }
2686
2687 v->NoEnoughUrgentLatencyHiding = false;
2688 v->NoEnoughUrgentLatencyHidingPre = false;
2689
2690 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2691 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2692 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2693 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2694 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
2695
2696 CalculateUrgentBurstFactor(
2697 v->swath_width_luma_ub[k],
2698 v->swath_width_chroma_ub[k],
2699 v->SwathHeightY[k],
2700 v->SwathHeightC[k],
2701 v->HTotal[k] / v->PixelClock[k],
2702 v->UrgentLatency,
2703 v->CursorBufferSize,
2704 v->CursorWidth[k][0],
2705 v->CursorBPP[k][0],
2706 v->VRatio[k],
2707 v->VRatioChroma[k],
2708 v->BytePerPixelDETY[k],
2709 v->BytePerPixelDETC[k],
2710 v->DETBufferSizeY[k],
2711 v->DETBufferSizeC[k],
2712 &v->UrgBurstFactorCursor[k],
2713 &v->UrgBurstFactorLuma[k],
2714 &v->UrgBurstFactorChroma[k],
2715 &v->NoUrgentLatencyHiding[k]);
2716
2717 CalculateUrgentBurstFactor(
2718 v->swath_width_luma_ub[k],
2719 v->swath_width_chroma_ub[k],
2720 v->SwathHeightY[k],
2721 v->SwathHeightC[k],
2722 v->HTotal[k] / v->PixelClock[k],
2723 v->UrgentLatency,
2724 v->CursorBufferSize,
2725 v->CursorWidth[k][0],
2726 v->CursorBPP[k][0],
2727 v->VRatioPrefetchY[k],
2728 v->VRatioPrefetchC[k],
2729 v->BytePerPixelDETY[k],
2730 v->BytePerPixelDETC[k],
2731 v->DETBufferSizeY[k],
2732 v->DETBufferSizeC[k],
2733 &v->UrgBurstFactorCursorPre[k],
2734 &v->UrgBurstFactorLumaPre[k],
2735 &v->UrgBurstFactorChromaPre[k],
2736 &v->NoUrgentLatencyHidingPre[k]);
2737
2738 MaxTotalRDBandwidth = MaxTotalRDBandwidth
2739 + dml_max3(
2740 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2741 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2742 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2743 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
2744 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2745 v->DPPPerPlane[k]
2746 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2747 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2748 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2749
2750 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
2751 + dml_max3(
2752 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2753 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
2754 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2755 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
2756 + v->cursor_bw_pre[k]);
2757
2758 #ifdef __DML_VBA_DEBUG__
2759 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
2760 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
2761 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
2762 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
2763 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
2764
2765 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
2766 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
2767
2768 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
2769 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
2770 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
2771 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
2772 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
2773 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
2774 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
2775 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
2776 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
2777 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
2778 #endif
2779
2780 if (v->DestinationLinesForPrefetch[k] < 2)
2781 DestinationLineTimesForPrefetchLessThan2 = true;
2782
2783 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2784 VRatioPrefetchMoreThan4 = true;
2785
2786 if (v->NoUrgentLatencyHiding[k] == true)
2787 v->NoEnoughUrgentLatencyHiding = true;
2788
2789 if (v->NoUrgentLatencyHidingPre[k] == true)
2790 v->NoEnoughUrgentLatencyHidingPre = true;
2791 }
2792
2793 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2794
2795 #ifdef __DML_VBA_DEBUG__
2796 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
2797 dml_print("DML::%s: ReturnBW=%f \n", __func__, v->ReturnBW);
2798 dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, v->FractionOfUrgentBandwidth);
2799 #endif
2800
2801 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
2802 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
2803 v->PrefetchModeSupported = true;
2804 else {
2805 v->PrefetchModeSupported = false;
2806 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
2807 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
2808 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
2809 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2810 }
2811
2812 // PREVIOUS_ERROR
2813 // This error result check was done after the PrefetchModeSupported. So we will
2814 // still try to calculate flip schedule even prefetch mode not supported
2815 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2816 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
2817 v->PrefetchModeSupported = false;
2818 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
2819 }
2820 }
2821
2822 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2823 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2824 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2825 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
2826 - dml_max(
2827 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2828 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2829 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2830 v->DPPPerPlane[k]
2831 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2832 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2833 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2834 }
2835
2836 v->TotImmediateFlipBytes = 0;
2837 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2838 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
2839 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2840 }
2841 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2842 CalculateFlipSchedule(
2843 mode_lib,
2844 k,
2845 HostVMInefficiencyFactor,
2846 v->UrgentExtraLatency,
2847 v->UrgentLatency,
2848 v->PDEAndMetaPTEBytesFrame[k],
2849 v->MetaRowByte[k],
2850 v->PixelPTEBytesPerRow[k]);
2851 }
2852
2853 v->total_dcn_read_bw_with_flip = 0.0;
2854 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2855 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2856 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
2857 + dml_max3(
2858 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2859 v->DPPPerPlane[k] * v->final_flip_bw[k]
2860 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
2861 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
2862 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2863 v->DPPPerPlane[k]
2864 * (v->final_flip_bw[k]
2865 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2866 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2867 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2868 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
2869 + dml_max3(
2870 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2871 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
2872 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2873 v->DPPPerPlane[k]
2874 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
2875 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2876 }
2877 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2878
2879 v->ImmediateFlipSupported = true;
2880 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2881 #ifdef __DML_VBA_DEBUG__
2882 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
2883 #endif
2884 v->ImmediateFlipSupported = false;
2885 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2886 }
2887 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2888 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2889 #ifdef __DML_VBA_DEBUG__
2890 dml_print("DML::%s: Pipe %0d not supporting iflip\n",
2891 __func__, k);
2892 #endif
2893 v->ImmediateFlipSupported = false;
2894 }
2895 }
2896 } else {
2897 v->ImmediateFlipSupported = false;
2898 }
2899
2900 v->PrefetchAndImmediateFlipSupported =
2901 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable
2902 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2903 v->ImmediateFlipSupported)) ? true : false;
2904 #ifdef __DML_VBA_DEBUG__
2905 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
2906 dml_print("DML::%s: ImmediateFlipRequirement[0] %d\n", __func__, v->ImmediateFlipRequirement[0] == dm_immediate_flip_required);
2907 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
2908 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
2909 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
2910 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
2911 #endif
2912 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
2913
2914 v->VStartupLines = v->VStartupLines + 1;
2915 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2916 ASSERT(v->PrefetchAndImmediateFlipSupported);
2917
2918 // Unbounded Request Enabled
2919 CalculateUnboundedRequestAndCompressedBufferSize(
2920 v->DETBufferSizeInKByte[0],
2921 v->ConfigReturnBufferSizeInKByte,
2922 v->UseUnboundedRequesting,
2923 v->TotalActiveDPP,
2924 NoChromaPlanes,
2925 v->MaxNumDPP,
2926 v->CompressedBufferSegmentSizeInkByte,
2927 v->Output,
2928 &v->UnboundedRequestEnabled,
2929 &v->CompressedBufferSizeInkByte);
2930
2931 //Watermarks and NB P-State/DRAM Clock Change Support
2932 {
2933 enum clock_change_support DRAMClockChangeSupport; // dummy
2934 CalculateWatermarksAndDRAMSpeedChangeSupport(
2935 mode_lib,
2936 PrefetchMode,
2937 v->DCFCLK,
2938 v->ReturnBW,
2939 v->UrgentLatency,
2940 v->UrgentExtraLatency,
2941 v->SOCCLK,
2942 v->DCFCLKDeepSleep,
2943 v->DETBufferSizeY,
2944 v->DETBufferSizeC,
2945 v->SwathHeightY,
2946 v->SwathHeightC,
2947 v->SwathWidthY,
2948 v->SwathWidthC,
2949 v->DPPPerPlane,
2950 v->BytePerPixelDETY,
2951 v->BytePerPixelDETC,
2952 v->UnboundedRequestEnabled,
2953 v->CompressedBufferSizeInkByte,
2954 &DRAMClockChangeSupport,
2955 &v->StutterExitWatermark,
2956 &v->StutterEnterPlusExitWatermark,
2957 &v->Z8StutterExitWatermark,
2958 &v->Z8StutterEnterPlusExitWatermark);
2959
2960 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2961 if (v->WritebackEnable[k] == true) {
2962 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
2963 0,
2964 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
2965 } else {
2966 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
2967 }
2968 }
2969 }
2970
2971 //Display Pipeline Delivery Time in Prefetch, Groups
2972 CalculatePixelDeliveryTimes(
2973 v->NumberOfActivePlanes,
2974 v->VRatio,
2975 v->VRatioChroma,
2976 v->VRatioPrefetchY,
2977 v->VRatioPrefetchC,
2978 v->swath_width_luma_ub,
2979 v->swath_width_chroma_ub,
2980 v->DPPPerPlane,
2981 v->HRatio,
2982 v->HRatioChroma,
2983 v->PixelClock,
2984 v->PSCL_THROUGHPUT_LUMA,
2985 v->PSCL_THROUGHPUT_CHROMA,
2986 v->DPPCLK,
2987 v->BytePerPixelC,
2988 v->SourceScan,
2989 v->NumberOfCursors,
2990 v->CursorWidth,
2991 v->CursorBPP,
2992 v->BlockWidth256BytesY,
2993 v->BlockHeight256BytesY,
2994 v->BlockWidth256BytesC,
2995 v->BlockHeight256BytesC,
2996 v->DisplayPipeLineDeliveryTimeLuma,
2997 v->DisplayPipeLineDeliveryTimeChroma,
2998 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
2999 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
3000 v->DisplayPipeRequestDeliveryTimeLuma,
3001 v->DisplayPipeRequestDeliveryTimeChroma,
3002 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
3003 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
3004 v->CursorRequestDeliveryTime,
3005 v->CursorRequestDeliveryTimePrefetch);
3006
3007 CalculateMetaAndPTETimes(
3008 v->NumberOfActivePlanes,
3009 v->GPUVMEnable,
3010 v->MetaChunkSize,
3011 v->MinMetaChunkSizeBytes,
3012 v->HTotal,
3013 v->VRatio,
3014 v->VRatioChroma,
3015 v->DestinationLinesToRequestRowInVBlank,
3016 v->DestinationLinesToRequestRowInImmediateFlip,
3017 v->DCCEnable,
3018 v->PixelClock,
3019 v->BytePerPixelY,
3020 v->BytePerPixelC,
3021 v->SourceScan,
3022 v->dpte_row_height,
3023 v->dpte_row_height_chroma,
3024 v->meta_row_width,
3025 v->meta_row_width_chroma,
3026 v->meta_row_height,
3027 v->meta_row_height_chroma,
3028 v->meta_req_width,
3029 v->meta_req_width_chroma,
3030 v->meta_req_height,
3031 v->meta_req_height_chroma,
3032 v->dpte_group_bytes,
3033 v->PTERequestSizeY,
3034 v->PTERequestSizeC,
3035 v->PixelPTEReqWidthY,
3036 v->PixelPTEReqHeightY,
3037 v->PixelPTEReqWidthC,
3038 v->PixelPTEReqHeightC,
3039 v->dpte_row_width_luma_ub,
3040 v->dpte_row_width_chroma_ub,
3041 v->DST_Y_PER_PTE_ROW_NOM_L,
3042 v->DST_Y_PER_PTE_ROW_NOM_C,
3043 v->DST_Y_PER_META_ROW_NOM_L,
3044 v->DST_Y_PER_META_ROW_NOM_C,
3045 v->TimePerMetaChunkNominal,
3046 v->TimePerChromaMetaChunkNominal,
3047 v->TimePerMetaChunkVBlank,
3048 v->TimePerChromaMetaChunkVBlank,
3049 v->TimePerMetaChunkFlip,
3050 v->TimePerChromaMetaChunkFlip,
3051 v->time_per_pte_group_nom_luma,
3052 v->time_per_pte_group_vblank_luma,
3053 v->time_per_pte_group_flip_luma,
3054 v->time_per_pte_group_nom_chroma,
3055 v->time_per_pte_group_vblank_chroma,
3056 v->time_per_pte_group_flip_chroma);
3057
3058 CalculateVMGroupAndRequestTimes(
3059 v->NumberOfActivePlanes,
3060 v->GPUVMEnable,
3061 v->GPUVMMaxPageTableLevels,
3062 v->HTotal,
3063 v->BytePerPixelC,
3064 v->DestinationLinesToRequestVMInVBlank,
3065 v->DestinationLinesToRequestVMInImmediateFlip,
3066 v->DCCEnable,
3067 v->PixelClock,
3068 v->dpte_row_width_luma_ub,
3069 v->dpte_row_width_chroma_ub,
3070 v->vm_group_bytes,
3071 v->dpde0_bytes_per_frame_ub_l,
3072 v->dpde0_bytes_per_frame_ub_c,
3073 v->meta_pte_bytes_per_frame_ub_l,
3074 v->meta_pte_bytes_per_frame_ub_c,
3075 v->TimePerVMGroupVBlank,
3076 v->TimePerVMGroupFlip,
3077 v->TimePerVMRequestVBlank,
3078 v->TimePerVMRequestFlip);
3079
3080 // Min TTUVBlank
3081 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3082 if (PrefetchMode == 0) {
3083 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3084 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3085 v->MinTTUVBlank[k] = dml_max(
3086 v->DRAMClockChangeWatermark,
3087 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark));
3088 } else if (PrefetchMode == 1) {
3089 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3090 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3091 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark);
3092 } else {
3093 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3094 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3095 v->MinTTUVBlank[k] = v->UrgentWatermark;
3096 }
3097 if (!v->DynamicMetadataEnable[k])
3098 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
3099 }
3100
3101 // DCC Configuration
3102 v->ActiveDPPs = 0;
3103 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3104 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3105 v->SourcePixelFormat[k],
3106 v->SurfaceWidthY[k],
3107 v->SurfaceWidthC[k],
3108 v->SurfaceHeightY[k],
3109 v->SurfaceHeightC[k],
3110 v->DETBufferSizeInKByte[k] * 1024,
3111 v->BlockHeight256BytesY[k],
3112 v->BlockHeight256BytesC[k],
3113 v->SurfaceTiling[k],
3114 v->BytePerPixelY[k],
3115 v->BytePerPixelC[k],
3116 v->BytePerPixelDETY[k],
3117 v->BytePerPixelDETC[k],
3118 v->SourceScan[k],
3119 &v->DCCYMaxUncompressedBlock[k],
3120 &v->DCCCMaxUncompressedBlock[k],
3121 &v->DCCYMaxCompressedBlock[k],
3122 &v->DCCCMaxCompressedBlock[k],
3123 &v->DCCYIndependentBlock[k],
3124 &v->DCCCIndependentBlock[k]);
3125 }
3126
3127 // VStartup Adjustment
3128 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3129 bool isInterlaceTiming;
3130 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
3131 #ifdef __DML_VBA_DEBUG__
3132 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
3133 #endif
3134
3135 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
3136
3137 #ifdef __DML_VBA_DEBUG__
3138 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
3139 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
3140 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3141 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
3142 #endif
3143
3144 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
3145 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
3146 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
3147 }
3148
3149 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
3150
3151 v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((v->VTotal[k] - v->VFrontPorch[k]) / 2.0, 1.0) : v->VTotal[k])
3152 - v->VFrontPorch[k])
3153 + dml_max(1.0, dml_ceil(v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1.0))
3154 + dml_floor(4.0 * v->TSetup[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0;
3155
3156 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
3157
3158 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
3159 <= (isInterlaceTiming ?
3160 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
3161 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
3162 v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
3163 } else {
3164 v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
3165 }
3166 #ifdef __DML_VBA_DEBUG__
3167 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
3168 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
3169 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
3170 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
3171 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
3172 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
3173 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
3174 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
3175 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3176 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
3177 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
3178 #endif
3179 }
3180
3181 {
3182 //Maximum Bandwidth Used
3183 double TotalWRBandwidth = 0;
3184 double MaxPerPlaneVActiveWRBandwidth = 0;
3185 double WRBandwidth = 0;
3186 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3187 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
3188 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3189 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3190 } else if (v->WritebackEnable[k] == true) {
3191 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3192 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3193 }
3194 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3195 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3196 }
3197
3198 v->TotalDataReadBandwidth = 0;
3199 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3200 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
3201 }
3202 }
3203 // Stutter Efficiency
3204 CalculateStutterEfficiency(
3205 mode_lib,
3206 v->CompressedBufferSizeInkByte,
3207 v->UnboundedRequestEnabled,
3208 v->ConfigReturnBufferSizeInKByte,
3209 v->MetaFIFOSizeInKEntries,
3210 v->ZeroSizeBufferEntries,
3211 v->NumberOfActivePlanes,
3212 v->ROBBufferSizeInKByte,
3213 v->TotalDataReadBandwidth,
3214 v->DCFCLK,
3215 v->ReturnBW,
3216 v->COMPBUF_RESERVED_SPACE_64B,
3217 v->COMPBUF_RESERVED_SPACE_ZS,
3218 v->SRExitTime,
3219 v->SRExitZ8Time,
3220 v->SynchronizedVBlank,
3221 v->StutterEnterPlusExitWatermark,
3222 v->Z8StutterEnterPlusExitWatermark,
3223 v->ProgressiveToInterlaceUnitInOPP,
3224 v->Interlace,
3225 v->MinTTUVBlank,
3226 v->DPPPerPlane,
3227 v->DETBufferSizeY,
3228 v->BytePerPixelY,
3229 v->BytePerPixelDETY,
3230 v->SwathWidthY,
3231 v->SwathHeightY,
3232 v->SwathHeightC,
3233 v->DCCRateLuma,
3234 v->DCCRateChroma,
3235 v->DCCFractionOfZeroSizeRequestsLuma,
3236 v->DCCFractionOfZeroSizeRequestsChroma,
3237 v->HTotal,
3238 v->VTotal,
3239 v->PixelClock,
3240 v->VRatio,
3241 v->SourceScan,
3242 v->BlockHeight256BytesY,
3243 v->BlockWidth256BytesY,
3244 v->BlockHeight256BytesC,
3245 v->BlockWidth256BytesC,
3246 v->DCCYMaxUncompressedBlock,
3247 v->DCCCMaxUncompressedBlock,
3248 v->VActive,
3249 v->DCCEnable,
3250 v->WritebackEnable,
3251 v->ReadBandwidthPlaneLuma,
3252 v->ReadBandwidthPlaneChroma,
3253 v->meta_row_bw,
3254 v->dpte_row_bw,
3255 &v->StutterEfficiencyNotIncludingVBlank,
3256 &v->StutterEfficiency,
3257 &v->NumberOfStutterBurstsPerFrame,
3258 &v->Z8StutterEfficiencyNotIncludingVBlank,
3259 &v->Z8StutterEfficiency,
3260 &v->Z8NumberOfStutterBurstsPerFrame,
3261 &v->StutterPeriod);
3262 }
3263
3264 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3265 {
3266 struct vba_vars_st *v = &mode_lib->vba;
3267 // Display Pipe Configuration
3268 double BytePerPixDETY[DC__NUM_DPP__MAX];
3269 double BytePerPixDETC[DC__NUM_DPP__MAX];
3270 int BytePerPixY[DC__NUM_DPP__MAX];
3271 int BytePerPixC[DC__NUM_DPP__MAX];
3272 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
3273 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
3274 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
3275 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
3276 double dummy1[DC__NUM_DPP__MAX];
3277 double dummy2[DC__NUM_DPP__MAX];
3278 double dummy3[DC__NUM_DPP__MAX];
3279 double dummy4[DC__NUM_DPP__MAX];
3280 int dummy5[DC__NUM_DPP__MAX];
3281 int dummy6[DC__NUM_DPP__MAX];
3282 bool dummy7[DC__NUM_DPP__MAX];
3283 bool dummysinglestring;
3284
3285 unsigned int k;
3286
3287 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3288
3289 dml30_CalculateBytePerPixelAnd256BBlockSizes(
3290 v->SourcePixelFormat[k],
3291 v->SurfaceTiling[k],
3292 &BytePerPixY[k],
3293 &BytePerPixC[k],
3294 &BytePerPixDETY[k],
3295 &BytePerPixDETC[k],
3296 &Read256BytesBlockHeightY[k],
3297 &Read256BytesBlockHeightC[k],
3298 &Read256BytesBlockWidthY[k],
3299 &Read256BytesBlockWidthC[k]);
3300 }
3301
3302 CalculateSwathAndDETConfiguration(
3303 false,
3304 v->NumberOfActivePlanes,
3305 mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0],
3306 v->DETBufferSizeInKByte,
3307 dummy1,
3308 dummy2,
3309 v->SourceScan,
3310 v->SourcePixelFormat,
3311 v->SurfaceTiling,
3312 v->ViewportWidth,
3313 v->ViewportHeight,
3314 v->SurfaceWidthY,
3315 v->SurfaceWidthC,
3316 v->SurfaceHeightY,
3317 v->SurfaceHeightC,
3318 Read256BytesBlockHeightY,
3319 Read256BytesBlockHeightC,
3320 Read256BytesBlockWidthY,
3321 Read256BytesBlockWidthC,
3322 v->ODMCombineEnabled,
3323 v->BlendingAndTiming,
3324 BytePerPixY,
3325 BytePerPixC,
3326 BytePerPixDETY,
3327 BytePerPixDETC,
3328 v->HActive,
3329 v->HRatio,
3330 v->HRatioChroma,
3331 v->DPPPerPlane,
3332 dummy5,
3333 dummy6,
3334 dummy3,
3335 dummy4,
3336 v->SwathHeightY,
3337 v->SwathHeightC,
3338 v->DETBufferSizeY,
3339 v->DETBufferSizeC,
3340 dummy7,
3341 &dummysinglestring);
3342 }
3343
/*
 * Pick the wait time that applies to the given prefetch mode:
 *   mode 0    - max(DRAM clock change + urgent latency, SR enter+exit, urgent latency)
 *   mode 1    - max(SR enter+exit, urgent latency)
 *   otherwise - urgent latency only
 */
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
3354
3355 double dml31_CalculateWriteBackDISPCLK(
3356 enum source_format_class WritebackPixelFormat,
3357 double PixelClock,
3358 double WritebackHRatio,
3359 double WritebackVRatio,
3360 unsigned int WritebackHTaps,
3361 unsigned int WritebackVTaps,
3362 long WritebackSourceWidth,
3363 long WritebackDestinationWidth,
3364 unsigned int HTotal,
3365 unsigned int WritebackLineBufferSize)
3366 {
3367 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
3368
3369 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
3370 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
3371 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3372 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
3373 }
3374
3375 static double CalculateWriteBackDelay(
3376 enum source_format_class WritebackPixelFormat,
3377 double WritebackHRatio,
3378 double WritebackVRatio,
3379 unsigned int WritebackVTaps,
3380 int WritebackDestinationWidth,
3381 int WritebackDestinationHeight,
3382 int WritebackSourceHeight,
3383 unsigned int HTotal)
3384 {
3385 double CalculateWriteBackDelay;
3386 double Line_length;
3387 double Output_lines_last_notclamped;
3388 double WritebackVInit;
3389
3390 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
3391 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
3392 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
3393 if (Output_lines_last_notclamped < 0) {
3394 CalculateWriteBackDelay = 0;
3395 } else {
3396 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3397 }
3398 return CalculateWriteBackDelay;
3399 }
3400
/*
 * Compute the VUPDATE/VREADY pixel offsets and the dynamic-metadata timing
 * terms for one pipe.
 *
 * Outputs:
 *   *VUpdateWidthPix  - ceil((14/DCFClkDeepSleep + 12/DPPCLK + repeater delay) * PixelClock)
 *   *VReadyOffsetPix  - ceil(max(150/DPPCLK, repeater delay + 20/DCFClkDeepSleep + 10/DPPCLK) * PixelClock)
 *   *VUpdateOffsetPix - ceil(HTotal / 4)
 *   *TSetup           - sum of the three values above divided by PixelClock
 *   *Tdmbf            - DynamicMetadataTransmittedBytes / 4 / DISPCLK
 *   *Tdmec            - HTotal / PixelClock (one line time)
 *   *Tdmsks           - half of VBlank line times when no "lines before active"
 *                       requirement is given, otherwise that many line times;
 *                       halved again for interlace without P2I in the OPP.
 *
 * where "repeater delay" is MaxInterDCNTileRepeaters * (2/DPPCLK + 3/DISPCLK).
 */
static void CalculateVupdateAndDynamicMetadataParameters(
		int MaxInterDCNTileRepeaters,
		double DPPCLK,
		double DISPCLK,
		double DCFClkDeepSleep,
		double PixelClock,
		int HTotal,
		int VBlank,
		int DynamicMetadataTransmittedBytes,
		int DynamicMetadataLinesBeforeActiveRequired,
		int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	*VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;
	if (DynamicMetadataLinesBeforeActiveRequired == 0) {
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	} else {
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	}
	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
		*Tdmsks = *Tdmsks / 2;
	}
#ifdef __DML_VBA_DEBUG__
	/* The width and ready offset are doubles, so they need %f; only the update offset is an int. */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
#endif
}
3444
3445 static void CalculateRowBandwidth(
3446 bool GPUVMEnable,
3447 enum source_format_class SourcePixelFormat,
3448 double VRatio,
3449 double VRatioChroma,
3450 bool DCCEnable,
3451 double LineTime,
3452 unsigned int MetaRowByteLuma,
3453 unsigned int MetaRowByteChroma,
3454 unsigned int meta_row_height_luma,
3455 unsigned int meta_row_height_chroma,
3456 unsigned int PixelPTEBytesPerRowLuma,
3457 unsigned int PixelPTEBytesPerRowChroma,
3458 unsigned int dpte_row_height_luma,
3459 unsigned int dpte_row_height_chroma,
3460 double *meta_row_bw,
3461 double *dpte_row_bw)
3462 {
3463 if (DCCEnable != true) {
3464 *meta_row_bw = 0;
3465 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3466 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
3467 } else {
3468 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3469 }
3470
3471 if (GPUVMEnable != true) {
3472 *dpte_row_bw = 0;
3473 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3474 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3475 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
3476 } else {
3477 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
3478 }
3479 }
3480
3481 static void CalculateFlipSchedule(
3482 struct display_mode_lib *mode_lib,
3483 unsigned int k,
3484 double HostVMInefficiencyFactor,
3485 double UrgentExtraLatency,
3486 double UrgentLatency,
3487 double PDEAndMetaPTEBytesPerFrame,
3488 double MetaRowBytes,
3489 double DPTEBytesPerRow)
3490 {
3491 struct vba_vars_st *v = &mode_lib->vba;
3492 double min_row_time = 0.0;
3493 unsigned int HostVMDynamicLevelsTrips;
3494 double TimeForFetchingMetaPTEImmediateFlip;
3495 double TimeForFetchingRowInVBlankImmediateFlip;
3496 double ImmediateFlipBW = 1.0;
3497 double LineTime = v->HTotal[k] / v->PixelClock[k];
3498
3499 if (v->GPUVMEnable == true && v->HostVMEnable == true) {
3500 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
3501 } else {
3502 HostVMDynamicLevelsTrips = 0;
3503 }
3504
3505 if (v->GPUVMEnable == true || v->DCCEnable[k] == true) {
3506 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes;
3507 }
3508
3509 if (v->GPUVMEnable == true) {
3510 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3511 v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3512 UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
3513 LineTime / 4.0);
3514 } else {
3515 TimeForFetchingMetaPTEImmediateFlip = 0;
3516 }
3517
3518 v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
3519 if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3520 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
3521 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3522 UrgentLatency * (HostVMDynamicLevelsTrips + 1),
3523 LineTime / 4);
3524 } else {
3525 TimeForFetchingRowInVBlankImmediateFlip = 0;
3526 }
3527
3528 v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
3529
3530 if (v->GPUVMEnable == true) {
3531 v->final_flip_bw[k] = dml_max(
3532 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime),
3533 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime));
3534 } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3535 v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime);
3536 } else {
3537 v->final_flip_bw[k] = 0;
3538 }
3539
3540 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
3541 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3542 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3543 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3544 min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3545 } else {
3546 min_row_time = dml_min4(
3547 v->dpte_row_height[k] * LineTime / v->VRatio[k],
3548 v->meta_row_height[k] * LineTime / v->VRatio[k],
3549 v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k],
3550 v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3551 }
3552 } else {
3553 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3554 min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k];
3555 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3556 min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k];
3557 } else {
3558 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]);
3559 }
3560 }
3561
3562 if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16
3563 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3564 v->ImmediateFlipSupportedForPipe[k] = false;
3565 } else {
3566 v->ImmediateFlipSupportedForPipe[k] = true;
3567 }
3568
3569 #ifdef __DML_VBA_DEBUG__
3570 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]);
3571 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]);
3572 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
3573 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
3574 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
3575 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]);
3576 #endif
3577
3578 }
3579
3580 static double TruncToValidBPP(
3581 double LinkBitRate,
3582 int Lanes,
3583 int HTotal,
3584 int HActive,
3585 double PixelClock,
3586 double DesiredBPP,
3587 bool DSCEnable,
3588 enum output_encoder_class Output,
3589 enum output_format_class Format,
3590 unsigned int DSCInputBitPerComponent,
3591 int DSCSlices,
3592 int AudioRate,
3593 int AudioLayout,
3594 enum odm_combine_mode ODMCombine)
3595 {
3596 double MaxLinkBPP;
3597 int MinDSCBPP;
3598 double MaxDSCBPP;
3599 int NonDSCBPP0;
3600 int NonDSCBPP1;
3601 int NonDSCBPP2;
3602
3603 if (Format == dm_420) {
3604 NonDSCBPP0 = 12;
3605 NonDSCBPP1 = 15;
3606 NonDSCBPP2 = 18;
3607 MinDSCBPP = 6;
3608 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
3609 } else if (Format == dm_444) {
3610 NonDSCBPP0 = 24;
3611 NonDSCBPP1 = 30;
3612 NonDSCBPP2 = 36;
3613 MinDSCBPP = 8;
3614 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3615 } else {
3616
3617 NonDSCBPP0 = 16;
3618 NonDSCBPP1 = 20;
3619 NonDSCBPP2 = 24;
3620
3621 if (Format == dm_n422) {
3622 MinDSCBPP = 7;
3623 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3624 } else {
3625 MinDSCBPP = 8;
3626 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3627 }
3628 }
3629
3630 if (DSCEnable && Output == dm_dp) {
3631 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3632 } else {
3633 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
3634 }
3635
3636 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3637 MaxLinkBPP = 16;
3638 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
3639 MaxLinkBPP = 32;
3640 }
3641
3642 if (DesiredBPP == 0) {
3643 if (DSCEnable) {
3644 if (MaxLinkBPP < MinDSCBPP) {
3645 return BPP_INVALID;
3646 } else if (MaxLinkBPP >= MaxDSCBPP) {
3647 return MaxDSCBPP;
3648 } else {
3649 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3650 }
3651 } else {
3652 if (MaxLinkBPP >= NonDSCBPP2) {
3653 return NonDSCBPP2;
3654 } else if (MaxLinkBPP >= NonDSCBPP1) {
3655 return NonDSCBPP1;
3656 } else if (MaxLinkBPP >= NonDSCBPP0) {
3657 return 16.0;
3658 } else {
3659 return BPP_INVALID;
3660 }
3661 }
3662 } else {
3663 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
3664 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3665 return BPP_INVALID;
3666 } else {
3667 return DesiredBPP;
3668 }
3669 }
3670 }
3671
/*
 * Run CalculatePrefetchSchedule() for a single plane.
 *
 * Gathers the pipe description for plane k into a local Pipe struct, calls
 * CalculatePrefetchSchedule() with the [i][j][k]-indexed and per-plane values
 * from mode_lib->vba, and stores the return value in
 * v->NoTimeForPrefetch[i][j][k].  The remaining results are written through
 * the pointer arguments at the end of the call.
 *
 * i, j: indices into the per-state arrays (presumably the voltage state and
 *       the second mode-support loop index - confirm against the caller).
 * k:    plane index.
 *
 * NOTE(review): noinline presumably keeps this large call frame out of the
 * caller's stack usage - confirm.
 */
static noinline void CalculatePrefetchSchedulePerPlane(
		struct display_mode_lib *mode_lib,
		double HostVMInefficiencyFactor,
		int i,
		unsigned j,
		unsigned k)
{
	struct vba_vars_st *v = &mode_lib->vba;
	Pipe myPipe;

	/* Clocks and scaling for this state/plane. */
	myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
	myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
	myPipe.PixelClock = v->PixelClock[k];
	myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
	myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
	myPipe.ScalerEnabled = v->ScalerEnabled[k];
	myPipe.VRatio = mode_lib->vba.VRatio[k];
	myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k];

	/* Surface layout and timing. */
	myPipe.SourceScan = v->SourceScan[k];
	myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
	myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
	myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
	myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
	myPipe.InterlaceEnable = v->Interlace[k];
	myPipe.NumberOfCursors = v->NumberOfCursors[k];
	myPipe.VBlank = v->VTotal[k] - v->VActive[k];
	myPipe.HTotal = v->HTotal[k];
	myPipe.DCCEnable = v->DCCEnable[k];
	/* True for either ODM combine mode (4:1 or 2:1). */
	myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
		|| v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
	myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
	myPipe.BytePerPixelY = v->BytePerPixelY[k];
	myPipe.BytePerPixelC = v->BytePerPixelC[k];
	myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
	/* Arguments are positional; keep this list in the callee's order. */
	v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
		mode_lib,
		HostVMInefficiencyFactor,
		&myPipe,
		v->DSCDelayPerState[i][k],
		v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
		v->DPPCLKDelaySCL,
		v->DPPCLKDelaySCLLBOnly,
		v->DPPCLKDelayCNVCCursor,
		v->DISPCLKDelaySubtotal,
		v->SwathWidthYThisState[k] / v->HRatio[k],
		v->OutputFormat[k],
		v->MaxInterDCNTileRepeaters,
		dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
		v->MaximumVStartup[i][j][k],
		v->GPUVMMaxPageTableLevels,
		v->GPUVMEnable,
		v->HostVMEnable,
		v->HostVMMaxNonCachedPageTableLevels,
		v->HostVMMinPageSize,
		v->DynamicMetadataEnable[k],
		v->DynamicMetadataVMEnabled,
		v->DynamicMetadataLinesBeforeActiveRequired[k],
		v->DynamicMetadataTransmittedBytes[k],
		v->UrgLatency[i],
		v->ExtraLatency,
		v->TimeCalc,
		v->PDEAndMetaPTEBytesPerFrame[i][j][k],
		v->MetaRowBytes[i][j][k],
		v->DPTEBytesPerRow[i][j][k],
		v->PrefetchLinesY[i][j][k],
		v->SwathWidthYThisState[k],
		v->PrefillY[k],
		v->MaxNumSwY[k],
		v->PrefetchLinesC[i][j][k],
		v->SwathWidthCThisState[k],
		v->PrefillC[k],
		v->MaxNumSwC[k],
		v->swath_width_luma_ub_this_state[k],
		v->swath_width_chroma_ub_this_state[k],
		v->SwathHeightYThisState[k],
		v->SwathHeightCThisState[k],
		v->TWait,
		/* Everything from here on is written by the callee. */
		&v->DSTXAfterScaler[k],
		&v->DSTYAfterScaler[k],
		&v->LineTimesForPrefetch[k],
		&v->PrefetchBW[k],
		&v->LinesForMetaPTE[k],
		&v->LinesForMetaAndDPTERow[k],
		&v->VRatioPreY[i][j][k],
		&v->VRatioPreC[i][j][k],
		&v->RequiredPrefetchPixelDataBWLuma[i][j][k],
		&v->RequiredPrefetchPixelDataBWChroma[i][j][k],
		&v->NoTimeForDynamicMetadata[i][j][k],
		&v->Tno_bw[k],
		&v->prefetch_vmrow_bw[k],
		&v->dummy7[k],
		&v->dummy8[k],
		&v->dummy13[k],
		&v->VUpdateOffsetPix[k],
		&v->VUpdateWidthPix[k],
		&v->VReadyOffsetPix[k]);
}
3770
3771 static void PatchDETBufferSizeInKByte(unsigned int NumberOfActivePlanes, int NoOfDPPThisState[], unsigned int config_return_buffer_size_in_kbytes, unsigned int DETBufferSizeInKByte[])
3772 {
3773 int i, total_pipes = 0;
3774 for (i = 0; i < NumberOfActivePlanes; i++)
3775 total_pipes += NoOfDPPThisState[i];
3776 DETBufferSizeInKByte[0] = ((config_return_buffer_size_in_kbytes - DCN3_15_MIN_COMPBUF_SIZE_KB) / 64 / total_pipes) * 64;
3777 if (DETBufferSizeInKByte[0] > DCN3_15_MAX_DET_SIZE)
3778 DETBufferSizeInKByte[0] = DCN3_15_MAX_DET_SIZE;
3779 for (i = 1; i < NumberOfActivePlanes; i++)
3780 DETBufferSizeInKByte[i] = DETBufferSizeInKByte[0];
3781 }
3782
3783
3784 void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3785 {
3786 struct vba_vars_st *v = &mode_lib->vba;
3787
3788 int i, j;
3789 unsigned int k, m;
3790 int ReorderingBytes;
3791 int MinPrefetchMode = 0, MaxPrefetchMode = 2;
3792 bool NoChroma = true;
3793 bool EnoughWritebackUnits = true;
3794 bool P2IWith420 = false;
3795 bool DSCOnlyIfNecessaryWithBPP = false;
3796 bool DSC422NativeNotSupported = false;
3797 double MaxTotalVActiveRDBandwidth;
3798 bool ViewportExceedsSurface = false;
3799 bool FMTBufferExceeded = false;
3800
3801 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3802
3803 CalculateMinAndMaxPrefetchMode(
3804 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3805 &MinPrefetchMode, &MaxPrefetchMode);
3806
3807 /*Scale Ratio, taps Support Check*/
3808
3809 v->ScaleRatioAndTapsSupport = true;
3810 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3811 if (v->ScalerEnabled[k] == false
3812 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3813 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3814 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3815 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
3816 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
3817 v->ScaleRatioAndTapsSupport = false;
3818 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3819 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
3820 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
3821 || v->VRatio[k] > v->vtaps[k]
3822 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3823 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3824 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3825 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
3826 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3827 || v->HRatioChroma[k] > v->MaxHSCLRatio
3828 || v->VRatioChroma[k] > v->MaxVSCLRatio
3829 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3830 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3831 v->ScaleRatioAndTapsSupport = false;
3832 }
3833 }
3834 /*Source Format, Pixel Format and Scan Support Check*/
3835
3836 v->SourceFormatPixelAndScanSupport = true;
3837 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3838 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
3839 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t
3840 || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) {
3841 v->SourceFormatPixelAndScanSupport = false;
3842 }
3843 }
3844 /*Bandwidth Support Check*/
3845
3846 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3847 dml30_CalculateBytePerPixelAnd256BBlockSizes(
3848 v->SourcePixelFormat[k],
3849 v->SurfaceTiling[k],
3850 &v->BytePerPixelY[k],
3851 &v->BytePerPixelC[k],
3852 &v->BytePerPixelInDETY[k],
3853 &v->BytePerPixelInDETC[k],
3854 &v->Read256BlockHeightY[k],
3855 &v->Read256BlockHeightC[k],
3856 &v->Read256BlockWidthY[k],
3857 &v->Read256BlockWidthC[k]);
3858 }
3859 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3860 if (v->SourceScan[k] != dm_vert) {
3861 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3862 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3863 } else {
3864 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3865 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3866 }
3867 }
3868 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3869 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
3870 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3871 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
3872 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3873 }
3874 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3875 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
3876 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3877 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
3878 } else if (v->WritebackEnable[k] == true) {
3879 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3880 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
3881 } else {
3882 v->WriteBandwidth[k] = 0.0;
3883 }
3884 }
3885
3886 /*Writeback Latency support check*/
3887
3888 v->WritebackLatencySupport = true;
3889 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3890 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
3891 v->WritebackLatencySupport = false;
3892 }
3893 }
3894
3895 /*Writeback Mode Support Check*/
3896
3897 v->TotalNumberOfActiveWriteback = 0;
3898 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3899 if (v->WritebackEnable[k] == true) {
3900 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
3901 }
3902 }
3903
3904 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
3905 EnoughWritebackUnits = false;
3906 }
3907
3908 /*Writeback Scale Ratio and Taps Support Check*/
3909
3910 v->WritebackScaleRatioAndTapsSupport = true;
3911 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3912 if (v->WritebackEnable[k] == true) {
3913 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
3914 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
3915 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
3916 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
3917 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
3918 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
3919 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
3920 v->WritebackScaleRatioAndTapsSupport = false;
3921 }
3922 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
3923 v->WritebackScaleRatioAndTapsSupport = false;
3924 }
3925 }
3926 }
3927 /*Maximum DISPCLK/DPPCLK Support check*/
3928
3929 v->WritebackRequiredDISPCLK = 0.0;
3930 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3931 if (v->WritebackEnable[k] == true) {
3932 v->WritebackRequiredDISPCLK = dml_max(
3933 v->WritebackRequiredDISPCLK,
3934 dml31_CalculateWriteBackDISPCLK(
3935 v->WritebackPixelFormat[k],
3936 v->PixelClock[k],
3937 v->WritebackHRatio[k],
3938 v->WritebackVRatio[k],
3939 v->WritebackHTaps[k],
3940 v->WritebackVTaps[k],
3941 v->WritebackSourceWidth[k],
3942 v->WritebackDestinationWidth[k],
3943 v->HTotal[k],
3944 v->WritebackLineBufferSize));
3945 }
3946 }
3947 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3948 if (v->HRatio[k] > 1.0) {
3949 v->PSCL_FACTOR[k] = dml_min(
3950 v->MaxDCHUBToPSCLThroughput,
3951 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
3952 } else {
3953 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3954 }
3955 if (v->BytePerPixelC[k] == 0.0) {
3956 v->PSCL_FACTOR_CHROMA[k] = 0.0;
3957 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
3958 * dml_max3(
3959 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
3960 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
3961 1.0);
3962 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3963 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3964 }
3965 } else {
3966 if (v->HRatioChroma[k] > 1.0) {
3967 v->PSCL_FACTOR_CHROMA[k] = dml_min(
3968 v->MaxDCHUBToPSCLThroughput,
3969 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
3970 } else {
3971 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3972 }
3973 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
3974 * dml_max5(
3975 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
3976 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
3977 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
3978 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
3979 1.0);
3980 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
3981 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3982 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3983 }
3984 }
3985 }
3986 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3987 int MaximumSwathWidthSupportLuma;
3988 int MaximumSwathWidthSupportChroma;
3989
3990 if (v->SurfaceTiling[k] == dm_sw_linear) {
3991 MaximumSwathWidthSupportLuma = 8192.0;
3992 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
3993 MaximumSwathWidthSupportLuma = 2880.0;
3994 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
3995 MaximumSwathWidthSupportLuma = 3840.0;
3996 } else {
3997 MaximumSwathWidthSupportLuma = 5760.0;
3998 }
3999
4000 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
4001 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
4002 } else {
4003 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
4004 }
4005 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
4006 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
4007 if (v->BytePerPixelC[k] == 0.0) {
4008 v->MaximumSwathWidthInLineBufferChroma = 0;
4009 } else {
4010 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
4011 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
4012 }
4013 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
4014 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
4015 }
4016
4017 CalculateSwathAndDETConfiguration(
4018 true,
4019 v->NumberOfActivePlanes,
4020 mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0],
4021 v->DETBufferSizeInKByte,
4022 v->MaximumSwathWidthLuma,
4023 v->MaximumSwathWidthChroma,
4024 v->SourceScan,
4025 v->SourcePixelFormat,
4026 v->SurfaceTiling,
4027 v->ViewportWidth,
4028 v->ViewportHeight,
4029 v->SurfaceWidthY,
4030 v->SurfaceWidthC,
4031 v->SurfaceHeightY,
4032 v->SurfaceHeightC,
4033 v->Read256BlockHeightY,
4034 v->Read256BlockHeightC,
4035 v->Read256BlockWidthY,
4036 v->Read256BlockWidthC,
4037 v->odm_combine_dummy,
4038 v->BlendingAndTiming,
4039 v->BytePerPixelY,
4040 v->BytePerPixelC,
4041 v->BytePerPixelInDETY,
4042 v->BytePerPixelInDETC,
4043 v->HActive,
4044 v->HRatio,
4045 v->HRatioChroma,
4046 v->NoOfDPPThisState,
4047 v->swath_width_luma_ub_this_state,
4048 v->swath_width_chroma_ub_this_state,
4049 v->SwathWidthYThisState,
4050 v->SwathWidthCThisState,
4051 v->SwathHeightYThisState,
4052 v->SwathHeightCThisState,
4053 v->DETBufferSizeYThisState,
4054 v->DETBufferSizeCThisState,
4055 v->SingleDPPViewportSizeSupportPerPlane,
4056 &v->ViewportSizeSupport[0][0]);
4057
/*
 * DISPCLK / DPPCLK support check.
 *
 * For every state i and for both DPP-allocation variants j (0 and 1), pick
 * the ODM combine mode, the MPC combine flag, the DPP count and the required
 * DPPCLK of each plane, accumulate the largest per-plane DISPCLK requirement
 * into RequiredDISPCLK[i][j], and record in DISPCLK_DPPCLK_Support[i][j]
 * whether every requirement fits under the state's MaxDispclk / MaxDppclk
 * rounded down to DFS granularity. Only j == 1 runs the extra MPC-split pass
 * further down.
 */
4058 for (i = 0; i < v->soc.num_states; i++) {
4059 for (j = 0; j < 2; j++) {
4060 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
4061 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
4062 v->RequiredDISPCLK[i][j] = 0.0;
4063 v->DISPCLK_DPPCLK_Support[i][j] = true;
4064 for (k = 0; k < v->NumberOfActivePlanes; k++) {
/*
 * Required DISPCLK without ODM, with 2:1 ODM and with 4:1 ODM: pixel clock
 * (divided by 1, 2 or 4) plus down-spread and ramping margin. In each case
 * the ramping margin is dropped again when the result reaches MaxDispclk[i]
 * and state i already has the same max DISPCLK and DPPCLK as the last state.
 */
4065 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4066 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4067 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
4068 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4069 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4070 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
4071 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4072 }
4073 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4074 * (1 + v->DISPCLKRampingMargin / 100.0);
4075 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
4076 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4077 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4078 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
4079 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4080 }
4081 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4082 * (1 + v->DISPCLKRampingMargin / 100.0);
4083 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
4084 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4085 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4086 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
4087 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4088 }
4089
/*
 * ODM mode selection. ODM stays disabled when the policy is "none" or the
 * output is not DP / DP2.0 / eDP; an explicit 2:1 or 4:1 policy is honoured;
 * otherwise the smallest combine whose DISPCLK fits under the rounded-down
 * maximum is chosen.
 */
4090 if (v->ODMCombinePolicy == dm_odm_combine_policy_none
4091 || !(v->Output[k] == dm_dp ||
4092 v->Output[k] == dm_dp2p0 ||
4093 v->Output[k] == dm_edp)) {
4094 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4095 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4096
4097 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4098 FMTBufferExceeded = true;
4099 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
4100 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4101 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4102 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
4103 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
4104 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4105 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4106 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
4107 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4108 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4109 } else {
4110 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4111 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4112 }
/*
 * DSC with HActive above DCN31_MAX_DSC_IMAGE_WIDTH forces 2:1 ODM, or 4:1
 * when half the width is still too wide.
 * NOTE(review): this test reads v->DSCEnabled[] while the DSC checks further
 * down read v->DSCEnable[] -- confirm that both fields are intended.
 */
4113 if (v->DSCEnabled[k] && v->HActive[k] > DCN31_MAX_DSC_IMAGE_WIDTH
4114 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4115 if (v->HActive[k] / 2 > DCN31_MAX_DSC_IMAGE_WIDTH) {
4116 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4117 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4118 } else {
4119 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4120 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4121 }
4122 }
/*
 * 4:2:0 output wider than DCN31_MAX_FMT_420_BUFFER_WIDTH: HDMI is flagged
 * through FMTBufferExceeded; other outputs are forced to 2:1 or 4:1 ODM, and
 * flagged as well if even a quarter of the width does not fit.
 */
4123 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN31_MAX_FMT_420_BUFFER_WIDTH
4124 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4125 if (v->Output[k] == dm_hdmi) {
4126 FMTBufferExceeded = true;
4127 } else if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) {
4128 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4129 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4130
4131 if (v->HActive[k] / 4 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4132 FMTBufferExceeded = true;
4133 } else {
4134 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4135 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4136 }
4137 }
/*
 * DPP count and required DPPCLK: 4 or 2 DPPs for 4:1 / 2:1 ODM; otherwise a
 * single DPP when MPC combine is never allowed, or when one DPP meets the
 * DPPCLK limit and the single-DPP viewport is supported; otherwise MPC
 * combine with 2 DPPs.
 */
4138 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4139 v->MPCCombine[i][j][k] = false;
4140 v->NoOfDPP[i][j][k] = 4;
4141 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4142 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4143 v->MPCCombine[i][j][k] = false;
4144 v->NoOfDPP[i][j][k] = 2;
4145 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4146 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4147 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4148 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4149 v->MPCCombine[i][j][k] = false;
4150 v->NoOfDPP[i][j][k] = 1;
4151 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4152 } else {
4153 v->MPCCombine[i][j][k] = true;
4154 v->NoOfDPP[i][j][k] = 2;
4155 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4156 }
4157 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4158 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4159 > v->MaxDppclkRoundedDownToDFSGranularity)
4160 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4161 v->DISPCLK_DPPCLK_Support[i][j] = false;
4162 }
/*
 * DCN315 only: a DET size override above DCN3_15_MAX_DET_SIZE on a plane that
 * still uses one DPP switches it to MPC combine with 2 DPPs.
 */
4163 if (mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[k] > DCN3_15_MAX_DET_SIZE && v->NoOfDPP[i][j][k] < 2) {
4164 v->MPCCombine[i][j][k] = true;
4165 v->NoOfDPP[i][j][k] = 2;
4166 }
4167 }
/*
 * Total the DPPs and the single-DPP planes; NoChroma is cleared when any
 * plane uses a 4:2:0 or rgbe_alpha source format.
 */
4168 v->TotalNumberOfActiveDPP[i][j] = 0;
4169 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4170 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4171 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4172 if (v->NoOfDPP[i][j][k] == 1)
4173 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4174 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4175 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
4176 NoChroma = false;
4177 }
4178
4179 // UPTO
/*
 * j == 1 only, when MPC combine is allowed and UnboundedRequest() returns
 * false: repeatedly MPC-split the not-yet-split plane with the largest luma +
 * chroma read bandwidth until the DPP total reaches MaxNumDPP or no
 * single-DPP plane is left.
 */
4180 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
4181 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) {
4182 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4183 double BWOfNonSplitPlaneOfMaximumBandwidth;
4184 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4185 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4186 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4187 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4188 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4189 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4190 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4191 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4192 }
4193 }
4194 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4195 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4196 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4197 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4198 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4199 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4200 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4201 }
4202 }
/*
 * More DPPs requested than available: redo the allocation with ODM disabled
 * on every plane (MPC combine only where the single-DPP viewport is not
 * supported and MPC combine is allowed), then recompute the required clocks,
 * the support flag and the DPP total.
 */
4203 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4204 v->RequiredDISPCLK[i][j] = 0.0;
4205 v->DISPCLK_DPPCLK_Support[i][j] = true;
4206 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4207 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4208 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4209 v->MPCCombine[i][j][k] = true;
4210 v->NoOfDPP[i][j][k] = 2;
4211 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4212 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4213 } else {
4214 v->MPCCombine[i][j][k] = false;
4215 v->NoOfDPP[i][j][k] = 1;
4216 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4217 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4218 }
4219 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4220 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4221 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4222 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4223 } else {
4224 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4225 }
4226 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4227 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4228 > v->MaxDppclkRoundedDownToDFSGranularity)
4229 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4230 v->DISPCLK_DPPCLK_Support[i][j] = false;
4231 }
4232 }
4233 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4234 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4235 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4236 }
4237 }
/* Writeback also bounds DISPCLK: it raises the requirement and must fit too. */
4238 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4239 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4240 v->DISPCLK_DPPCLK_Support[i][j] = false;
4241 }
4242 }
4243 }
4244
4245 /*Total Available Pipes Support Check*/
4246
/*
 * TotalAvailablePipesSupport[i][j] is true exactly when the DPP total computed
 * for state i / variant j does not exceed MaxNumDPP.
 */
4247 for (i = 0; i < v->soc.num_states; i++) {
4248 for (j = 0; j < 2; j++) {
4249 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4250 v->TotalAvailablePipesSupport[i][j] = true;
4251 } else {
4252 v->TotalAvailablePipesSupport[i][j] = false;
4253 }
4254 }
4255 }
4256 /*Display IO and DSC Support Check*/
4257
/*
 * NonsupportedDSCInputBPC becomes true when any active plane has a DSC input
 * bits-per-component value other than 8, 10 or 12, or one larger than
 * MaximumDSCBitsPerComponent.
 */
4258 v->NonsupportedDSCInputBPC = false;
4259 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4260 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
4261 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
4262 v->NonsupportedDSCInputBPC = true;
4263 }
4264 }
4265
4266 /*Number Of DSC Slices*/
/*
 * Planes that are their own timing master (BlendingAndTiming[k] == k) get a
 * slice count from PixelClockBackEnd[k]: 1, 2, 4 or 8 slices for the
 * thresholds 340 / 680 / 1360, and above 3200 the value of
 * dml_ceil(PixelClockBackEnd / 400, 4). All other planes get 0 slices.
 */
4267 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4268 if (v->BlendingAndTiming[k] == k) {
4269 if (v->PixelClockBackEnd[k] > 3200) {
4270 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4271 } else if (v->PixelClockBackEnd[k] > 1360) {
4272 v->NumberOfDSCSlices[k] = 8;
4273 } else if (v->PixelClockBackEnd[k] > 680) {
4274 v->NumberOfDSCSlices[k] = 4;
4275 } else if (v->PixelClockBackEnd[k] > 340) {
4276 v->NumberOfDSCSlices[k] = 2;
4277 } else {
4278 v->NumberOfDSCSlices[k] = 1;
4279 }
4280 } else {
4281 v->NumberOfDSCSlices[k] = 0;
4282 }
4283 }
4284
/*
 * Output bpp, DSC and FEC requirement per state and plane.
 *
 * For every state i and every timing-master plane k this fills
 * RequiresDSC[i][k], RequiresFEC[i][k] and OutputBppPerState[i][k] by calling
 * TruncToValidBPP() with a link rate that depends on the output type:
 *   - HDMI: min(600, PHYCLKPerState[i]) * 10 with 3 lanes, never DSC or FEC.
 *   - DP 2.0: 10000, 13500 and 20000 (labelled UHBR10 / UHBR13p5 / UHBR20 in
 *     the TODO notes) are tried in turn; when a rate yields BPP_INVALID and
 *     DSC is enabled without a forced bpp, the same rate is retried with DSC.
 *   - DP / eDP: 2700, 5400 and 8100 (labelled HBR / HBR2 / HBR3) are tried in
 *     turn, gated on PHYCLKPerState[i] >= 270 / 540 / 810.
 * Planes that are not their own timing master get OutputBppPerState = 0.
 */
4285 for (i = 0; i < v->soc.num_states; i++) {
4286 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4287 v->RequiresDSC[i][k] = false;
4288 v->RequiresFEC[i][k] = false;
4289 if (v->BlendingAndTiming[k] == k) {
4290 if (v->Output[k] == dm_hdmi) {
4291 v->RequiresDSC[i][k] = false;
4292 v->RequiresFEC[i][k] = false;
4293 v->OutputBppPerState[i][k] = TruncToValidBPP(
4294 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4295 3,
4296 v->HTotal[k],
4297 v->HActive[k],
4298 v->PixelClockBackEnd[k],
4299 v->ForcedOutputLinkBPP[k],
4300 false,
4301 v->Output[k],
4302 v->OutputFormat[k],
4303 v->DSCInputBitPerComponent[k],
4304 v->NumberOfDSCSlices[k],
4305 v->AudioSampleRate[k],
4306 v->AudioSampleLayout[k],
4307 v->ODMCombineEnablePerState[i][k]);
4308 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_dp2p0) {
/*
 * With DSC enabled the link uses DSC, and FEC is required on DP and DP 2.0
 * but not on eDP. With DSC disabled only DP 2.0 requires FEC.
 */
4309 if (v->DSCEnable[k] == true) {
4310 v->RequiresDSC[i][k] = true;
4311 v->LinkDSCEnable = true;
4312 if (v->Output[k] == dm_dp || v->Output[k] == dm_dp2p0) {
4313 v->RequiresFEC[i][k] = true;
4314 } else {
4315 v->RequiresFEC[i][k] = false;
4316 }
4317 } else {
4318 v->RequiresDSC[i][k] = false;
4319 v->LinkDSCEnable = false;
4320 if (v->Output[k] == dm_dp2p0) {
4321 v->RequiresFEC[i][k] = true;
4322 } else {
4323 v->RequiresFEC[i][k] = false;
4324 }
4325 }
4326 if (v->Output[k] == dm_dp2p0) {
4327 v->Outbpp = BPP_INVALID;
/*
 * NOTE(review): PHYCLKD18PerState is indexed with the plane index k in this
 * DP 2.0 branch, whereas PHYCLKPerState is indexed with the state index i
 * everywhere else in this loop -- confirm that [k] is intended.
 */
4328 if ((v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr10) &&
4329 v->PHYCLKD18PerState[k] >= 10000.0 / 18.0) {
4330 v->Outbpp = TruncToValidBPP(
4331 (1.0 - v->Downspreading / 100.0) * 10000,
4332 v->OutputLinkDPLanes[k],
4333 v->HTotal[k],
4334 v->HActive[k],
4335 v->PixelClockBackEnd[k],
4336 v->ForcedOutputLinkBPP[k],
4337 v->LinkDSCEnable,
4338 v->Output[k],
4339 v->OutputFormat[k],
4340 v->DSCInputBitPerComponent[k],
4341 v->NumberOfDSCSlices[k],
4342 v->AudioSampleRate[k],
4343 v->AudioSampleLayout[k],
4344 v->ODMCombineEnablePerState[i][k]);
/* Retry with DSC only when the next rate tier (13500) is not reachable. */
4345 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 13500.0 / 18.0 &&
4346 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
4347 v->RequiresDSC[i][k] = true;
4348 v->LinkDSCEnable = true;
4349 v->Outbpp = TruncToValidBPP(
4350 (1.0 - v->Downspreading / 100.0) * 10000,
4351 v->OutputLinkDPLanes[k],
4352 v->HTotal[k],
4353 v->HActive[k],
4354 v->PixelClockBackEnd[k],
4355 v->ForcedOutputLinkBPP[k],
4356 v->LinkDSCEnable,
4357 v->Output[k],
4358 v->OutputFormat[k],
4359 v->DSCInputBitPerComponent[k],
4360 v->NumberOfDSCSlices[k],
4361 v->AudioSampleRate[k],
4362 v->AudioSampleLayout[k],
4363 v->ODMCombineEnablePerState[i][k]);
4364 }
4365 v->OutputBppPerState[i][k] = v->Outbpp;
4366 // TODO: Need some other way to handle this nonsense
4367 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR10"
4368 }
4369 if (v->Outbpp == BPP_INVALID &&
4370 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr13p5) &&
4371 v->PHYCLKD18PerState[k] >= 13500.0 / 18.0) {
4372 v->Outbpp = TruncToValidBPP(
4373 (1.0 - v->Downspreading / 100.0) * 13500,
4374 v->OutputLinkDPLanes[k],
4375 v->HTotal[k],
4376 v->HActive[k],
4377 v->PixelClockBackEnd[k],
4378 v->ForcedOutputLinkBPP[k],
4379 v->LinkDSCEnable,
4380 v->Output[k],
4381 v->OutputFormat[k],
4382 v->DSCInputBitPerComponent[k],
4383 v->NumberOfDSCSlices[k],
4384 v->AudioSampleRate[k],
4385 v->AudioSampleLayout[k],
4386 v->ODMCombineEnablePerState[i][k]);
/* Retry with DSC only when the next rate tier (20000) is not reachable. */
4387 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 20000.0 / 18.0 &&
4388 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
4389 v->RequiresDSC[i][k] = true;
4390 v->LinkDSCEnable = true;
4391 v->Outbpp = TruncToValidBPP(
4392 (1.0 - v->Downspreading / 100.0) * 13500,
4393 v->OutputLinkDPLanes[k],
4394 v->HTotal[k],
4395 v->HActive[k],
4396 v->PixelClockBackEnd[k],
4397 v->ForcedOutputLinkBPP[k],
4398 v->LinkDSCEnable,
4399 v->Output[k],
4400 v->OutputFormat[k],
4401 v->DSCInputBitPerComponent[k],
4402 v->NumberOfDSCSlices[k],
4403 v->AudioSampleRate[k],
4404 v->AudioSampleLayout[k],
4405 v->ODMCombineEnablePerState[i][k]);
4406 }
4407 v->OutputBppPerState[i][k] = v->Outbpp;
4408 // TODO: Need some other way to handle this nonsense
4409 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR13p5"
4410 }
4411 if (v->Outbpp == BPP_INVALID &&
4412 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr20) &&
4413 v->PHYCLKD18PerState[k] >= 20000.0 / 18.0) {
4414 v->Outbpp = TruncToValidBPP(
4415 (1.0 - v->Downspreading / 100.0) * 20000,
4416 v->OutputLinkDPLanes[k],
4417 v->HTotal[k],
4418 v->HActive[k],
4419 v->PixelClockBackEnd[k],
4420 v->ForcedOutputLinkBPP[k],
4421 v->LinkDSCEnable,
4422 v->Output[k],
4423 v->OutputFormat[k],
4424 v->DSCInputBitPerComponent[k],
4425 v->NumberOfDSCSlices[k],
4426 v->AudioSampleRate[k],
4427 v->AudioSampleLayout[k],
4428 v->ODMCombineEnablePerState[i][k]);
/* Highest tier: the DSC retry has no PHY clock condition. */
4429 if (v->Outbpp == BPP_INVALID && v->DSCEnable[k] == true &&
4430 v->ForcedOutputLinkBPP[k] == 0) {
4431 v->RequiresDSC[i][k] = true;
4432 v->LinkDSCEnable = true;
4433 v->Outbpp = TruncToValidBPP(
4434 (1.0 - v->Downspreading / 100.0) * 20000,
4435 v->OutputLinkDPLanes[k],
4436 v->HTotal[k],
4437 v->HActive[k],
4438 v->PixelClockBackEnd[k],
4439 v->ForcedOutputLinkBPP[k],
4440 v->LinkDSCEnable,
4441 v->Output[k],
4442 v->OutputFormat[k],
4443 v->DSCInputBitPerComponent[k],
4444 v->NumberOfDSCSlices[k],
4445 v->AudioSampleRate[k],
4446 v->AudioSampleLayout[k],
4447 v->ODMCombineEnablePerState[i][k]);
4448 }
4449 v->OutputBppPerState[i][k] = v->Outbpp;
4450 // TODO: Need some other way to handle this nonsense
4451 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR20"
4452 }
4453 } else {
/* DP / eDP: take the first rate tier that yields a valid bpp. */
4454 v->Outbpp = BPP_INVALID;
4455 if (v->PHYCLKPerState[i] >= 270.0) {
4456 v->Outbpp = TruncToValidBPP(
4457 (1.0 - v->Downspreading / 100.0) * 2700,
4458 v->OutputLinkDPLanes[k],
4459 v->HTotal[k],
4460 v->HActive[k],
4461 v->PixelClockBackEnd[k],
4462 v->ForcedOutputLinkBPP[k],
4463 v->LinkDSCEnable,
4464 v->Output[k],
4465 v->OutputFormat[k],
4466 v->DSCInputBitPerComponent[k],
4467 v->NumberOfDSCSlices[k],
4468 v->AudioSampleRate[k],
4469 v->AudioSampleLayout[k],
4470 v->ODMCombineEnablePerState[i][k]);
4471 v->OutputBppPerState[i][k] = v->Outbpp;
4472 // TODO: Need some other way to handle this nonsense
4473 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4474 }
4475 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4476 v->Outbpp = TruncToValidBPP(
4477 (1.0 - v->Downspreading / 100.0) * 5400,
4478 v->OutputLinkDPLanes[k],
4479 v->HTotal[k],
4480 v->HActive[k],
4481 v->PixelClockBackEnd[k],
4482 v->ForcedOutputLinkBPP[k],
4483 v->LinkDSCEnable,
4484 v->Output[k],
4485 v->OutputFormat[k],
4486 v->DSCInputBitPerComponent[k],
4487 v->NumberOfDSCSlices[k],
4488 v->AudioSampleRate[k],
4489 v->AudioSampleLayout[k],
4490 v->ODMCombineEnablePerState[i][k]);
4491 v->OutputBppPerState[i][k] = v->Outbpp;
4492 // TODO: Need some other way to handle this nonsense
4493 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4494 }
4495 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4496 v->Outbpp = TruncToValidBPP(
4497 (1.0 - v->Downspreading / 100.0) * 8100,
4498 v->OutputLinkDPLanes[k],
4499 v->HTotal[k],
4500 v->HActive[k],
4501 v->PixelClockBackEnd[k],
4502 v->ForcedOutputLinkBPP[k],
4503 v->LinkDSCEnable,
4504 v->Output[k],
4505 v->OutputFormat[k],
4506 v->DSCInputBitPerComponent[k],
4507 v->NumberOfDSCSlices[k],
4508 v->AudioSampleRate[k],
4509 v->AudioSampleLayout[k],
4510 v->ODMCombineEnablePerState[i][k]);
4511 v->OutputBppPerState[i][k] = v->Outbpp;
4512 // TODO: Need some other way to handle this nonsense
4513 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4514 }
4515 }
4516 }
4517 } else {
4518 v->OutputBppPerState[i][k] = 0;
4519 }
4520 }
4521 }
4522
/*
 * LinkCapacitySupport[i] is cleared when any timing-master plane with a DP,
 * eDP or HDMI output ended up with an output bpp of 0 in state i.
 * NOTE(review): dm_dp2p0 outputs are not part of this check -- confirm that
 * this is intentional.
 */
4523 for (i = 0; i < v->soc.num_states; i++) {
4524 v->LinkCapacitySupport[i] = true;
4525 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4526 if (v->BlendingAndTiming[k] == k
4527 && (v->Output[k] == dm_dp ||
4528 v->Output[k] == dm_edp ||
4529 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
4530 v->LinkCapacitySupport[i] = false;
4531 }
4532 }
4533 }
4534
4535 // UPTO 2172
/*
 * For timing-master planes on DP, eDP or HDMI, raise two flags:
 *   - P2IWith420: 4:2:0 output on an interlaced timing while the
 *     progressive-to-interlace unit sits in the OPP.
 *   - DSC422NativeNotSupported: DSC enabled with native 4:2:2 output while
 *     DSC422NativeSupport is false.
 * Neither flag is cleared here.
 */
4536 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4537 if (v->BlendingAndTiming[k] == k
4538 && (v->Output[k] == dm_dp ||
4539 v->Output[k] == dm_edp ||
4540 v->Output[k] == dm_hdmi)) {
4541 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
4542 P2IWith420 = true;
4543 }
4544 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
4545 && !v->DSC422NativeSupport) {
4546 DSC422NativeNotSupported = true;
4547 }
4548 }
4549 }
4550
/*
 * ODMCombine4To1SupportCheckOK[i] is cleared when a timing-master plane was
 * given 4:1 ODM combine in state i and either ODMCombine4To1Supported is
 * false or the plane's output is DP, eDP or HDMI.
 */
4551 for (i = 0; i < v->soc.num_states; ++i) {
4552 v->ODMCombine4To1SupportCheckOK[i] = true;
4553 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4554 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4555 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
4556 || v->Output[k] == dm_hdmi)) {
4557 v->ODMCombine4To1SupportCheckOK[i] = false;
4558 }
4559 }
4560 }
4561
4562 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4563
/*
 * Count the DSC units needed in each state: a plane that requires DSC uses
 * 4, 2 or 1 units for 4:1, 2:1 or no ODM combine. NotEnoughDSCUnits[i] is set
 * when the total exceeds NumberOfDSC.
 */
4564 for (i = 0; i < v->soc.num_states; i++) {
4565 v->NotEnoughDSCUnits[i] = false;
4566 v->TotalDSCUnitsRequired = 0.0;
4567 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4568 if (v->RequiresDSC[i][k] == true) {
4569 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4570 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4571 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4572 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4573 } else {
4574 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4575 }
4576 }
4577 }
4578 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4579 v->NotEnoughDSCUnits[i] = true;
4580 }
4581 }
4582 /*DSC Delay per state*/
4583
/*
 * DSCDelayPerState[i][k] is dscceComputeDelay() + dscComputeDelay() for
 * planes that require DSC and have a non-zero output bpp, and 0 otherwise.
 * With 2:1 / 4:1 ODM combine the slice count passed in is divided by 2 / 4
 * and the summed delay is multiplied by 2 / 4. The result is then scaled by
 * PixelClock / PixelClockBackEnd.
 */
4584 for (i = 0; i < v->soc.num_states; i++) {
4585 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4586 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4587 v->BPP = 0.0;
4588 } else {
4589 v->BPP = v->OutputBppPerState[i][k];
4590 }
4591 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4592 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4593 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4594 v->DSCInputBitPerComponent[k],
4595 v->BPP,
4596 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4597 v->NumberOfDSCSlices[k],
4598 v->OutputFormat[k],
4599 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4600 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4601 v->DSCDelayPerState[i][k] = 2.0
4602 * (dscceComputeDelay(
4603 v->DSCInputBitPerComponent[k],
4604 v->BPP,
4605 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4606 v->NumberOfDSCSlices[k] / 2,
4607 v->OutputFormat[k],
4608 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4609 } else {
4610 v->DSCDelayPerState[i][k] = 4.0
4611 * (dscceComputeDelay(
4612 v->DSCInputBitPerComponent[k],
4613 v->BPP,
4614 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4615 v->NumberOfDSCSlices[k] / 4,
4616 v->OutputFormat[k],
4617 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4618 }
4619 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4620 } else {
4621 v->DSCDelayPerState[i][k] = 0.0;
4622 }
4623 }
/*
 * Every plane takes over the DSC delay of the plane that BlendingAndTiming[k]
 * points at, provided that plane requires DSC.
 */
4624 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4625 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4626 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4627 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4628 }
4629 }
4630 }
4631 }
4632
4633 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4634 //
/*
 * For every state i and variant j: load the per-plane DPPCLK, DPP count and
 * ODM mode into the *ThisState scratch arrays, run
 * CalculateSwathAndDETConfiguration() (result flag goes to
 * ViewportSizeSupport[i][j]) and CalculateDCFCLKDeepSleep() (result goes to
 * ProjectedDCFCLKDeepSleep[i][j]), then save the swath and DET results of
 * this combination into the *AllStates / *_all_states arrays.
 */
4635 for (i = 0; i < v->soc.num_states; ++i) {
4636 for (j = 0; j <= 1; ++j) {
4637 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4638 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4639 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4640 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4641 }
4642
/*
 * DCN315 with more than one active plane and no DET size override on plane 0:
 * PatchDETBufferSizeInKByte() is called on DETBufferSizeInKByte first.
 */
4643 if (v->NumberOfActivePlanes > 1 && mode_lib->project == DML_PROJECT_DCN315 && !v->DETSizeOverride[0])
4644 PatchDETBufferSizeInKByte(v->NumberOfActivePlanes, v->NoOfDPPThisState, v->ip.config_return_buffer_size_in_kbytes, v->DETBufferSizeInKByte);
4645 CalculateSwathAndDETConfiguration(
4646 false,
4647 v->NumberOfActivePlanes,
4648 mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0],
4649 v->DETBufferSizeInKByte,
4650 v->MaximumSwathWidthLuma,
4651 v->MaximumSwathWidthChroma,
4652 v->SourceScan,
4653 v->SourcePixelFormat,
4654 v->SurfaceTiling,
4655 v->ViewportWidth,
4656 v->ViewportHeight,
4657 v->SurfaceWidthY,
4658 v->SurfaceWidthC,
4659 v->SurfaceHeightY,
4660 v->SurfaceHeightC,
4661 v->Read256BlockHeightY,
4662 v->Read256BlockHeightC,
4663 v->Read256BlockWidthY,
4664 v->Read256BlockWidthC,
4665 v->ODMCombineEnableThisState,
4666 v->BlendingAndTiming,
4667 v->BytePerPixelY,
4668 v->BytePerPixelC,
4669 v->BytePerPixelInDETY,
4670 v->BytePerPixelInDETC,
4671 v->HActive,
4672 v->HRatio,
4673 v->HRatioChroma,
4674 v->NoOfDPPThisState,
4675 v->swath_width_luma_ub_this_state,
4676 v->swath_width_chroma_ub_this_state,
4677 v->SwathWidthYThisState,
4678 v->SwathWidthCThisState,
4679 v->SwathHeightYThisState,
4680 v->SwathHeightCThisState,
4681 v->DETBufferSizeYThisState,
4682 v->DETBufferSizeCThisState,
4683 v->dummystring,
4684 &v->ViewportSizeSupport[i][j]);
4685
4686 CalculateDCFCLKDeepSleep(
4687 mode_lib,
4688 v->NumberOfActivePlanes,
4689 v->BytePerPixelY,
4690 v->BytePerPixelC,
4691 v->VRatio,
4692 v->VRatioChroma,
4693 v->SwathWidthYThisState,
4694 v->SwathWidthCThisState,
4695 v->NoOfDPPThisState,
4696 v->HRatio,
4697 v->HRatioChroma,
4698 v->PixelClock,
4699 v->PSCL_FACTOR,
4700 v->PSCL_FACTOR_CHROMA,
4701 v->RequiredDPPCLKThisState,
4702 v->ReadBandwidthLuma,
4703 v->ReadBandwidthChroma,
4704 v->ReturnBusWidth,
4705 &v->ProjectedDCFCLKDeepSleep[i][j]);
4706
/* Save this combination's swath / DET results for the later per-state passes. */
4707 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4708 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4709 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4710 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4711 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4712 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4713 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4714 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4715 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4716 }
4717 }
4718 }
4719
/*
 * Cursor bandwidth per plane: number of cursors * width of cursor 0 * bytes
 * per pixel of cursor 0 (CursorBPP / 8), divided by the line time
 * (HTotal / PixelClock) and multiplied by VRatio.
 */
4720 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4721 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
4722 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4723 }
4724
/*
 * Per state i and variant j: reload the swath / DET values from the
 * *AllStates arrays, then compute for every plane the VM and row byte counts,
 * the prefetch source lines and the meta / DPTE row bandwidths, followed by
 * the urgent latency, the urgent burst factors and the totals of VActive
 * pixel, cursor, meta-row and DPTE-row bandwidth.
 */
4725 for (i = 0; i < v->soc.num_states; i++) {
4726 for (j = 0; j < 2; j++) {
4727 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
4728
4729 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4730 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4731 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4732 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4733 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4734 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4735 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4736 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4737 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4738 }
4739
/* Number of DPPs that belong to planes with DCC enabled. */
4740 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4741 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4742 if (v->DCCEnable[k] == true) {
4743 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4744 }
4745 }
4746
4747 for (k = 0; k < v->NumberOfActivePlanes; k++) {
/*
 * 4:2:0 and rgbe_alpha source formats take the chroma path. For 10- and
 * 12-bit 4:2:0 that is not scanned vertically, luma and chroma each get half
 * of the combined PTE request buffer; otherwise each keeps its own size.
 * All other formats give the whole combined buffer to luma and zero the
 * chroma results.
 */
4748 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4749 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4750
4751 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
4752 && v->SourceScan[k] != dm_vert) {
4753 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
4754 / 2;
4755 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4756 } else {
4757 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4758 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4759 }
4760
4761 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4762 mode_lib,
4763 v->DCCEnable[k],
4764 v->Read256BlockHeightC[k],
4765 v->Read256BlockWidthC[k],
4766 v->SourcePixelFormat[k],
4767 v->SurfaceTiling[k],
4768 v->BytePerPixelC[k],
4769 v->SourceScan[k],
4770 v->SwathWidthCThisState[k],
4771 v->ViewportHeightChroma[k],
4772 v->GPUVMEnable,
4773 v->HostVMEnable,
4774 v->HostVMMaxNonCachedPageTableLevels,
4775 v->GPUVMMinPageSize,
4776 v->HostVMMinPageSize,
4777 v->PTEBufferSizeInRequestsForChroma,
4778 v->PitchC[k],
4779 0.0,
4780 &v->MacroTileWidthC[k],
4781 &v->MetaRowBytesC,
4782 &v->DPTEBytesPerRowC,
4783 &v->PTEBufferSizeNotExceededC[i][j][k],
4784 &v->dummyinteger7,
4785 &v->dpte_row_height_chroma[k],
4786 &v->dummyinteger28,
4787 &v->dummyinteger26,
4788 &v->dummyinteger23,
4789 &v->meta_row_height_chroma[k],
4790 &v->dummyinteger8,
4791 &v->dummyinteger9,
4792 &v->dummyinteger19,
4793 &v->dummyinteger20,
4794 &v->dummyinteger17,
4795 &v->dummyinteger10,
4796 &v->dummyinteger11);
4797
4798 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4799 mode_lib,
4800 v->VRatioChroma[k],
4801 v->VTAPsChroma[k],
4802 v->Interlace[k],
4803 v->ProgressiveToInterlaceUnitInOPP,
4804 v->SwathHeightCThisState[k],
4805 v->ViewportYStartC[k],
4806 &v->PrefillC[k],
4807 &v->MaxNumSwC[k]);
4808 } else {
4809 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4810 v->PTEBufferSizeInRequestsForChroma = 0;
4811 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4812 v->MetaRowBytesC = 0.0;
4813 v->DPTEBytesPerRowC = 0.0;
4814 v->PrefetchLinesC[i][j][k] = 0.0;
4815 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4816 }
/* The luma computation always runs; per-plane totals are luma + chroma. */
4817 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4818 mode_lib,
4819 v->DCCEnable[k],
4820 v->Read256BlockHeightY[k],
4821 v->Read256BlockWidthY[k],
4822 v->SourcePixelFormat[k],
4823 v->SurfaceTiling[k],
4824 v->BytePerPixelY[k],
4825 v->SourceScan[k],
4826 v->SwathWidthYThisState[k],
4827 v->ViewportHeight[k],
4828 v->GPUVMEnable,
4829 v->HostVMEnable,
4830 v->HostVMMaxNonCachedPageTableLevels,
4831 v->GPUVMMinPageSize,
4832 v->HostVMMinPageSize,
4833 v->PTEBufferSizeInRequestsForLuma,
4834 v->PitchY[k],
4835 v->DCCMetaPitchY[k],
4836 &v->MacroTileWidthY[k],
4837 &v->MetaRowBytesY,
4838 &v->DPTEBytesPerRowY,
4839 &v->PTEBufferSizeNotExceededY[i][j][k],
4840 &v->dummyinteger7,
4841 &v->dpte_row_height[k],
4842 &v->dummyinteger29,
4843 &v->dummyinteger27,
4844 &v->dummyinteger24,
4845 &v->meta_row_height[k],
4846 &v->dummyinteger25,
4847 &v->dpte_group_bytes[k],
4848 &v->dummyinteger21,
4849 &v->dummyinteger22,
4850 &v->dummyinteger18,
4851 &v->dummyinteger5,
4852 &v->dummyinteger6);
4853 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4854 mode_lib,
4855 v->VRatio[k],
4856 v->vtaps[k],
4857 v->Interlace[k],
4858 v->ProgressiveToInterlaceUnitInOPP,
4859 v->SwathHeightYThisState[k],
4860 v->ViewportYStartY[k],
4861 &v->PrefillY[k],
4862 &v->MaxNumSwY[k]);
4863 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4864 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4865 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4866
4867 CalculateRowBandwidth(
4868 v->GPUVMEnable,
4869 v->SourcePixelFormat[k],
4870 v->VRatio[k],
4871 v->VRatioChroma[k],
4872 v->DCCEnable[k],
4873 v->HTotal[k] / v->PixelClock[k],
4874 v->MetaRowBytesY,
4875 v->MetaRowBytesC,
4876 v->meta_row_height[k],
4877 v->meta_row_height_chroma[k],
4878 v->DPTEBytesPerRowY,
4879 v->DPTEBytesPerRowC,
4880 v->dpte_row_height[k],
4881 v->dpte_row_height_chroma[k],
4882 &v->meta_row_bandwidth[i][j][k],
4883 &v->dpte_row_bandwidth[i][j][k]);
4884 }
4885 /*DCCMetaBufferSizeSupport(i, j) = True
4886 For k = 0 To NumberOfActivePlanes - 1
4887 If MetaRowBytes(i, j, k) > 24064 Then
4888 DCCMetaBufferSizeSupport(i, j) = False
4889 End If
4890 Next k*/
/* C form of the pseudo-code quoted above: any plane with MetaRowBytes above 24064 fails. */
4891 v->DCCMetaBufferSizeSupport[i][j] = true;
4892 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4893 if (v->MetaRowBytes[i][j][k] > 24064)
4894 v->DCCMetaBufferSizeSupport[i][j] = false;
4895 }
/* UrgLatency[i] depends only on the state, but is recomputed for both j. */
4896 v->UrgLatency[i] = CalculateUrgentLatency(
4897 v->UrgentLatencyPixelDataOnly,
4898 v->UrgentLatencyPixelMixedWithVMData,
4899 v->UrgentLatencyVMDataOnly,
4900 v->DoUrgentLatencyAdjustment,
4901 v->UrgentLatencyAdjustmentFabricClockComponent,
4902 v->UrgentLatencyAdjustmentFabricClockReference,
4903 v->FabricClockPerState[i]);
4904
4905 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4906 CalculateUrgentBurstFactor(
4907 v->swath_width_luma_ub_this_state[k],
4908 v->swath_width_chroma_ub_this_state[k],
4909 v->SwathHeightYThisState[k],
4910 v->SwathHeightCThisState[k],
4911 v->HTotal[k] / v->PixelClock[k],
4912 v->UrgLatency[i],
4913 v->CursorBufferSize,
4914 v->CursorWidth[k][0],
4915 v->CursorBPP[k][0],
4916 v->VRatio[k],
4917 v->VRatioChroma[k],
4918 v->BytePerPixelInDETY[k],
4919 v->BytePerPixelInDETC[k],
4920 v->DETBufferSizeYThisState[k],
4921 v->DETBufferSizeCThisState[k],
4922 &v->UrgentBurstFactorCursor[k],
4923 &v->UrgentBurstFactorLuma[k],
4924 &v->UrgentBurstFactorChroma[k],
4925 &NotUrgentLatencyHiding[k]);
4926 }
4927
/* Set when CalculateUrgentBurstFactor() flagged at least one plane. */
4928 v->NotEnoughUrgentLatencyHidingA[i][j] = false;
4929 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4930 if (NotUrgentLatencyHiding[k]) {
4931 v->NotEnoughUrgentLatencyHidingA[i][j] = true;
4932 }
4933 }
4934
/* Per-plane VActive bandwidth: read / cursor bandwidth scaled by its burst factor. */
4935 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4936 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
4937 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
4938 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
4939 }
4940
/* Totals over all planes; the row bandwidths are weighted by the plane's DPP count. */
4941 v->TotalVActivePixelBandwidth[i][j] = 0;
4942 v->TotalVActiveCursorBandwidth[i][j] = 0;
4943 v->TotalMetaRowBandwidth[i][j] = 0;
4944 v->TotalDPTERowBandwidth[i][j] = 0;
4945 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4946 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
4947 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
4948 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
4949 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
4950 }
4951 }
4952 }
4953
4954 //Calculate Return BW
4955 for (i = 0; i < v->soc.num_states; ++i) {
4956 for (j = 0; j <= 1; ++j) {
4957 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4958 if (v->BlendingAndTiming[k] == k) {
4959 if (v->WritebackEnable[k] == true) {
4960 v->WritebackDelayTime[k] = v->WritebackLatency
4961 + CalculateWriteBackDelay(
4962 v->WritebackPixelFormat[k],
4963 v->WritebackHRatio[k],
4964 v->WritebackVRatio[k],
4965 v->WritebackVTaps[k],
4966 v->WritebackDestinationWidth[k],
4967 v->WritebackDestinationHeight[k],
4968 v->WritebackSourceHeight[k],
4969 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
4970 } else {
4971 v->WritebackDelayTime[k] = 0.0;
4972 }
4973 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4974 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
4975 v->WritebackDelayTime[k] = dml_max(
4976 v->WritebackDelayTime[k],
4977 v->WritebackLatency
4978 + CalculateWriteBackDelay(
4979 v->WritebackPixelFormat[m],
4980 v->WritebackHRatio[m],
4981 v->WritebackVRatio[m],
4982 v->WritebackVTaps[m],
4983 v->WritebackDestinationWidth[m],
4984 v->WritebackDestinationHeight[m],
4985 v->WritebackSourceHeight[m],
4986 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
4987 }
4988 }
4989 }
4990 }
4991 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4992 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4993 if (v->BlendingAndTiming[k] == m) {
4994 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
4995 }
4996 }
4997 }
4998 v->MaxMaxVStartup[i][j] = 0;
4999 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5000 v->MaximumVStartup[i][j][k] =
5001 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
5002 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
5003 v->VTotal[k] - v->VActive[k]
5004 - dml_max(
5005 1.0,
5006 dml_ceil(
5007 1.0 * v->WritebackDelayTime[k]
5008 / (v->HTotal[k]
5009 / v->PixelClock[k]),
5010 1.0));
5011 if (v->MaximumVStartup[i][j][k] > 1023)
5012 v->MaximumVStartup[i][j][k] = 1023;
5013 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
5014 }
5015 }
5016 }
5017
5018 ReorderingBytes = v->NumberOfChannels
5019 * dml_max3(
5020 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
5021 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
5022 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
5023
5024 for (i = 0; i < v->soc.num_states; ++i) {
5025 for (j = 0; j <= 1; ++j) {
5026 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
5027 }
5028 }
5029
5030 if (v->UseMinimumRequiredDCFCLK == true)
5031 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes);
5032
5033 for (i = 0; i < v->soc.num_states; ++i) {
5034 for (j = 0; j <= 1; ++j) {
5035 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
5036 v->ReturnBusWidth * v->DCFCLKState[i][j],
5037 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
5038 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
5039 double PixelDataOnlyReturnBWPerState = dml_min(
5040 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5041 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
5042 double PixelMixedWithVMDataReturnBWPerState = dml_min(
5043 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5044 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
5045
5046 if (v->HostVMEnable != true) {
5047 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
5048 } else {
5049 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
5050 }
5051 }
5052 }
5053
5054 //Re-ordering Buffer Support Check
5055 for (i = 0; i < v->soc.num_states; ++i) {
5056 for (j = 0; j <= 1; ++j) {
5057 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
5058 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
5059 v->ROBSupport[i][j] = true;
5060 } else {
5061 v->ROBSupport[i][j] = false;
5062 }
5063 }
5064 }
5065
5066 //Vertical Active BW support check
5067
5068 MaxTotalVActiveRDBandwidth = 0;
5069 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5070 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
5071 }
5072
5073 for (i = 0; i < v->soc.num_states; ++i) {
5074 for (j = 0; j <= 1; ++j) {
5075 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
5076 dml_min(
5077 v->ReturnBusWidth * v->DCFCLKState[i][j],
5078 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5079 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
5080 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5081 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
5082
5083 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
5084 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
5085 } else {
5086 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
5087 }
5088 }
5089 }
5090
5091 v->UrgentLatency = CalculateUrgentLatency(
5092 v->UrgentLatencyPixelDataOnly,
5093 v->UrgentLatencyPixelMixedWithVMData,
5094 v->UrgentLatencyVMDataOnly,
5095 v->DoUrgentLatencyAdjustment,
5096 v->UrgentLatencyAdjustmentFabricClockComponent,
5097 v->UrgentLatencyAdjustmentFabricClockReference,
5098 v->FabricClock);
5099 //Prefetch Check
5100 for (i = 0; i < v->soc.num_states; ++i) {
5101 for (j = 0; j <= 1; ++j) {
5102 double VMDataOnlyReturnBWPerState;
5103 double HostVMInefficiencyFactor = 1;
5104 int NextPrefetchModeState = MinPrefetchMode;
5105 bool UnboundedRequestEnabledThisState = false;
5106 int CompressedBufferSizeInkByteThisState = 0;
5107 double dummy;
5108
5109 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
5110
5111 v->BandwidthWithoutPrefetchSupported[i][j] = true;
5112 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
5113 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
5114 v->BandwidthWithoutPrefetchSupported[i][j] = false;
5115 }
5116
5117 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5118 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
5119 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
5120 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
5121 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
5122 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
5123 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
5124 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
5125 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
5126 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
5127 }
5128
5129 VMDataOnlyReturnBWPerState = dml_min(
5130 dml_min(
5131 v->ReturnBusWidth * v->DCFCLKState[i][j],
5132 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5133 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5134 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5135 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
5136 if (v->GPUVMEnable && v->HostVMEnable)
5137 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
5138
5139 v->ExtraLatency = CalculateExtraLatency(
5140 v->RoundTripPingLatencyCycles,
5141 ReorderingBytes,
5142 v->DCFCLKState[i][j],
5143 v->TotalNumberOfActiveDPP[i][j],
5144 v->PixelChunkSizeInKByte,
5145 v->TotalNumberOfDCCActiveDPP[i][j],
5146 v->MetaChunkSize,
5147 v->ReturnBWPerState[i][j],
5148 v->GPUVMEnable,
5149 v->HostVMEnable,
5150 v->NumberOfActivePlanes,
5151 v->NoOfDPPThisState,
5152 v->dpte_group_bytes,
5153 HostVMInefficiencyFactor,
5154 v->HostVMMinPageSize,
5155 v->HostVMMaxNonCachedPageTableLevels);
5156
5157 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5158 do {
5159 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
5160 v->MaxVStartup = v->NextMaxVStartup;
5161
5162 v->TWait = CalculateTWait(
5163 v->PrefetchModePerState[i][j],
5164 v->DRAMClockChangeLatency,
5165 v->UrgLatency[i],
5166 v->SREnterPlusExitTime);
5167
5168 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5169 CalculatePrefetchSchedulePerPlane(mode_lib,
5170 HostVMInefficiencyFactor,
5171 i, j, k);
5172 }
5173
5174 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5175 CalculateUrgentBurstFactor(
5176 v->swath_width_luma_ub_this_state[k],
5177 v->swath_width_chroma_ub_this_state[k],
5178 v->SwathHeightYThisState[k],
5179 v->SwathHeightCThisState[k],
5180 v->HTotal[k] / v->PixelClock[k],
5181 v->UrgLatency[i],
5182 v->CursorBufferSize,
5183 v->CursorWidth[k][0],
5184 v->CursorBPP[k][0],
5185 v->VRatioPreY[i][j][k],
5186 v->VRatioPreC[i][j][k],
5187 v->BytePerPixelInDETY[k],
5188 v->BytePerPixelInDETC[k],
5189 v->DETBufferSizeYThisState[k],
5190 v->DETBufferSizeCThisState[k],
5191 &v->UrgentBurstFactorCursorPre[k],
5192 &v->UrgentBurstFactorLumaPre[k],
5193 &v->UrgentBurstFactorChromaPre[k],
5194 &v->NotUrgentLatencyHidingPre[k]);
5195 }
5196
5197 v->MaximumReadBandwidthWithPrefetch = 0.0;
5198 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5199 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
5200 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
5201
5202 v->MaximumReadBandwidthWithPrefetch =
5203 v->MaximumReadBandwidthWithPrefetch
5204 + dml_max3(
5205 v->VActivePixelBandwidth[i][j][k]
5206 + v->VActiveCursorBandwidth[i][j][k]
5207 + v->NoOfDPP[i][j][k]
5208 * (v->meta_row_bandwidth[i][j][k]
5209 + v->dpte_row_bandwidth[i][j][k]),
5210 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5211 v->NoOfDPP[i][j][k]
5212 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5213 * v->UrgentBurstFactorLumaPre[k]
5214 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5215 * v->UrgentBurstFactorChromaPre[k])
5216 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5217 }
5218
5219 v->NotEnoughUrgentLatencyHidingPre = false;
5220 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5221 if (v->NotUrgentLatencyHidingPre[k] == true) {
5222 v->NotEnoughUrgentLatencyHidingPre = true;
5223 }
5224 }
5225
5226 v->PrefetchSupported[i][j] = true;
5227 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5228 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5229 v->PrefetchSupported[i][j] = false;
5230 }
5231 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5232 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5233 || v->NoTimeForPrefetch[i][j][k] == true) {
5234 v->PrefetchSupported[i][j] = false;
5235 }
5236 }
5237
5238 v->DynamicMetadataSupported[i][j] = true;
5239 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5240 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5241 v->DynamicMetadataSupported[i][j] = false;
5242 }
5243 }
5244
5245 v->VRatioInPrefetchSupported[i][j] = true;
5246 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5247 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5248 v->VRatioInPrefetchSupported[i][j] = false;
5249 }
5250 }
5251 v->AnyLinesForVMOrRowTooLarge = false;
5252 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5253 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5254 v->AnyLinesForVMOrRowTooLarge = true;
5255 }
5256 }
5257
5258 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5259
5260 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5261 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5262 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5263 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5264 - dml_max(
5265 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5266 v->NoOfDPP[i][j][k]
5267 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5268 * v->UrgentBurstFactorLumaPre[k]
5269 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5270 * v->UrgentBurstFactorChromaPre[k])
5271 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5272 }
5273 v->TotImmediateFlipBytes = 0.0;
5274 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5275 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
5276 + v->NoOfDPP[i][j][k] * (v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
5277 + v->DPTEBytesPerRow[i][j][k]);
5278 }
5279
5280 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5281 CalculateFlipSchedule(
5282 mode_lib,
5283 k,
5284 HostVMInefficiencyFactor,
5285 v->ExtraLatency,
5286 v->UrgLatency[i],
5287 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5288 v->MetaRowBytes[i][j][k],
5289 v->DPTEBytesPerRow[i][j][k]);
5290 }
5291 v->total_dcn_read_bw_with_flip = 0.0;
5292 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5293 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5294 + dml_max3(
5295 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5296 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5297 + v->VActiveCursorBandwidth[i][j][k],
5298 v->NoOfDPP[i][j][k]
5299 * (v->final_flip_bw[k]
5300 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5301 * v->UrgentBurstFactorLumaPre[k]
5302 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5303 * v->UrgentBurstFactorChromaPre[k])
5304 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5305 }
5306 v->ImmediateFlipSupportedForState[i][j] = true;
5307 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5308 v->ImmediateFlipSupportedForState[i][j] = false;
5309 }
5310 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5311 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5312 v->ImmediateFlipSupportedForState[i][j] = false;
5313 }
5314 }
5315 } else {
5316 v->ImmediateFlipSupportedForState[i][j] = false;
5317 }
5318
5319 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
5320 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5321 NextPrefetchModeState = NextPrefetchModeState + 1;
5322 } else {
5323 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5324 }
5325 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5326 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5327 && ((v->HostVMEnable == false &&
5328 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5329 || v->ImmediateFlipSupportedForState[i][j] == true))
5330 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5331
5332 CalculateUnboundedRequestAndCompressedBufferSize(
5333 v->DETBufferSizeInKByte[0],
5334 v->ConfigReturnBufferSizeInKByte,
5335 v->UseUnboundedRequesting,
5336 v->TotalNumberOfActiveDPP[i][j],
5337 NoChroma,
5338 v->MaxNumDPP,
5339 v->CompressedBufferSegmentSizeInkByte,
5340 v->Output,
5341 &UnboundedRequestEnabledThisState,
5342 &CompressedBufferSizeInkByteThisState);
5343
5344 CalculateWatermarksAndDRAMSpeedChangeSupport(
5345 mode_lib,
5346 v->PrefetchModePerState[i][j],
5347 v->DCFCLKState[i][j],
5348 v->ReturnBWPerState[i][j],
5349 v->UrgLatency[i],
5350 v->ExtraLatency,
5351 v->SOCCLKPerState[i],
5352 v->ProjectedDCFCLKDeepSleep[i][j],
5353 v->DETBufferSizeYThisState,
5354 v->DETBufferSizeCThisState,
5355 v->SwathHeightYThisState,
5356 v->SwathHeightCThisState,
5357 v->SwathWidthYThisState,
5358 v->SwathWidthCThisState,
5359 v->NoOfDPPThisState,
5360 v->BytePerPixelInDETY,
5361 v->BytePerPixelInDETC,
5362 UnboundedRequestEnabledThisState,
5363 CompressedBufferSizeInkByteThisState,
5364 &v->DRAMClockChangeSupport[i][j],
5365 &dummy,
5366 &dummy,
5367 &dummy,
5368 &dummy);
5369 }
5370 }
5371
5372 /*PTE Buffer Size Check*/
5373 for (i = 0; i < v->soc.num_states; i++) {
5374 for (j = 0; j < 2; j++) {
5375 v->PTEBufferSizeNotExceeded[i][j] = true;
5376 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5377 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5378 v->PTEBufferSizeNotExceeded[i][j] = false;
5379 }
5380 }
5381 }
5382 }
5383
5384 /*Cursor Support Check*/
5385 v->CursorSupport = true;
5386 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5387 if (v->CursorWidth[k][0] > 0.0) {
5388 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5389 v->CursorSupport = false;
5390 }
5391 }
5392 }
5393
5394 /*Valid Pitch Check*/
5395 v->PitchSupport = true;
5396 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5397 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5398 if (v->DCCEnable[k] == true) {
5399 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5400 } else {
5401 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5402 }
5403 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5404 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
5405 && v->SourcePixelFormat[k] != dm_mono_8) {
5406 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5407 if (v->DCCEnable[k] == true) {
5408 v->AlignedDCCMetaPitchC[k] = dml_ceil(
5409 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]),
5410 64.0 * v->Read256BlockWidthC[k]);
5411 } else {
5412 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5413 }
5414 } else {
5415 v->AlignedCPitch[k] = v->PitchC[k];
5416 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5417 }
5418 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
5419 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5420 v->PitchSupport = false;
5421 }
5422 }
5423
5424 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5425 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
5426 ViewportExceedsSurface = true;
5427 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
5428 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
5429 && v->SourcePixelFormat[k] != dm_rgbe) {
5430 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
5431 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5432 ViewportExceedsSurface = true;
5433 }
5434 }
5435 }
5436 }
5437
5438 /*Mode Support, Voltage State and SOC Configuration*/
5439 for (i = v->soc.num_states - 1; i >= 0; i--) {
5440 for (j = 0; j < 2; j++) {
5441 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
5442 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
5443 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
5444 && v->DTBCLKRequiredMoreThanSupported[i] == false
5445 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
5446 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
5447 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
5448 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
5449 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
5450 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5451 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
5452 && ((v->HostVMEnable == false
5453 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5454 || v->ImmediateFlipSupportedForState[i][j] == true)
5455 && FMTBufferExceeded == false) {
5456 v->ModeSupport[i][j] = true;
5457 } else {
5458 v->ModeSupport[i][j] = false;
5459 #ifdef __DML_VBA_DEBUG__
5460 if (v->ScaleRatioAndTapsSupport == false)
5461 dml_print("DML SUPPORT: ScaleRatioAndTapsSupport failed");
5462 if (v->SourceFormatPixelAndScanSupport == false)
5463 dml_print("DML SUPPORT: SourceFormatPixelAndScanSupport failed");
5464 if (v->ViewportSizeSupport[i][j] == false)
5465 dml_print("DML SUPPORT: ViewportSizeSupport failed");
5466 if (v->LinkCapacitySupport[i] == false)
5467 dml_print("DML SUPPORT: LinkCapacitySupport failed");
5468 if (v->ODMCombine4To1SupportCheckOK[i] == false)
5469 dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
5470 if (v->NotEnoughDSCUnits[i] == true)
5471 dml_print("DML SUPPORT: NotEnoughDSCUnits");
5472 if (v->DTBCLKRequiredMoreThanSupported[i] == true)
5473 dml_print("DML SUPPORT: DTBCLKRequiredMoreThanSupported");
5474 if (v->ROBSupport[i][j] == false)
5475 dml_print("DML SUPPORT: ROBSupport failed");
5476 if (v->DISPCLK_DPPCLK_Support[i][j] == false)
5477 dml_print("DML SUPPORT: DISPCLK_DPPCLK_Support failed");
5478 if (v->TotalAvailablePipesSupport[i][j] == false)
5479 dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
5480 if (EnoughWritebackUnits == false)
5481 dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
5482 if (v->WritebackLatencySupport == false)
5483 dml_print("DML SUPPORT: WritebackLatencySupport failed");
5484 if (v->WritebackScaleRatioAndTapsSupport == false)
5485 dml_print("DML SUPPORT: DSC422NativeNotSupported ");
5486 if (v->CursorSupport == false)
5487 dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
5488 if (v->PitchSupport == false)
5489 dml_print("DML SUPPORT: PitchSupport failed");
5490 if (ViewportExceedsSurface == true)
5491 dml_print("DML SUPPORT: ViewportExceedsSurface failed");
5492 if (v->PrefetchSupported[i][j] == false)
5493 dml_print("DML SUPPORT: PrefetchSupported failed");
5494 if (v->DynamicMetadataSupported[i][j] == false)
5495 dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
5496 if (v->TotalVerticalActiveBandwidthSupport[i][j] == false)
5497 dml_print("DML SUPPORT: TotalVerticalActiveBandwidthSupport failed");
5498 if (v->VRatioInPrefetchSupported[i][j] == false)
5499 dml_print("DML SUPPORT: VRatioInPrefetchSupported failed");
5500 if (v->PTEBufferSizeNotExceeded[i][j] == false)
5501 dml_print("DML SUPPORT: PTEBufferSizeNotExceeded failed");
5502 if (v->NonsupportedDSCInputBPC == true)
5503 dml_print("DML SUPPORT: NonsupportedDSCInputBPC failed");
5504 if (!((v->HostVMEnable == false
5505 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5506 || v->ImmediateFlipSupportedForState[i][j] == true))
5507 dml_print("DML SUPPORT: ImmediateFlipRequirement failed");
5508 if (FMTBufferExceeded == true)
5509 dml_print("DML SUPPORT: FMTBufferExceeded failed");
5510 #endif
5511 }
5512 }
5513 }
5514
5515 {
5516 unsigned int MaximumMPCCombine = 0;
5517 for (i = v->soc.num_states; i >= 0; i--) {
5518 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5519 v->VoltageLevel = i;
5520 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5521 if (v->ModeSupport[i][0] == true) {
5522 MaximumMPCCombine = 0;
5523 } else {
5524 MaximumMPCCombine = 1;
5525 }
5526 }
5527 }
5528 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5529 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5530 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5531 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5532 }
5533 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5534 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5535 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5536 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5537 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5538 v->maxMpcComb = MaximumMPCCombine;
5539 }
5540 }
5541
5542 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5543 struct display_mode_lib *mode_lib,
5544 unsigned int PrefetchMode,
5545 double DCFCLK,
5546 double ReturnBW,
5547 double UrgentLatency,
5548 double ExtraLatency,
5549 double SOCCLK,
5550 double DCFCLKDeepSleep,
5551 unsigned int DETBufferSizeY[],
5552 unsigned int DETBufferSizeC[],
5553 unsigned int SwathHeightY[],
5554 unsigned int SwathHeightC[],
5555 double SwathWidthY[],
5556 double SwathWidthC[],
5557 unsigned int DPPPerPlane[],
5558 double BytePerPixelDETY[],
5559 double BytePerPixelDETC[],
5560 bool UnboundedRequestEnabled,
5561 int unsigned CompressedBufferSizeInkByte,
5562 enum clock_change_support *DRAMClockChangeSupport,
5563 double *StutterExitWatermark,
5564 double *StutterEnterPlusExitWatermark,
5565 double *Z8StutterExitWatermark,
5566 double *Z8StutterEnterPlusExitWatermark)
5567 {
5568 struct vba_vars_st *v = &mode_lib->vba;
5569 double EffectiveLBLatencyHidingY;
5570 double EffectiveLBLatencyHidingC;
5571 double LinesInDETY[DC__NUM_DPP__MAX];
5572 double LinesInDETC;
5573 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5574 unsigned int LinesInDETCRoundedDownToSwath;
5575 double FullDETBufferingTimeY;
5576 double FullDETBufferingTimeC;
5577 double ActiveDRAMClockChangeLatencyMarginY;
5578 double ActiveDRAMClockChangeLatencyMarginC;
5579 double WritebackDRAMClockChangeLatencyMargin;
5580 double PlaneWithMinActiveDRAMClockChangeMargin;
5581 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5582 double WritebackDRAMClockChangeLatencyHiding;
5583 double TotalPixelBW = 0.0;
5584 int k, j;
5585
5586 v->UrgentWatermark = UrgentLatency + ExtraLatency;
5587
5588 #ifdef __DML_VBA_DEBUG__
5589 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
5590 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
5591 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark);
5592 #endif
5593
5594 v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark;
5595
5596 #ifdef __DML_VBA_DEBUG__
5597 dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency);
5598 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark);
5599 #endif
5600
5601 v->TotalActiveWriteback = 0;
5602 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5603 if (v->WritebackEnable[k] == true) {
5604 v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
5605 }
5606 }
5607
5608 if (v->TotalActiveWriteback <= 1) {
5609 v->WritebackUrgentWatermark = v->WritebackLatency;
5610 } else {
5611 v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5612 }
5613
5614 if (v->TotalActiveWriteback <= 1) {
5615 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency;
5616 } else {
5617 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5618 }
5619
5620 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5621 TotalPixelBW = TotalPixelBW
5622 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k])
5623 / (v->HTotal[k] / v->PixelClock[k]);
5624 }
5625
5626 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5627 double EffectiveDETBufferSizeY = DETBufferSizeY[k];
5628
5629 v->LBLatencyHidingSourceLinesY = dml_min(
5630 (double) v->MaxLineBufferLines,
5631 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
5632
5633 v->LBLatencyHidingSourceLinesC = dml_min(
5634 (double) v->MaxLineBufferLines,
5635 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
5636
5637 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
5638
5639 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
5640
5641 if (UnboundedRequestEnabled) {
5642 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
5643 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
5644 }
5645
5646 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5647 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5648 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
5649 if (BytePerPixelDETC[k] > 0) {
5650 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5651 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5652 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k];
5653 } else {
5654 LinesInDETC = 0;
5655 FullDETBufferingTimeC = 999999;
5656 }
5657
5658 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
5659 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5660
5661 if (v->NumberOfActivePlanes > 1) {
5662 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5663 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k];
5664 }
5665
5666 if (BytePerPixelDETC[k] > 0) {
5667 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
5668 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5669
5670 if (v->NumberOfActivePlanes > 1) {
5671 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5672 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k];
5673 }
5674 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5675 } else {
5676 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5677 }
5678
5679 if (v->WritebackEnable[k] == true) {
5680 WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024
5681 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
5682 if (v->WritebackPixelFormat[k] == dm_444_64) {
5683 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5684 }
5685 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
5686 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
5687 }
5688 }
5689
5690 v->MinActiveDRAMClockChangeMargin = 999999;
5691 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5692 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5693 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
5694 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
5695 if (v->BlendingAndTiming[k] == k) {
5696 PlaneWithMinActiveDRAMClockChangeMargin = k;
5697 } else {
5698 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
5699 if (v->BlendingAndTiming[k] == j) {
5700 PlaneWithMinActiveDRAMClockChangeMargin = j;
5701 }
5702 }
5703 }
5704 }
5705 }
5706
5707 v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ;
5708
5709 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5710 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5711 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5712 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5713 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
5714 }
5715 }
5716
5717 v->TotalNumberOfActiveOTG = 0;
5718
5719 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5720 if (v->BlendingAndTiming[k] == k) {
5721 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
5722 }
5723 }
5724
5725 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5726 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5727 } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
5728 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
5729 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5730 } else {
5731 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5732 }
5733
5734 *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5735 *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
5736 *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5737 *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5738
5739 #ifdef __DML_VBA_DEBUG__
5740 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
5741 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
5742 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
5743 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
5744 #endif
5745 }
5746
5747 static void CalculateDCFCLKDeepSleep(
5748 struct display_mode_lib *mode_lib,
5749 unsigned int NumberOfActivePlanes,
5750 int BytePerPixelY[],
5751 int BytePerPixelC[],
5752 double VRatio[],
5753 double VRatioChroma[],
5754 double SwathWidthY[],
5755 double SwathWidthC[],
5756 unsigned int DPPPerPlane[],
5757 double HRatio[],
5758 double HRatioChroma[],
5759 double PixelClock[],
5760 double PSCL_THROUGHPUT[],
5761 double PSCL_THROUGHPUT_CHROMA[],
5762 double DPPCLK[],
5763 double ReadBandwidthLuma[],
5764 double ReadBandwidthChroma[],
5765 int ReturnBusWidth,
5766 double *DCFCLKDeepSleep)
5767 {
5768 struct vba_vars_st *v = &mode_lib->vba;
5769 double DisplayPipeLineDeliveryTimeLuma;
5770 double DisplayPipeLineDeliveryTimeChroma;
5771 double ReadBandwidth = 0.0;
5772 int k;
5773
5774 for (k = 0; k < NumberOfActivePlanes; ++k) {
5775
5776 if (VRatio[k] <= 1) {
5777 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5778 } else {
5779 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5780 }
5781 if (BytePerPixelC[k] == 0) {
5782 DisplayPipeLineDeliveryTimeChroma = 0;
5783 } else {
5784 if (VRatioChroma[k] <= 1) {
5785 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5786 } else {
5787 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5788 }
5789 }
5790
5791 if (BytePerPixelC[k] > 0) {
5792 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
5793 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5794 } else {
5795 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
5796 }
5797 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
5798
5799 }
5800
5801 for (k = 0; k < NumberOfActivePlanes; ++k) {
5802 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
5803 }
5804
5805 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
5806
5807 for (k = 0; k < NumberOfActivePlanes; ++k) {
5808 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]);
5809 }
5810 }
5811
/*
 * Compute the urgent burst factors of one plane for its cursor, luma and
 * chroma fetches.
 *
 * Each buffer (cursor buffer, luma DET, chroma DET) is turned into the time
 * it can supply data for: lines held * LineTime / vertical ratio.  The burst
 * factor is that time divided by (that time - UrgentLatency).  When the
 * buffer time does not exceed UrgentLatency the factor is set to 0 and
 * *NotEnoughUrgentLatencyHiding is set to 1.  A vertical ratio that is not
 * positive yields a factor of 1.
 *
 * Outputs that are left untouched:
 *   - *UrgentBurstFactorCursor when CursorWidth == 0
 *   - *UrgentBurstFactorChroma when BytePerPixelInDETC <= 0
 *
 * The cursor and luma times are scaled by VRatio; the chroma time is scaled
 * by VRatioC.  Earlier revisions of this block divided the chroma time by
 * the luma VRatio and never read VRatioC, which mis-sizes the chroma buffer
 * time whenever the two ratios differ.
 *
 * NOTE(review): CursorBufferSize is multiplied by 1024 and CursorBPP divided
 * by 8, so they are presumably in KB and bits per pixel - confirm against the
 * callers.
 */
static void CalculateUrgentBurstFactor(
		int swath_width_luma_ub,
		int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	double LinesInDETLuma;
	double LinesInDETChroma;
	unsigned int LinesInCursorBuffer;
	double CursorBufferSizeInTime;
	double DETBufferSizeInTimeLuma;
	double DETBufferSizeInTimeChroma;

	*NotEnoughUrgentLatencyHiding = 0;

	if (CursorWidth > 0) {
		/* Cursor lines that fit in the buffer, rounded down to a power of two. */
		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
		if (VRatio > 0) {
			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorCursor = 0;
			} else {
				*UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorCursor = 1;
		}
	}

	/* Luma: whole swaths held in the DET, converted to time. */
	LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (VRatio > 0) {
		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
			*NotEnoughUrgentLatencyHiding = 1;
			*UrgentBurstFactorLuma = 0;
		} else {
			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
		}
	} else {
		*UrgentBurstFactorLuma = 1;
	}

	/* Chroma: same computation, but driven by the chroma vertical ratio. */
	if (BytePerPixelInDETC > 0) {
		LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
		if (VRatioC > 0) {
			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC;
			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorChroma = 0;
			} else {
				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorChroma = 1;
		}
	}
}
5885
5886 static void CalculatePixelDeliveryTimes(
5887 unsigned int NumberOfActivePlanes,
5888 double VRatio[],
5889 double VRatioChroma[],
5890 double VRatioPrefetchY[],
5891 double VRatioPrefetchC[],
5892 unsigned int swath_width_luma_ub[],
5893 unsigned int swath_width_chroma_ub[],
5894 unsigned int DPPPerPlane[],
5895 double HRatio[],
5896 double HRatioChroma[],
5897 double PixelClock[],
5898 double PSCL_THROUGHPUT[],
5899 double PSCL_THROUGHPUT_CHROMA[],
5900 double DPPCLK[],
5901 int BytePerPixelC[],
5902 enum scan_direction_class SourceScan[],
5903 unsigned int NumberOfCursors[],
5904 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
5905 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
5906 unsigned int BlockWidth256BytesY[],
5907 unsigned int BlockHeight256BytesY[],
5908 unsigned int BlockWidth256BytesC[],
5909 unsigned int BlockHeight256BytesC[],
5910 double DisplayPipeLineDeliveryTimeLuma[],
5911 double DisplayPipeLineDeliveryTimeChroma[],
5912 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
5913 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
5914 double DisplayPipeRequestDeliveryTimeLuma[],
5915 double DisplayPipeRequestDeliveryTimeChroma[],
5916 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
5917 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
5918 double CursorRequestDeliveryTime[],
5919 double CursorRequestDeliveryTimePrefetch[])
5920 {
5921 double req_per_swath_ub;
5922 int k;
5923
5924 for (k = 0; k < NumberOfActivePlanes; ++k) {
5925 if (VRatio[k] <= 1) {
5926 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5927 } else {
5928 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5929 }
5930
5931 if (BytePerPixelC[k] == 0) {
5932 DisplayPipeLineDeliveryTimeChroma[k] = 0;
5933 } else {
5934 if (VRatioChroma[k] <= 1) {
5935 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5936 } else {
5937 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5938 }
5939 }
5940
5941 if (VRatioPrefetchY[k] <= 1) {
5942 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5943 } else {
5944 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5945 }
5946
5947 if (BytePerPixelC[k] == 0) {
5948 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
5949 } else {
5950 if (VRatioPrefetchC[k] <= 1) {
5951 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5952 } else {
5953 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5954 }
5955 }
5956 }
5957
5958 for (k = 0; k < NumberOfActivePlanes; ++k) {
5959 if (SourceScan[k] != dm_vert) {
5960 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
5961 } else {
5962 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
5963 }
5964 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
5965 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
5966 if (BytePerPixelC[k] == 0) {
5967 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
5968 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
5969 } else {
5970 if (SourceScan[k] != dm_vert) {
5971 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
5972 } else {
5973 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
5974 }
5975 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
5976 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
5977 }
5978 #ifdef __DML_VBA_DEBUG__
5979 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
5980 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
5981 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
5982 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
5983 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
5984 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
5985 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
5986 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
5987 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
5988 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
5989 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
5990 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
5991 #endif
5992 }
5993
5994 for (k = 0; k < NumberOfActivePlanes; ++k) {
5995 int cursor_req_per_width;
5996 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
5997 if (NumberOfCursors[k] > 0) {
5998 if (VRatio[k] <= 1) {
5999 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6000 } else {
6001 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6002 }
6003 if (VRatioPrefetchY[k] <= 1) {
6004 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6005 } else {
6006 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6007 }
6008 } else {
6009 CursorRequestDeliveryTime[k] = 0;
6010 CursorRequestDeliveryTimePrefetch[k] = 0;
6011 }
6012 #ifdef __DML_VBA_DEBUG__
6013 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
6014 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
6015 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
6016 #endif
6017 }
6018 }
6019
6020 static void CalculateMetaAndPTETimes(
6021 int NumberOfActivePlanes,
6022 bool GPUVMEnable,
6023 int MetaChunkSize,
6024 int MinMetaChunkSizeBytes,
6025 int HTotal[],
6026 double VRatio[],
6027 double VRatioChroma[],
6028 double DestinationLinesToRequestRowInVBlank[],
6029 double DestinationLinesToRequestRowInImmediateFlip[],
6030 bool DCCEnable[],
6031 double PixelClock[],
6032 int BytePerPixelY[],
6033 int BytePerPixelC[],
6034 enum scan_direction_class SourceScan[],
6035 int dpte_row_height[],
6036 int dpte_row_height_chroma[],
6037 int meta_row_width[],
6038 int meta_row_width_chroma[],
6039 int meta_row_height[],
6040 int meta_row_height_chroma[],
6041 int meta_req_width[],
6042 int meta_req_width_chroma[],
6043 int meta_req_height[],
6044 int meta_req_height_chroma[],
6045 int dpte_group_bytes[],
6046 int PTERequestSizeY[],
6047 int PTERequestSizeC[],
6048 int PixelPTEReqWidthY[],
6049 int PixelPTEReqHeightY[],
6050 int PixelPTEReqWidthC[],
6051 int PixelPTEReqHeightC[],
6052 int dpte_row_width_luma_ub[],
6053 int dpte_row_width_chroma_ub[],
6054 double DST_Y_PER_PTE_ROW_NOM_L[],
6055 double DST_Y_PER_PTE_ROW_NOM_C[],
6056 double DST_Y_PER_META_ROW_NOM_L[],
6057 double DST_Y_PER_META_ROW_NOM_C[],
6058 double TimePerMetaChunkNominal[],
6059 double TimePerChromaMetaChunkNominal[],
6060 double TimePerMetaChunkVBlank[],
6061 double TimePerChromaMetaChunkVBlank[],
6062 double TimePerMetaChunkFlip[],
6063 double TimePerChromaMetaChunkFlip[],
6064 double time_per_pte_group_nom_luma[],
6065 double time_per_pte_group_vblank_luma[],
6066 double time_per_pte_group_flip_luma[],
6067 double time_per_pte_group_nom_chroma[],
6068 double time_per_pte_group_vblank_chroma[],
6069 double time_per_pte_group_flip_chroma[])
6070 {
6071 unsigned int meta_chunk_width;
6072 unsigned int min_meta_chunk_width;
6073 unsigned int meta_chunk_per_row_int;
6074 unsigned int meta_row_remainder;
6075 unsigned int meta_chunk_threshold;
6076 unsigned int meta_chunks_per_row_ub;
6077 unsigned int meta_chunk_width_chroma;
6078 unsigned int min_meta_chunk_width_chroma;
6079 unsigned int meta_chunk_per_row_int_chroma;
6080 unsigned int meta_row_remainder_chroma;
6081 unsigned int meta_chunk_threshold_chroma;
6082 unsigned int meta_chunks_per_row_ub_chroma;
6083 unsigned int dpte_group_width_luma;
6084 unsigned int dpte_groups_per_row_luma_ub;
6085 unsigned int dpte_group_width_chroma;
6086 unsigned int dpte_groups_per_row_chroma_ub;
6087 int k;
6088
6089 for (k = 0; k < NumberOfActivePlanes; ++k) {
6090 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
6091 if (BytePerPixelC[k] == 0) {
6092 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
6093 } else {
6094 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
6095 }
6096 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
6097 if (BytePerPixelC[k] == 0) {
6098 DST_Y_PER_META_ROW_NOM_C[k] = 0;
6099 } else {
6100 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
6101 }
6102 }
6103
6104 for (k = 0; k < NumberOfActivePlanes; ++k) {
6105 if (DCCEnable[k] == true) {
6106 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
6107 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
6108 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
6109 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
6110 if (SourceScan[k] != dm_vert) {
6111 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
6112 } else {
6113 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
6114 }
6115 if (meta_row_remainder <= meta_chunk_threshold) {
6116 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
6117 } else {
6118 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
6119 }
6120 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6121 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6122 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6123 if (BytePerPixelC[k] == 0) {
6124 TimePerChromaMetaChunkNominal[k] = 0;
6125 TimePerChromaMetaChunkVBlank[k] = 0;
6126 TimePerChromaMetaChunkFlip[k] = 0;
6127 } else {
6128 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6129 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6130 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
6131 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
6132 if (SourceScan[k] != dm_vert) {
6133 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
6134 } else {
6135 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
6136 }
6137 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
6138 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
6139 } else {
6140 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
6141 }
6142 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6143 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6144 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6145 }
6146 } else {
6147 TimePerMetaChunkNominal[k] = 0;
6148 TimePerMetaChunkVBlank[k] = 0;
6149 TimePerMetaChunkFlip[k] = 0;
6150 TimePerChromaMetaChunkNominal[k] = 0;
6151 TimePerChromaMetaChunkVBlank[k] = 0;
6152 TimePerChromaMetaChunkFlip[k] = 0;
6153 }
6154 }
6155
6156 for (k = 0; k < NumberOfActivePlanes; ++k) {
6157 if (GPUVMEnable == true) {
6158 if (SourceScan[k] != dm_vert) {
6159 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
6160 } else {
6161 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
6162 }
6163 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
6164 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6165 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6166 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6167 if (BytePerPixelC[k] == 0) {
6168 time_per_pte_group_nom_chroma[k] = 0;
6169 time_per_pte_group_vblank_chroma[k] = 0;
6170 time_per_pte_group_flip_chroma[k] = 0;
6171 } else {
6172 if (SourceScan[k] != dm_vert) {
6173 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
6174 } else {
6175 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
6176 }
6177 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
6178 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6179 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6180 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6181 }
6182 } else {
6183 time_per_pte_group_nom_luma[k] = 0;
6184 time_per_pte_group_vblank_luma[k] = 0;
6185 time_per_pte_group_flip_luma[k] = 0;
6186 time_per_pte_group_nom_chroma[k] = 0;
6187 time_per_pte_group_vblank_chroma[k] = 0;
6188 time_per_pte_group_flip_chroma[k] = 0;
6189 }
6190 }
6191 }
6192
/*
 * Compute, for every active plane, the time budget per VM group and per VM
 * request for the VBlank and immediate-flip cases.
 *
 * All four outputs are 0 for a plane unless GPUVMEnable is set and either
 * DCCEnable[] is set or GPUVMMaxPageTableLevels > 1.  Otherwise the time
 * available (destination lines * HTotal / PixelClock) is divided by the
 * number of groups / requests in the lower VM stage:
 *
 *   - DCC off:                    dpde0 bytes only
 *   - DCC on, one page level:     meta PTE bytes only
 *   - DCC on, several levels:     dpde0 + meta PTE bytes, plus 1 extra group
 *                                 (2 when the plane has chroma)
 *
 * Groups are bytes / vm_group_bytes[] rounded up; requests are bytes / 64 in
 * integer arithmetic.  Chroma contributions are added only when
 * BytePerPixelC[] > 0.  With more than two page table levels every result
 * is halved.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	unsigned int k;

	for (k = 0; k < NumberOfActivePlanes; ++k) {
		const bool has_chroma = BytePerPixelC[k] > 0;
		int group_count;
		int request_count;
		double vblank_time;
		double flip_time;

		if (!GPUVMEnable || (!DCCEnable[k] && GPUVMMaxPageTableLevels <= 1)) {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
			continue;
		}

		if (!DCCEnable[k]) {
			/* Page directory entries only. */
			if (has_chroma) {
				group_count = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				request_count = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
			} else {
				group_count = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				request_count = dpde0_bytes_per_frame_ub_l[k] / 64;
			}
		} else if (GPUVMMaxPageTableLevels == 1) {
			/* Meta PTEs only. */
			if (has_chroma) {
				group_count = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				request_count = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
			} else {
				group_count = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				request_count = meta_pte_bytes_per_frame_ub_l[k] / 64;
			}
		} else {
			/* Both page directory entries and meta PTEs, plus the fixed extra groups. */
			if (has_chroma) {
				group_count = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				request_count = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
						+ meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
			} else {
				group_count = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				request_count = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
			}
		}

		/* Time available in each case, before it is shared out. */
		vblank_time = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k];
		flip_time = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k];

		TimePerVMGroupVBlank[k] = vblank_time / group_count;
		TimePerVMGroupFlip[k] = flip_time / group_count;
		TimePerVMRequestVBlank[k] = vblank_time / request_count;
		TimePerVMRequestFlip[k] = flip_time / request_count;

		if (GPUVMMaxPageTableLevels > 2) {
			TimePerVMGroupVBlank[k] /= 2;
			TimePerVMGroupFlip[k] /= 2;
			TimePerVMRequestVBlank[k] /= 2;
			TimePerVMRequestFlip[k] /= 2;
		}
	}
}
6292
6293 static void CalculateStutterEfficiency(
6294 struct display_mode_lib *mode_lib,
6295 int CompressedBufferSizeInkByte,
6296 bool UnboundedRequestEnabled,
6297 int ConfigReturnBufferSizeInKByte,
6298 int MetaFIFOSizeInKEntries,
6299 int ZeroSizeBufferEntries,
6300 int NumberOfActivePlanes,
6301 int ROBBufferSizeInKByte,
6302 double TotalDataReadBandwidth,
6303 double DCFCLK,
6304 double ReturnBW,
6305 double COMPBUF_RESERVED_SPACE_64B,
6306 double COMPBUF_RESERVED_SPACE_ZS,
6307 double SRExitTime,
6308 double SRExitZ8Time,
6309 bool SynchronizedVBlank,
6310 double Z8StutterEnterPlusExitWatermark,
6311 double StutterEnterPlusExitWatermark,
6312 bool ProgressiveToInterlaceUnitInOPP,
6313 bool Interlace[],
6314 double MinTTUVBlank[],
6315 int DPPPerPlane[],
6316 unsigned int DETBufferSizeY[],
6317 int BytePerPixelY[],
6318 double BytePerPixelDETY[],
6319 double SwathWidthY[],
6320 int SwathHeightY[],
6321 int SwathHeightC[],
6322 double NetDCCRateLuma[],
6323 double NetDCCRateChroma[],
6324 double DCCFractionOfZeroSizeRequestsLuma[],
6325 double DCCFractionOfZeroSizeRequestsChroma[],
6326 int HTotal[],
6327 int VTotal[],
6328 double PixelClock[],
6329 double VRatio[],
6330 enum scan_direction_class SourceScan[],
6331 int BlockHeight256BytesY[],
6332 int BlockWidth256BytesY[],
6333 int BlockHeight256BytesC[],
6334 int BlockWidth256BytesC[],
6335 int DCCYMaxUncompressedBlock[],
6336 int DCCCMaxUncompressedBlock[],
6337 int VActive[],
6338 bool DCCEnable[],
6339 bool WritebackEnable[],
6340 double ReadBandwidthPlaneLuma[],
6341 double ReadBandwidthPlaneChroma[],
6342 double meta_row_bw[],
6343 double dpte_row_bw[],
6344 double *StutterEfficiencyNotIncludingVBlank,
6345 double *StutterEfficiency,
6346 int *NumberOfStutterBurstsPerFrame,
6347 double *Z8StutterEfficiencyNotIncludingVBlank,
6348 double *Z8StutterEfficiency,
6349 int *Z8NumberOfStutterBurstsPerFrame,
6350 double *StutterPeriod)
6351 {
6352 struct vba_vars_st *v = &mode_lib->vba;
6353
6354 double DETBufferingTimeY;
6355 double SwathWidthYCriticalPlane = 0;
6356 double VActiveTimeCriticalPlane = 0;
6357 double FrameTimeCriticalPlane = 0;
6358 int BytePerPixelYCriticalPlane = 0;
6359 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6360 double MinTTUVBlankCriticalPlane = 0;
6361 double TotalCompressedReadBandwidth;
6362 double TotalRowReadBandwidth;
6363 double AverageDCCCompressionRate;
6364 double EffectiveCompressedBufferSize;
6365 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
6366 double StutterBurstTime;
6367 int TotalActiveWriteback;
6368 double LinesInDETY;
6369 double LinesInDETYRoundedDownToSwath;
6370 double MaximumEffectiveCompressionLuma;
6371 double MaximumEffectiveCompressionChroma;
6372 double TotalZeroSizeRequestReadBandwidth;
6373 double TotalZeroSizeCompressedReadBandwidth;
6374 double AverageDCCZeroSizeFraction;
6375 double AverageZeroSizeCompressionRate;
6376 int TotalNumberOfActiveOTG = 0;
6377 double LastStutterPeriod = 0.0;
6378 double LastZ8StutterPeriod = 0.0;
6379 int k;
6380
6381 TotalZeroSizeRequestReadBandwidth = 0;
6382 TotalZeroSizeCompressedReadBandwidth = 0;
6383 TotalRowReadBandwidth = 0;
6384 TotalCompressedReadBandwidth = 0;
6385
6386 for (k = 0; k < NumberOfActivePlanes; ++k) {
6387 if (DCCEnable[k] == true) {
6388 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
6389 || DCCYMaxUncompressedBlock[k] < 256) {
6390 MaximumEffectiveCompressionLuma = 2;
6391 } else {
6392 MaximumEffectiveCompressionLuma = 4;
6393 }
6394 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
6395 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
6396 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6397 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
6398 if (ReadBandwidthPlaneChroma[k] > 0) {
6399 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6400 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
6401 MaximumEffectiveCompressionChroma = 2;
6402 } else {
6403 MaximumEffectiveCompressionChroma = 4;
6404 }
6405 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
6406 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
6407 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
6408 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6409 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
6410 }
6411 } else {
6412 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6413 }
6414 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
6415 }
6416
6417 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
6418 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
6419
6420 #ifdef __DML_VBA_DEBUG__
6421 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
6422 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
6423 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
6424 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
6425 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
6426 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6427 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
6428 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
6429 #endif
6430
6431 if (AverageDCCZeroSizeFraction == 1) {
6432 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6433 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
6434 } else if (AverageDCCZeroSizeFraction > 0) {
6435 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6436 EffectiveCompressedBufferSize = dml_min(
6437 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6438 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
6439 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
6440 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6441 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6442 dml_print(
6443 "DML::%s: min 2 = %f\n",
6444 __func__,
6445 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
6446 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6447 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6448 } else {
6449 EffectiveCompressedBufferSize = dml_min(
6450 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6451 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
6452 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6453 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
6454 }
6455
6456 #ifdef __DML_VBA_DEBUG__
6457 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
6458 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
6459 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6460 #endif
6461
6462 *StutterPeriod = 0;
6463 for (k = 0; k < NumberOfActivePlanes; ++k) {
6464 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
6465 / BytePerPixelDETY[k] / SwathWidthY[k];
6466 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
6467 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
6468 #ifdef __DML_VBA_DEBUG__
6469 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
6470 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
6471 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
6472 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
6473 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
6474 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
6475 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
6476 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
6477 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6478 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
6479 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
6480 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6481 #endif
6482
6483 if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
6484 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
6485
6486 *StutterPeriod = DETBufferingTimeY;
6487 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
6488 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
6489 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6490 SwathWidthYCriticalPlane = SwathWidthY[k];
6491 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
6492 MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
6493
6494 #ifdef __DML_VBA_DEBUG__
6495 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6496 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
6497 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
6498 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6499 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
6500 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
6501 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
6502 #endif
6503 }
6504 }
6505
6506 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize);
6507 #ifdef __DML_VBA_DEBUG__
6508 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
6509 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6510 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
6511 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
6512 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6513 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
6514 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6515 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
6516 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
6517 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
6518 #endif
6519
6520 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
6521 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
6522 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6523 #ifdef __DML_VBA_DEBUG__
6524 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
6525 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
6526 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
6527 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
6528 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6529 #endif
6530 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6531
6532 dml_print(
6533 "DML::%s: Time to finish residue swath=%f\n",
6534 __func__,
6535 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6536
6537 TotalActiveWriteback = 0;
6538 for (k = 0; k < NumberOfActivePlanes; ++k) {
6539 if (WritebackEnable[k]) {
6540 TotalActiveWriteback = TotalActiveWriteback + 1;
6541 }
6542 }
6543
6544 if (TotalActiveWriteback == 0) {
6545 #ifdef __DML_VBA_DEBUG__
6546 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
6547 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
6548 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
6549 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6550 #endif
6551 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
6552 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
6553 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6554 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6555 } else {
6556 *StutterEfficiencyNotIncludingVBlank = 0.;
6557 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
6558 *NumberOfStutterBurstsPerFrame = 0;
6559 *Z8NumberOfStutterBurstsPerFrame = 0;
6560 }
6561 #ifdef __DML_VBA_DEBUG__
6562 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6563 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6564 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
6565 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
6566 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6567 #endif
6568
6569 for (k = 0; k < NumberOfActivePlanes; ++k) {
6570 if (v->BlendingAndTiming[k] == k) {
6571 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6572 }
6573 }
6574
6575 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6576 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6577
6578 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
6579 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
6580 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6581 } else {
6582 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6583 }
6584 } else {
6585 *StutterEfficiency = 0;
6586 }
6587
6588 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6589 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6590 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
6591 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
6592 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6593 } else {
6594 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6595 }
6596 } else {
6597 *Z8StutterEfficiency = 0.;
6598 }
6599
6600 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6601 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6602 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6603 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6604 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6605 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6606 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6607 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6608 }
6609
/*
 * Select, for every active plane, the luma/chroma swath heights and the split
 * of the DET buffer between luma and chroma, and report whether the viewport
 * can be supported.
 *
 * Inputs are per-plane arrays indexed 0..NumberOfActivePlanes-1.
 *
 * Outputs written here:
 *   SwathHeightY[], SwathHeightC[]      - chosen swath heights
 *   DETBufferSizeY[], DETBufferSizeC[]  - DET share given to luma / chroma
 *   ViewportSizeSupportPerPlane[]       - per-plane support flag
 *   *ViewportSizeSupport                - true only if no plane failed
 * The arrays swath_width_luma_ub[], swath_width_chroma_ub[], SwathWidth[] and
 * SwathWidthChroma[] are handed to CalculateSwathWidth() and read back
 * afterwards.
 *
 * HRatioChroma[] is accepted but not read by this function.
 */
static void CalculateSwathAndDETConfiguration(
		bool ForceSingleDPP,
		int NumberOfActivePlanes,
		bool DETSharedByAllDPP,
		unsigned int DETBufferSizeInKByteA[],
		double MaximumSwathWidthLuma[],
		double MaximumSwathWidthChroma[],
		enum scan_direction_class SourceScan[],
		enum source_format_class SourcePixelFormat[],
		enum dm_swizzle_mode SurfaceTiling[],
		int ViewportWidth[],
		int ViewportHeight[],
		int SurfaceWidthY[],
		int SurfaceWidthC[],
		int SurfaceHeightY[],
		int SurfaceHeightC[],
		int Read256BytesBlockHeightY[],
		int Read256BytesBlockHeightC[],
		int Read256BytesBlockWidthY[],
		int Read256BytesBlockWidthC[],
		enum odm_combine_mode ODMCombineEnabled[],
		int BlendingAndTiming[],
		int BytePerPixY[],
		int BytePerPixC[],
		double BytePerPixDETY[],
		double BytePerPixDETC[],
		int HActive[],
		double HRatio[],
		double HRatioChroma[],
		int DPPPerPlane[],
		int swath_width_luma_ub[],
		int swath_width_chroma_ub[],
		double SwathWidth[],
		double SwathWidthChroma[],
		int SwathHeightY[],
		int SwathHeightC[],
		unsigned int DETBufferSizeY[],
		unsigned int DETBufferSizeC[],
		bool ViewportSizeSupportPerPlane[],
		bool *ViewportSizeSupport)
{
	int MaximumSwathHeightY[DC__NUM_DPP__MAX];
	int MaximumSwathHeightC[DC__NUM_DPP__MAX];
	int MinimumSwathHeightY;
	int MinimumSwathHeightC;
	int RoundedUpMaxSwathSizeBytesY;
	int RoundedUpMaxSwathSizeBytesC;
	int RoundedUpMinSwathSizeBytesY;
	int RoundedUpMinSwathSizeBytesC;
	int RoundedUpSwathSizeBytesY;
	int RoundedUpSwathSizeBytesC;
	/* Filled by CalculateSwathWidth(); not read again in this function. */
	double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
	double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
	int k;

	/*
	 * Step 1: obtain swath widths, the maximum swath heights and the
	 * rounded-up swath widths (swath_width_*_ub) for every plane.
	 */
	CalculateSwathWidth(
			ForceSingleDPP,
			NumberOfActivePlanes,
			SourcePixelFormat,
			SourceScan,
			ViewportWidth,
			ViewportHeight,
			SurfaceWidthY,
			SurfaceWidthC,
			SurfaceHeightY,
			SurfaceHeightC,
			ODMCombineEnabled,
			BytePerPixY,
			BytePerPixC,
			Read256BytesBlockHeightY,
			Read256BytesBlockHeightC,
			Read256BytesBlockWidthY,
			Read256BytesBlockWidthC,
			BlendingAndTiming,
			HActive,
			HRatio,
			DPPPerPlane,
			SwathWidthSingleDPP,
			SwathWidthSingleDPPChroma,
			SwathWidth,
			SwathWidthChroma,
			MaximumSwathHeightY,
			MaximumSwathHeightC,
			swath_width_luma_ub,
			swath_width_chroma_ub);

	/* Cleared below as soon as any single plane fails its check. */
	*ViewportSizeSupport = true;
	for (k = 0; k < NumberOfActivePlanes; ++k) {
		unsigned int DETBufferSizeInKByte = DETBufferSizeInKByteA[k];

		/*
		 * When the DET is shared, each DPP of the plane gets an equal
		 * (integer-divided) share; the DPPPerPlane[k] test also avoids a
		 * division by zero.
		 */
		if (DETSharedByAllDPP && DPPPerPlane[k])
			DETBufferSizeInKByte /= DPPPerPlane[k];
		/*
		 * Step 2: minimum swath heights. Depending on pixel format, tiling
		 * and scan direction the minimum is either the maximum itself or
		 * half of it.
		 */
		if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
				|| SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
			if (SurfaceTiling[k] == dm_sw_linear
					|| (SourcePixelFormat[k] == dm_444_64
							&& (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
							&& SourceScan[k] != dm_vert)) {
				MinimumSwathHeightY = MaximumSwathHeightY[k];
			/*
			 * NOTE(review): dm_444_8 is not among the formats accepted by the
			 * enclosing if, so this branch looks unreachable as written.
			 * Confirm against the HW formula before changing anything.
			 */
			} else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
				MinimumSwathHeightY = MaximumSwathHeightY[k];
			} else {
				MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
			}
			MinimumSwathHeightC = MaximumSwathHeightC[k];
		} else {
			if (SurfaceTiling[k] == dm_sw_linear) {
				MinimumSwathHeightY = MaximumSwathHeightY[k];
				MinimumSwathHeightC = MaximumSwathHeightC[k];
			} else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
				MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
				MinimumSwathHeightC = MaximumSwathHeightC[k];
			} else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
				MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
				MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
			} else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
				MinimumSwathHeightY = MaximumSwathHeightY[k];
				MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
			} else {
				MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
				MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
			}
		}

		/*
		 * Step 3: swath sizes in bytes for the max and min heights. The
		 * products involve a double (BytePerPixDET*) and are truncated when
		 * stored in the int variables. Only dm_420_10 is additionally passed
		 * through dml_ceil(..., 256).
		 */
		RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
		RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
		if (SourcePixelFormat[k] == dm_420_10) {
			RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
			RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
		}
		RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
		RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
		if (SourcePixelFormat[k] == dm_420_10) {
			RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
			RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
		}

		/*
		 * Step 4: pick the tallest swath combination whose luma + chroma
		 * bytes fit in half of the plane's DET budget. Order of preference:
		 *   1. max luma + max chroma
		 *   2. min luma + max chroma (only if luma swath >= 1.5x chroma swath)
		 *   3. max luma + min chroma (only if luma swath <  1.5x chroma swath)
		 *   4. min luma + min chroma (fallback, taken without a fit test)
		 */
		if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
			SwathHeightY[k] = MaximumSwathHeightY[k];
			SwathHeightC[k] = MaximumSwathHeightC[k];
			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
		} else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
				&& RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
			SwathHeightY[k] = MinimumSwathHeightY;
			SwathHeightC[k] = MaximumSwathHeightC[k];
			RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
		} else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
				&& RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
			SwathHeightY[k] = MaximumSwathHeightY[k];
			SwathHeightC[k] = MinimumSwathHeightC;
			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
			RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
		} else {
			SwathHeightY[k] = MinimumSwathHeightY;
			SwathHeightC[k] = MinimumSwathHeightC;
			RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
			RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
		}
		{
			/*
			 * Step 5: split the DET. Note that the split and the support
			 * check below use the dml_ceil(..., 64) value, whereas the fit
			 * tests above used the unrounded DETBufferSizeInKByte.
			 * (dml_ceil presumably rounds up to a multiple of its second
			 * argument - confirm in dml_inline_defs.h.)
			 */
			double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
			if (SwathHeightC[k] == 0) {
				/* No chroma swath: luma gets the whole buffer. */
				DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
				DETBufferSizeC[k] = 0;
			} else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
				/* Comparable swath sizes: split in half. */
				DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
				DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
			} else {
				/* Luma-heavy: 2/3 (passed through dml_floor(..., 1024)) vs 1/3. */
				DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
				DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
			}

			/*
			 * Step 6: the plane is unsupported when even the minimum swaths
			 * exceed half of the DET, or when a swath width exceeds its
			 * maximum (chroma is only checked when SwathHeightC[k] > 0).
			 */
			if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
					|| (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
				*ViewportSizeSupport = false;
				ViewportSizeSupportPerPlane[k] = false;
			} else {
				ViewportSizeSupportPerPlane[k] = true;
			}
		}
	}
}
6793
6794 static void CalculateSwathWidth(
6795 bool ForceSingleDPP,
6796 int NumberOfActivePlanes,
6797 enum source_format_class SourcePixelFormat[],
6798 enum scan_direction_class SourceScan[],
6799 int ViewportWidth[],
6800 int ViewportHeight[],
6801 int SurfaceWidthY[],
6802 int SurfaceWidthC[],
6803 int SurfaceHeightY[],
6804 int SurfaceHeightC[],
6805 enum odm_combine_mode ODMCombineEnabled[],
6806 int BytePerPixY[],
6807 int BytePerPixC[],
6808 int Read256BytesBlockHeightY[],
6809 int Read256BytesBlockHeightC[],
6810 int Read256BytesBlockWidthY[],
6811 int Read256BytesBlockWidthC[],
6812 int BlendingAndTiming[],
6813 int HActive[],
6814 double HRatio[],
6815 int DPPPerPlane[],
6816 double SwathWidthSingleDPPY[],
6817 double SwathWidthSingleDPPC[],
6818 double SwathWidthY[],
6819 double SwathWidthC[],
6820 int MaximumSwathHeightY[],
6821 int MaximumSwathHeightC[],
6822 int swath_width_luma_ub[],
6823 int swath_width_chroma_ub[])
6824 {
6825 enum odm_combine_mode MainPlaneODMCombine;
6826 int j, k;
6827
6828 #ifdef __DML_VBA_DEBUG__
6829 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
6830 #endif
6831
6832 for (k = 0; k < NumberOfActivePlanes; ++k) {
6833 if (SourceScan[k] != dm_vert) {
6834 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6835 } else {
6836 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6837 }
6838
6839 #ifdef __DML_VBA_DEBUG__
6840 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
6841 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
6842 #endif
6843
6844 MainPlaneODMCombine = ODMCombineEnabled[k];
6845 for (j = 0; j < NumberOfActivePlanes; ++j) {
6846 if (BlendingAndTiming[k] == j) {
6847 MainPlaneODMCombine = ODMCombineEnabled[j];
6848 }
6849 }
6850
6851 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
6852 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6853 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
6854 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6855 } else if (DPPPerPlane[k] == 2) {
6856 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6857 } else {
6858 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6859 }
6860
6861 #ifdef __DML_VBA_DEBUG__
6862 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
6863 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
6864 #endif
6865
6866 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6867 SwathWidthC[k] = SwathWidthY[k] / 2;
6868 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6869 } else {
6870 SwathWidthC[k] = SwathWidthY[k];
6871 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
6872 }
6873
6874 if (ForceSingleDPP == true) {
6875 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6876 SwathWidthC[k] = SwathWidthSingleDPPC[k];
6877 }
6878 {
6879 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6880 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6881
6882 #ifdef __DML_VBA_DEBUG__
6883 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
6884 #endif
6885
6886 if (SourceScan[k] != dm_vert) {
6887 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6888 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6889 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6890 if (BytePerPixC[k] > 0) {
6891 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6892
6893 swath_width_chroma_ub[k] = dml_min(
6894 surface_width_ub_c,
6895 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
6896 } else {
6897 swath_width_chroma_ub[k] = 0;
6898 }
6899 } else {
6900 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
6901 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
6902 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
6903 if (BytePerPixC[k] > 0) {
6904 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
6905
6906 swath_width_chroma_ub[k] = dml_min(
6907 surface_height_ub_c,
6908 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
6909 } else {
6910 swath_width_chroma_ub[k] = 0;
6911 }
6912 }
6913 }
6914 }
6915 }
6916
6917 static double CalculateExtraLatency(
6918 int RoundTripPingLatencyCycles,
6919 int ReorderingBytes,
6920 double DCFCLK,
6921 int TotalNumberOfActiveDPP,
6922 int PixelChunkSizeInKByte,
6923 int TotalNumberOfDCCActiveDPP,
6924 int MetaChunkSize,
6925 double ReturnBW,
6926 bool GPUVMEnable,
6927 bool HostVMEnable,
6928 int NumberOfActivePlanes,
6929 int NumberOfDPP[],
6930 int dpte_group_bytes[],
6931 double HostVMInefficiencyFactor,
6932 double HostVMMinPageSize,
6933 int HostVMMaxNonCachedPageTableLevels)
6934 {
6935 double ExtraLatencyBytes;
6936 double ExtraLatency;
6937
6938 ExtraLatencyBytes = CalculateExtraLatencyBytes(
6939 ReorderingBytes,
6940 TotalNumberOfActiveDPP,
6941 PixelChunkSizeInKByte,
6942 TotalNumberOfDCCActiveDPP,
6943 MetaChunkSize,
6944 GPUVMEnable,
6945 HostVMEnable,
6946 NumberOfActivePlanes,
6947 NumberOfDPP,
6948 dpte_group_bytes,
6949 HostVMInefficiencyFactor,
6950 HostVMMinPageSize,
6951 HostVMMaxNonCachedPageTableLevels);
6952
6953 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
6954
6955 #ifdef __DML_VBA_DEBUG__
6956 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
6957 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
6958 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
6959 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
6960 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
6961 #endif
6962
6963 return ExtraLatency;
6964 }
6965
/*
 * Return the number of extra bytes used in the latency calculation:
 *
 *   ReorderingBytes
 *   + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *      + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024
 *   + when GPUVMEnable is set, for every active plane:
 *       NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels)
 *       * HostVMInefficiencyFactor
 *
 * "levels" is 0 unless both GPUVMEnable and HostVMEnable are set. In that
 * case it is HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2 (clamped
 * at 0 via dml_max) depending on whether HostVMMinPageSize is below 2048,
 * below 1048576, or neither.
 */
static double CalculateExtraLatencyBytes(
		int ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	int dynamic_levels = 0;
	int fetches_per_group;
	double total_bytes;
	int plane;

	if (GPUVMEnable && HostVMEnable) {
		if (HostVMMinPageSize < 2048)
			dynamic_levels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			dynamic_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			dynamic_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	total_bytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	/* Page-table traffic only contributes when GPUVM is on. */
	if (!GPUVMEnable)
		return total_bytes;

	fetches_per_group = 1 + 8 * dynamic_levels;
	for (plane = 0; plane < NumberOfActivePlanes; ++plane)
		total_bytes += NumberOfDPP[plane] * dpte_group_bytes[plane] * fetches_per_group * HostVMInefficiencyFactor;

	return total_bytes;
}
7005
/*
 * Return dml_max3() of the three urgent-latency inputs. When
 * DoUrgentLatencyAdjustment is set, the result is increased by
 *   UrgentLatencyAdjustmentFabricClockComponent
 *   * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1)
 * FabricClock is only used as a divisor on that adjusted path.
 */
static double CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double worst_case = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return worst_case;

	return worst_case + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
7023
7024 static noinline_for_stack void UseMinimumDCFCLK(
7025 struct display_mode_lib *mode_lib,
7026 int MaxPrefetchMode,
7027 int ReorderingBytes)
7028 {
7029 struct vba_vars_st *v = &mode_lib->vba;
7030 int dummy1, i, j, k;
7031 double NormalEfficiency, dummy2, dummy3;
7032 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
7033
7034 NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
7035 for (i = 0; i < v->soc.num_states; ++i) {
7036 for (j = 0; j <= 1; ++j) {
7037 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
7038 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
7039 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
7040 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
7041 double MinimumTWait;
7042 double NonDPTEBandwidth;
7043 double DPTEBandwidth;
7044 double DCFCLKRequiredForAverageBandwidth;
7045 double ExtraLatencyBytes;
7046 double ExtraLatencyCycles;
7047 double DCFCLKRequiredForPeakBandwidth;
7048 int NoOfDPPState[DC__NUM_DPP__MAX];
7049 double MinimumTvmPlus2Tr0;
7050
7051 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
7052 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7053 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
7054 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
7055 }
7056
7057 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
7058 NoOfDPPState[k] = v->NoOfDPP[i][j][k];
7059 }
7060
7061 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
7062 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
7063 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
7064 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
7065 DCFCLKRequiredForAverageBandwidth = dml_max3(
7066 v->ProjectedDCFCLKDeepSleep[i][j],
7067 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
7068 / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
7069 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);
7070
7071 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7072 ReorderingBytes,
7073 v->TotalNumberOfActiveDPP[i][j],
7074 v->PixelChunkSizeInKByte,
7075 v->TotalNumberOfDCCActiveDPP[i][j],
7076 v->MetaChunkSize,
7077 v->GPUVMEnable,
7078 v->HostVMEnable,
7079 v->NumberOfActivePlanes,
7080 NoOfDPPState,
7081 v->dpte_group_bytes,
7082 1,
7083 v->HostVMMinPageSize,
7084 v->HostVMMaxNonCachedPageTableLevels);
7085 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
7086 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7087 double DCFCLKCyclesRequiredInPrefetch;
7088 double ExpectedPrefetchBWAcceleration;
7089 double PrefetchTime;
7090
7091 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
7092 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
7093 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
7094 + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
7095 + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
7096 + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
7097 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
7098 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
7099 / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
7100 DynamicMetadataVMExtraLatency[k] =
7101 (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
7102 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
7103 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
7104 - v->UrgLatency[i]
7105 * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
7106 * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
7107 - DynamicMetadataVMExtraLatency[k];
7108
7109 if (PrefetchTime > 0) {
7110 double ExpectedVRatioPrefetch;
7111 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
7112 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
7113 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
7114 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
7115 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
7116 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
7117 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
7118 }
7119 } else {
7120 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7121 }
7122 if (v->DynamicMetadataEnable[k] == true) {
7123 double TSetupPipe;
7124 double TdmbfPipe;
7125 double TdmsksPipe;
7126 double TdmecPipe;
7127 double AllowedTimeForUrgentExtraLatency;
7128
7129 CalculateVupdateAndDynamicMetadataParameters(
7130 v->MaxInterDCNTileRepeaters,
7131 v->RequiredDPPCLK[i][j][k],
7132 v->RequiredDISPCLK[i][j],
7133 v->ProjectedDCFCLKDeepSleep[i][j],
7134 v->PixelClock[k],
7135 v->HTotal[k],
7136 v->VTotal[k] - v->VActive[k],
7137 v->DynamicMetadataTransmittedBytes[k],
7138 v->DynamicMetadataLinesBeforeActiveRequired[k],
7139 v->Interlace[k],
7140 v->ProgressiveToInterlaceUnitInOPP,
7141 &TSetupPipe,
7142 &TdmbfPipe,
7143 &TdmecPipe,
7144 &TdmsksPipe,
7145 &dummy1,
7146 &dummy2,
7147 &dummy3);
7148 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
7149 - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
7150 if (AllowedTimeForUrgentExtraLatency > 0) {
7151 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
7152 DCFCLKRequiredForPeakBandwidthPerPlane[k],
7153 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
7154 } else {
7155 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7156 }
7157 }
7158 }
7159 DCFCLKRequiredForPeakBandwidth = 0;
7160 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
7161 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
7162 }
7163 MinimumTvmPlus2Tr0 = v->UrgLatency[i]
7164 * (v->GPUVMEnable == true ?
7165 (v->HostVMEnable == true ?
7166 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
7167 0);
7168 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7169 double MaximumTvmPlus2Tr0PlusTsw;
7170 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
7171 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
7172 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
7173 } else {
7174 DCFCLKRequiredForPeakBandwidth = dml_max3(
7175 DCFCLKRequiredForPeakBandwidth,
7176 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
7177 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
7178 }
7179 }
7180 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
7181 }
7182 }
7183 }
7184
7185 static void CalculateUnboundedRequestAndCompressedBufferSize(
7186 unsigned int DETBufferSizeInKByte,
7187 int ConfigReturnBufferSizeInKByte,
7188 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
7189 int TotalActiveDPP,
7190 bool NoChromaPlanes,
7191 int MaxNumDPP,
7192 int CompressedBufferSegmentSizeInkByteFinal,
7193 enum output_encoder_class *Output,
7194 bool *UnboundedRequestEnabled,
7195 int *CompressedBufferSizeInkByte)
7196 {
7197 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
7198
7199 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
7200 *CompressedBufferSizeInkByte = (
7201 *UnboundedRequestEnabled == true ?
7202 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
7203 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
7204 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
7205
7206 #ifdef __DML_VBA_DEBUG__
7207 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
7208 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
7209 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
7210 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
7211 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
7212 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
7213 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
7214 #endif
7215 }
7216
7217 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
7218 {
7219 bool ret_val = false;
7220
7221 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
7222 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) {
7223 ret_val = false;
7224 }
7225 return (ret_val);
7226 }
7227
7228