1 /* SPDX-License-Identifier: MIT */
2 /*
3 * Copyright 2023 Advanced Micro Devices, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors: AMD
24 *
25 */
26
27 #include "display_mode_core.h"
28 #include "display_mode_util.h"
29 #include "display_mode_lib_defines.h"
30
31 #include "dml_assert.h"
32
33 #define DML2_MAX_FMT_420_BUFFER_WIDTH 4096
34 #define TB_BORROWED_MAX 400
35 #define DML_MAX_VSTARTUP_START 1023
36
37 // ---------------------------
38 // Declaration Begins
39 // ---------------------------
40 static void CalculateBytePerPixelAndBlockSizes(
41 enum dml_source_format_class SourcePixelFormat,
42 enum dml_swizzle_mode SurfaceTiling,
43 // Output
44 dml_uint_t *BytePerPixelY,
45 dml_uint_t *BytePerPixelC,
46 dml_float_t *BytePerPixelDETY,
47 dml_float_t *BytePerPixelDETC,
48 dml_uint_t *BlockHeight256BytesY,
49 dml_uint_t *BlockHeight256BytesC,
50 dml_uint_t *BlockWidth256BytesY,
51 dml_uint_t *BlockWidth256BytesC,
52 dml_uint_t *MacroTileHeightY,
53 dml_uint_t *MacroTileHeightC,
54 dml_uint_t *MacroTileWidthY,
55 dml_uint_t *MacroTileWidthC);
56
57 static dml_float_t CalculateWriteBackDISPCLK(
58 enum dml_source_format_class WritebackPixelFormat,
59 dml_float_t PixelClock,
60 dml_float_t WritebackHRatio,
61 dml_float_t WritebackVRatio,
62 dml_uint_t WritebackHTaps,
63 dml_uint_t WritebackVTaps,
64 dml_uint_t WritebackSourceWidth,
65 dml_uint_t WritebackDestinationWidth,
66 dml_uint_t HTotal,
67 dml_uint_t WritebackLineBufferSize,
68 dml_float_t DISPCLKDPPCLKVCOSpeed);
69
70 static void CalculateVMRowAndSwath(
71 struct display_mode_lib_scratch_st *s,
72 struct CalculateVMRowAndSwath_params_st *p);
73
74 static void CalculateOutputLink(
75 dml_float_t PHYCLKPerState,
76 dml_float_t PHYCLKD18PerState,
77 dml_float_t PHYCLKD32PerState,
78 dml_float_t Downspreading,
79 dml_bool_t IsMainSurfaceUsingTheIndicatedTiming,
80 enum dml_output_encoder_class Output,
81 enum dml_output_format_class OutputFormat,
82 dml_uint_t HTotal,
83 dml_uint_t HActive,
84 dml_float_t PixelClockBackEnd,
85 dml_float_t ForcedOutputLinkBPP,
86 dml_uint_t DSCInputBitPerComponent,
87 dml_uint_t NumberOfDSCSlices,
88 dml_float_t AudioSampleRate,
89 dml_uint_t AudioSampleLayout,
90 enum dml_odm_mode ODMModeNoDSC,
91 enum dml_odm_mode ODMModeDSC,
92 enum dml_dsc_enable DSCEnable,
93 dml_uint_t OutputLinkDPLanes,
94 enum dml_output_link_dp_rate OutputLinkDPRate,
95
96 // Output
97 dml_bool_t *RequiresDSC,
98 dml_bool_t *RequiresFEC,
99 dml_float_t *OutBpp,
100 enum dml_output_type_and_rate__type *OutputType,
101 enum dml_output_type_and_rate__rate *OutputRate,
102 dml_uint_t *RequiredSlots);
103
104 static void CalculateODMMode(
105 dml_uint_t MaximumPixelsPerLinePerDSCUnit,
106 dml_uint_t HActive,
107 enum dml_output_encoder_class Output,
108 enum dml_output_format_class OutputFormat,
109 enum dml_odm_use_policy ODMUse,
110 dml_float_t StateDispclk,
111 dml_float_t MaxDispclk,
112 dml_bool_t DSCEnable,
113 dml_uint_t TotalNumberOfActiveDPP,
114 dml_uint_t MaxNumDPP,
115 dml_float_t PixelClock,
116 dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
117 dml_float_t DISPCLKRampingMargin,
118 dml_float_t DISPCLKDPPCLKVCOSpeed,
119 dml_uint_t NumberOfDSCSlices,
120
121 // Output
122 dml_bool_t *TotalAvailablePipesSupport,
123 dml_uint_t *NumberOfDPP,
124 enum dml_odm_mode *ODMMode,
125 dml_float_t *RequiredDISPCLKPerSurface);
126
127 static dml_float_t CalculateRequiredDispclk(
128 enum dml_odm_mode ODMMode,
129 dml_float_t PixelClock,
130 dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
131 dml_float_t DISPCLKRampingMargin,
132 dml_float_t DISPCLKDPPCLKVCOSpeed,
133 dml_float_t MaxDispclkSingle);
134
135 static void CalculateSinglePipeDPPCLKAndSCLThroughput(
136 dml_float_t HRatio,
137 dml_float_t HRatioChroma,
138 dml_float_t VRatio,
139 dml_float_t VRatioChroma,
140 dml_float_t MaxDCHUBToPSCLThroughput,
141 dml_float_t MaxPSCLToLBThroughput,
142 dml_float_t PixelClock,
143 enum dml_source_format_class SourcePixelFormat,
144 dml_uint_t HTaps,
145 dml_uint_t HTapsChroma,
146 dml_uint_t VTaps,
147 dml_uint_t VTapsChroma,
148
149 // Output
150 dml_float_t *PSCL_THROUGHPUT,
151 dml_float_t *PSCL_THROUGHPUT_CHROMA,
152 dml_float_t *DPPCLKUsingSingleDPP);
153
154 static void CalculateDPPCLK(
155 dml_uint_t NumberOfActiveSurfaces,
156 dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
157 dml_float_t DISPCLKDPPCLKVCOSpeed,
158 dml_float_t DPPCLKUsingSingleDPP[],
159 dml_uint_t DPPPerSurface[],
160
161 // Output
162 dml_float_t *GlobalDPPCLK,
163 dml_float_t Dppclk[]);
164
165 static void CalculateMALLUseForStaticScreen(
166 dml_uint_t NumberOfActiveSurfaces,
167 dml_uint_t MALLAllocatedForDCNFinal,
168 enum dml_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
169 dml_uint_t SurfaceSizeInMALL[],
170 dml_bool_t one_row_per_frame_fits_in_buffer[],
171
172 // Output
173 dml_bool_t UsesMALLForStaticScreen[]);
174
175 static dml_uint_t dscceComputeDelay(
176 dml_uint_t bpc,
177 dml_float_t BPP,
178 dml_uint_t sliceWidth,
179 dml_uint_t numSlices,
180 enum dml_output_format_class pixelFormat,
181 enum dml_output_encoder_class Output);
182
183 static dml_uint_t dscComputeDelay(enum dml_output_format_class pixelFormat,
184 enum dml_output_encoder_class Output);
185
186 static dml_bool_t CalculatePrefetchSchedule(struct display_mode_lib_scratch_st *scratch,
187 struct CalculatePrefetchSchedule_params_st *p);
188
189 static dml_float_t RoundToDFSGranularity(dml_float_t Clock, dml_bool_t round_up, dml_float_t VCOSpeed);
190
191 static void CalculateDCCConfiguration(
192 dml_bool_t DCCEnabled,
193 dml_bool_t DCCProgrammingAssumesScanDirectionUnknown,
194 enum dml_source_format_class SourcePixelFormat,
195 dml_uint_t SurfaceWidthLuma,
196 dml_uint_t SurfaceWidthChroma,
197 dml_uint_t SurfaceHeightLuma,
198 dml_uint_t SurfaceHeightChroma,
199 dml_uint_t nomDETInKByte,
200 dml_uint_t RequestHeight256ByteLuma,
201 dml_uint_t RequestHeight256ByteChroma,
202 enum dml_swizzle_mode TilingFormat,
203 dml_uint_t BytePerPixelY,
204 dml_uint_t BytePerPixelC,
205 dml_float_t BytePerPixelDETY,
206 dml_float_t BytePerPixelDETC,
207 enum dml_rotation_angle SourceScan,
208 // Output
209 dml_uint_t *MaxUncompressedBlockLuma,
210 dml_uint_t *MaxUncompressedBlockChroma,
211 dml_uint_t *MaxCompressedBlockLuma,
212 dml_uint_t *MaxCompressedBlockChroma,
213 dml_uint_t *IndependentBlockLuma,
214 dml_uint_t *IndependentBlockChroma);
215
216 static dml_uint_t CalculatePrefetchSourceLines(
217 dml_float_t VRatio,
218 dml_uint_t VTaps,
219 dml_bool_t Interlace,
220 dml_bool_t ProgressiveToInterlaceUnitInOPP,
221 dml_uint_t SwathHeight,
222 enum dml_rotation_angle SourceScan,
223 dml_bool_t ViewportStationary,
224 dml_uint_t SwathWidth,
225 dml_uint_t ViewportHeight,
226 dml_uint_t ViewportXStart,
227 dml_uint_t ViewportYStart,
228
229 // Output
230 dml_uint_t *VInitPreFill,
231 dml_uint_t *MaxNumSwath);
232
233 static dml_uint_t CalculateVMAndRowBytes(
234 dml_bool_t ViewportStationary,
235 dml_bool_t DCCEnable,
236 dml_uint_t NumberOfDPPs,
237 dml_uint_t BlockHeight256Bytes,
238 dml_uint_t BlockWidth256Bytes,
239 enum dml_source_format_class SourcePixelFormat,
240 dml_uint_t SurfaceTiling,
241 dml_uint_t BytePerPixel,
242 enum dml_rotation_angle SourceScan,
243 dml_uint_t SwathWidth,
244 dml_uint_t ViewportHeight,
245 dml_uint_t ViewportXStart,
246 dml_uint_t ViewportYStart,
247 dml_bool_t GPUVMEnable,
248 dml_uint_t GPUVMMaxPageTableLevels,
249 dml_uint_t GPUVMMinPageSizeKBytes,
250 dml_uint_t PTEBufferSizeInRequests,
251 dml_uint_t Pitch,
252 dml_uint_t DCCMetaPitch,
253 dml_uint_t MacroTileWidth,
254 dml_uint_t MacroTileHeight,
255
256 // Output
257 dml_uint_t *MetaRowByte,
258 dml_uint_t *PixelPTEBytesPerRow,
259 dml_uint_t *PixelPTEBytesPerRowStorage, // for PTE buffer size check
260 dml_uint_t *dpte_row_width_ub,
261 dml_uint_t *dpte_row_height,
262 dml_uint_t *dpte_row_height_linear,
263 dml_uint_t *PixelPTEBytesPerRow_one_row_per_frame,
264 dml_uint_t *dpte_row_width_ub_one_row_per_frame,
265 dml_uint_t *dpte_row_height_one_row_per_frame,
266 dml_uint_t *MetaRequestWidth,
267 dml_uint_t *MetaRequestHeight,
268 dml_uint_t *meta_row_width,
269 dml_uint_t *meta_row_height,
270 dml_uint_t *PixelPTEReqWidth,
271 dml_uint_t *PixelPTEReqHeight,
272 dml_uint_t *PTERequestSize,
273 dml_uint_t *DPDE0BytesFrame,
274 dml_uint_t *MetaPTEBytesFrame);
275
276 static dml_float_t CalculateTWait(
277 dml_uint_t PrefetchMode,
278 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,
279 dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
280 dml_bool_t DRRDisplay,
281 dml_float_t DRAMClockChangeLatency,
282 dml_float_t FCLKChangeLatency,
283 dml_float_t UrgentLatency,
284 dml_float_t SREnterPlusExitTime);
285
286 static void CalculatePrefetchMode(
287 enum dml_prefetch_modes AllowForPStateChangeOrStutterInVBlank,
288 dml_uint_t *MinPrefetchMode,
289 dml_uint_t *MaxPrefetchMode);
290
291 static void CalculateRowBandwidth(
292 dml_bool_t GPUVMEnable,
293 enum dml_source_format_class SourcePixelFormat,
294 dml_float_t VRatio,
295 dml_float_t VRatioChroma,
296 dml_bool_t DCCEnable,
297 dml_float_t LineTime,
298 dml_uint_t MetaRowByteLuma,
299 dml_uint_t MetaRowByteChroma,
300 dml_uint_t meta_row_height_luma,
301 dml_uint_t meta_row_height_chroma,
302 dml_uint_t PixelPTEBytesPerRowLuma,
303 dml_uint_t PixelPTEBytesPerRowChroma,
304 dml_uint_t dpte_row_height_luma,
305 dml_uint_t dpte_row_height_chroma,
306 // Output
307 dml_float_t *meta_row_bw,
308 dml_float_t *dpte_row_bw);
309
310 static void CalculateFlipSchedule(
311 dml_float_t HostVMInefficiencyFactor,
312 dml_float_t UrgentExtraLatency,
313 dml_float_t UrgentLatency,
314 dml_uint_t GPUVMMaxPageTableLevels,
315 dml_bool_t HostVMEnable,
316 dml_uint_t HostVMMaxNonCachedPageTableLevels,
317 dml_bool_t GPUVMEnable,
318 dml_uint_t HostVMMinPageSize,
319 dml_float_t PDEAndMetaPTEBytesPerFrame,
320 dml_float_t MetaRowBytes,
321 dml_float_t DPTEBytesPerRow,
322 dml_float_t BandwidthAvailableForImmediateFlip,
323 dml_uint_t TotImmediateFlipBytes,
324 enum dml_source_format_class SourcePixelFormat,
325 dml_float_t LineTime,
326 dml_float_t VRatio,
327 dml_float_t VRatioChroma,
328 dml_float_t Tno_bw,
329 dml_bool_t DCCEnable,
330 dml_uint_t dpte_row_height,
331 dml_uint_t meta_row_height,
332 dml_uint_t dpte_row_height_chroma,
333 dml_uint_t meta_row_height_chroma,
334 dml_bool_t use_one_row_for_frame_flip,
335
336 // Output
337 dml_float_t *DestinationLinesToRequestVMInImmediateFlip,
338 dml_float_t *DestinationLinesToRequestRowInImmediateFlip,
339 dml_float_t *final_flip_bw,
340 dml_bool_t *ImmediateFlipSupportedForPipe);
341
342 static dml_float_t CalculateWriteBackDelay(
343 enum dml_source_format_class WritebackPixelFormat,
344 dml_float_t WritebackHRatio,
345 dml_float_t WritebackVRatio,
346 dml_uint_t WritebackVTaps,
347 dml_uint_t WritebackDestinationWidth,
348 dml_uint_t WritebackDestinationHeight,
349 dml_uint_t WritebackSourceHeight,
350 dml_uint_t HTotal);
351
352 static void CalculateVUpdateAndDynamicMetadataParameters(
353 dml_uint_t MaxInterDCNTileRepeaters,
354 dml_float_t Dppclk,
355 dml_float_t DISPCLK,
356 dml_float_t DCFClkDeepSleep,
357 dml_float_t PixelClock,
358 dml_uint_t HTotal,
359 dml_uint_t VBlank,
360 dml_uint_t DynamicMetadataTransmittedBytes,
361 dml_uint_t DynamicMetadataLinesBeforeActiveRequired,
362 dml_uint_t InterlaceEnable,
363 dml_bool_t ProgressiveToInterlaceUnitInOPP,
364 dml_float_t *TSetup,
365 dml_float_t *Tdmbf,
366 dml_float_t *Tdmec,
367 dml_float_t *Tdmsks,
368 dml_uint_t *VUpdateOffsetPix,
369 dml_uint_t *VUpdateWidthPix,
370 dml_uint_t *VReadyOffsetPix);
371
372 static void PixelClockAdjustmentForProgressiveToInterlaceUnit(struct dml_display_cfg_st *display_cfg, dml_bool_t ptoi_supported);
373
374 static dml_float_t TruncToValidBPP(
375 dml_float_t LinkBitRate,
376 dml_uint_t Lanes,
377 dml_uint_t HTotal,
378 dml_uint_t HActive,
379 dml_float_t PixelClock,
380 dml_float_t DesiredBPP,
381 dml_bool_t DSCEnable,
382 enum dml_output_encoder_class Output,
383 enum dml_output_format_class Format,
384 dml_uint_t DSCInputBitPerComponent,
385 dml_uint_t DSCSlices,
386 dml_uint_t AudioRate,
387 dml_uint_t AudioLayout,
388 enum dml_odm_mode ODMModeNoDSC,
389 enum dml_odm_mode ODMModeDSC,
390 // Output
391 dml_uint_t *RequiredSlotsSingle);
392
393 static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
394 struct display_mode_lib_scratch_st *s,
395 struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *p);
396
397 static void CalculateDCFCLKDeepSleep(
398 dml_uint_t NumberOfActiveSurfaces,
399 dml_uint_t BytePerPixelY[],
400 dml_uint_t BytePerPixelC[],
401 dml_float_t VRatio[],
402 dml_float_t VRatioChroma[],
403 dml_uint_t SwathWidthY[],
404 dml_uint_t SwathWidthC[],
405 dml_uint_t DPPPerSurface[],
406 dml_float_t HRatio[],
407 dml_float_t HRatioChroma[],
408 dml_float_t PixelClock[],
409 dml_float_t PSCL_THROUGHPUT[],
410 dml_float_t PSCL_THROUGHPUT_CHROMA[],
411 dml_float_t Dppclk[],
412 dml_float_t ReadBandwidthLuma[],
413 dml_float_t ReadBandwidthChroma[],
414 dml_uint_t ReturnBusWidth,
415
416 // Output
417 dml_float_t *DCFCLKDeepSleep);
418
419 static void CalculateUrgentBurstFactor(
420 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,
421 dml_uint_t swath_width_luma_ub,
422 dml_uint_t swath_width_chroma_ub,
423 dml_uint_t SwathHeightY,
424 dml_uint_t SwathHeightC,
425 dml_float_t LineTime,
426 dml_float_t UrgentLatency,
427 dml_float_t CursorBufferSize,
428 dml_uint_t CursorWidth,
429 dml_uint_t CursorBPP,
430 dml_float_t VRatio,
431 dml_float_t VRatioC,
432 dml_float_t BytePerPixelInDETY,
433 dml_float_t BytePerPixelInDETC,
434 dml_uint_t DETBufferSizeY,
435 dml_uint_t DETBufferSizeC,
436 // Output
437 dml_float_t *UrgentBurstFactorCursor,
438 dml_float_t *UrgentBurstFactorLuma,
439 dml_float_t *UrgentBurstFactorChroma,
440 dml_bool_t *NotEnoughUrgentLatencyHiding);
441
442 static dml_float_t RequiredDTBCLK(
443 dml_bool_t DSCEnable,
444 dml_float_t PixelClock,
445 enum dml_output_format_class OutputFormat,
446 dml_float_t OutputBpp,
447 dml_uint_t DSCSlices,
448 dml_uint_t HTotal,
449 dml_uint_t HActive,
450 dml_uint_t AudioRate,
451 dml_uint_t AudioLayoutSingle);
452
453 static void UseMinimumDCFCLK(
454 struct display_mode_lib_scratch_st *scratch,
455 struct UseMinimumDCFCLK_params_st *p);
456
457 static void CalculatePixelDeliveryTimes(
458 dml_uint_t NumberOfActiveSurfaces,
459 dml_float_t VRatio[],
460 dml_float_t VRatioChroma[],
461 dml_float_t VRatioPrefetchY[],
462 dml_float_t VRatioPrefetchC[],
463 dml_uint_t swath_width_luma_ub[],
464 dml_uint_t swath_width_chroma_ub[],
465 dml_uint_t DPPPerSurface[],
466 dml_float_t HRatio[],
467 dml_float_t HRatioChroma[],
468 dml_float_t PixelClock[],
469 dml_float_t PSCL_THROUGHPUT[],
470 dml_float_t PSCL_THROUGHPUT_CHROMA[],
471 dml_float_t Dppclk[],
472 dml_uint_t BytePerPixelC[],
473 enum dml_rotation_angle SourceScan[],
474 dml_uint_t NumberOfCursors[],
475 dml_uint_t CursorWidth[],
476 dml_uint_t CursorBPP[],
477 dml_uint_t BlockWidth256BytesY[],
478 dml_uint_t BlockHeight256BytesY[],
479 dml_uint_t BlockWidth256BytesC[],
480 dml_uint_t BlockHeight256BytesC[],
481
482 // Output
483 dml_float_t DisplayPipeLineDeliveryTimeLuma[],
484 dml_float_t DisplayPipeLineDeliveryTimeChroma[],
485 dml_float_t DisplayPipeLineDeliveryTimeLumaPrefetch[],
486 dml_float_t DisplayPipeLineDeliveryTimeChromaPrefetch[],
487 dml_float_t DisplayPipeRequestDeliveryTimeLuma[],
488 dml_float_t DisplayPipeRequestDeliveryTimeChroma[],
489 dml_float_t DisplayPipeRequestDeliveryTimeLumaPrefetch[],
490 dml_float_t DisplayPipeRequestDeliveryTimeChromaPrefetch[],
491 dml_float_t CursorRequestDeliveryTime[],
492 dml_float_t CursorRequestDeliveryTimePrefetch[]);
493
494 static void CalculateMetaAndPTETimes(
495 dml_bool_t use_one_row_for_frame[],
496 dml_uint_t NumberOfActiveSurfaces,
497 dml_bool_t GPUVMEnable,
498 dml_uint_t MetaChunkSize,
499 dml_uint_t MinMetaChunkSizeBytes,
500 dml_uint_t HTotal[],
501 dml_float_t VRatio[],
502 dml_float_t VRatioChroma[],
503 dml_float_t DestinationLinesToRequestRowInVBlank[],
504 dml_float_t DestinationLinesToRequestRowInImmediateFlip[],
505 dml_bool_t DCCEnable[],
506 dml_float_t PixelClock[],
507 dml_uint_t BytePerPixelY[],
508 dml_uint_t BytePerPixelC[],
509 enum dml_rotation_angle SourceScan[],
510 dml_uint_t dpte_row_height[],
511 dml_uint_t dpte_row_height_chroma[],
512 dml_uint_t meta_row_width[],
513 dml_uint_t meta_row_width_chroma[],
514 dml_uint_t meta_row_height[],
515 dml_uint_t meta_row_height_chroma[],
516 dml_uint_t meta_req_width[],
517 dml_uint_t meta_req_width_chroma[],
518 dml_uint_t meta_req_height[],
519 dml_uint_t meta_req_height_chroma[],
520 dml_uint_t dpte_group_bytes[],
521 dml_uint_t PTERequestSizeY[],
522 dml_uint_t PTERequestSizeC[],
523 dml_uint_t PixelPTEReqWidthY[],
524 dml_uint_t PixelPTEReqHeightY[],
525 dml_uint_t PixelPTEReqWidthC[],
526 dml_uint_t PixelPTEReqHeightC[],
527 dml_uint_t dpte_row_width_luma_ub[],
528 dml_uint_t dpte_row_width_chroma_ub[],
529
530 // Output
531 dml_float_t DST_Y_PER_PTE_ROW_NOM_L[],
532 dml_float_t DST_Y_PER_PTE_ROW_NOM_C[],
533 dml_float_t DST_Y_PER_META_ROW_NOM_L[],
534 dml_float_t DST_Y_PER_META_ROW_NOM_C[],
535 dml_float_t TimePerMetaChunkNominal[],
536 dml_float_t TimePerChromaMetaChunkNominal[],
537 dml_float_t TimePerMetaChunkVBlank[],
538 dml_float_t TimePerChromaMetaChunkVBlank[],
539 dml_float_t TimePerMetaChunkFlip[],
540 dml_float_t TimePerChromaMetaChunkFlip[],
541 dml_float_t time_per_pte_group_nom_luma[],
542 dml_float_t time_per_pte_group_vblank_luma[],
543 dml_float_t time_per_pte_group_flip_luma[],
544 dml_float_t time_per_pte_group_nom_chroma[],
545 dml_float_t time_per_pte_group_vblank_chroma[],
546 dml_float_t time_per_pte_group_flip_chroma[]);
547
548 static void CalculateVMGroupAndRequestTimes(
549 dml_uint_t NumberOfActiveSurfaces,
550 dml_bool_t GPUVMEnable,
551 dml_uint_t GPUVMMaxPageTableLevels,
552 dml_uint_t HTotal[],
553 dml_uint_t BytePerPixelC[],
554 dml_float_t DestinationLinesToRequestVMInVBlank[],
555 dml_float_t DestinationLinesToRequestVMInImmediateFlip[],
556 dml_bool_t DCCEnable[],
557 dml_float_t PixelClock[],
558 dml_uint_t dpte_row_width_luma_ub[],
559 dml_uint_t dpte_row_width_chroma_ub[],
560 dml_uint_t vm_group_bytes[],
561 dml_uint_t dpde0_bytes_per_frame_ub_l[],
562 dml_uint_t dpde0_bytes_per_frame_ub_c[],
563 dml_uint_t meta_pte_bytes_per_frame_ub_l[],
564 dml_uint_t meta_pte_bytes_per_frame_ub_c[],
565
566 // Output
567 dml_float_t TimePerVMGroupVBlank[],
568 dml_float_t TimePerVMGroupFlip[],
569 dml_float_t TimePerVMRequestVBlank[],
570 dml_float_t TimePerVMRequestFlip[]);
571
572 static void CalculateStutterEfficiency(
573 struct display_mode_lib_scratch_st *scratch,
574 struct CalculateStutterEfficiency_params_st *p);
575
576 static void CalculateSwathAndDETConfiguration(
577 struct display_mode_lib_scratch_st *scratch,
578 struct CalculateSwathAndDETConfiguration_params_st *p);
579
580 static void CalculateSwathWidth(
581 dml_bool_t ForceSingleDPP,
582 dml_uint_t NumberOfActiveSurfaces,
583 enum dml_source_format_class SourcePixelFormat[],
584 enum dml_rotation_angle SourceScan[],
585 dml_bool_t ViewportStationary[],
586 dml_uint_t ViewportWidth[],
587 dml_uint_t ViewportHeight[],
588 dml_uint_t ViewportXStart[],
589 dml_uint_t ViewportYStart[],
590 dml_uint_t ViewportXStartC[],
591 dml_uint_t ViewportYStartC[],
592 dml_uint_t SurfaceWidthY[],
593 dml_uint_t SurfaceWidthC[],
594 dml_uint_t SurfaceHeightY[],
595 dml_uint_t SurfaceHeightC[],
596 enum dml_odm_mode ODMMode[],
597 dml_uint_t BytePerPixY[],
598 dml_uint_t BytePerPixC[],
599 dml_uint_t Read256BytesBlockHeightY[],
600 dml_uint_t Read256BytesBlockHeightC[],
601 dml_uint_t Read256BytesBlockWidthY[],
602 dml_uint_t Read256BytesBlockWidthC[],
603 dml_uint_t BlendingAndTiming[],
604 dml_uint_t HActive[],
605 dml_float_t HRatio[],
606 dml_uint_t DPPPerSurface[],
607
608 // Output
609 dml_uint_t SwathWidthSingleDPPY[],
610 dml_uint_t SwathWidthSingleDPPC[],
611 dml_uint_t SwathWidthY[],
612 dml_uint_t SwathWidthC[],
613 dml_uint_t MaximumSwathHeightY[],
614 dml_uint_t MaximumSwathHeightC[],
615 dml_uint_t swath_width_luma_ub[],
616 dml_uint_t swath_width_chroma_ub[]);
617
618 static dml_float_t CalculateExtraLatency(
619 dml_uint_t RoundTripPingLatencyCycles,
620 dml_uint_t ReorderingBytes,
621 dml_float_t DCFCLK,
622 dml_uint_t TotalNumberOfActiveDPP,
623 dml_uint_t PixelChunkSizeInKByte,
624 dml_uint_t TotalNumberOfDCCActiveDPP,
625 dml_uint_t MetaChunkSize,
626 dml_float_t ReturnBW,
627 dml_bool_t GPUVMEnable,
628 dml_bool_t HostVMEnable,
629 dml_uint_t NumberOfActiveSurfaces,
630 dml_uint_t NumberOfDPP[],
631 dml_uint_t dpte_group_bytes[],
632 dml_float_t HostVMInefficiencyFactor,
633 dml_uint_t HostVMMinPageSize,
634 dml_uint_t HostVMMaxNonCachedPageTableLevels);
635
636 static dml_uint_t CalculateExtraLatencyBytes(
637 dml_uint_t ReorderingBytes,
638 dml_uint_t TotalNumberOfActiveDPP,
639 dml_uint_t PixelChunkSizeInKByte,
640 dml_uint_t TotalNumberOfDCCActiveDPP,
641 dml_uint_t MetaChunkSize,
642 dml_bool_t GPUVMEnable,
643 dml_bool_t HostVMEnable,
644 dml_uint_t NumberOfActiveSurfaces,
645 dml_uint_t NumberOfDPP[],
646 dml_uint_t dpte_group_bytes[],
647 dml_float_t HostVMInefficiencyFactor,
648 dml_uint_t HostVMMinPageSize,
649 dml_uint_t HostVMMaxNonCachedPageTableLevels);
650
651 static dml_float_t CalculateUrgentLatency(
652 dml_float_t UrgentLatencyPixelDataOnly,
653 dml_float_t UrgentLatencyPixelMixedWithVMData,
654 dml_float_t UrgentLatencyVMDataOnly,
655 dml_bool_t DoUrgentLatencyAdjustment,
656 dml_float_t UrgentLatencyAdjustmentFabricClockComponent,
657 dml_float_t UrgentLatencyAdjustmentFabricClockReference,
658 dml_float_t FabricClockSingle);
659
660 static dml_bool_t UnboundedRequest(
661 enum dml_unbounded_requesting_policy UseUnboundedRequestingFinal,
662 dml_uint_t TotalNumberOfActiveDPP,
663 dml_bool_t NoChromaOrLinear,
664 enum dml_output_encoder_class Output);
665
666 static void CalculateSurfaceSizeInMall(
667 dml_uint_t NumberOfActiveSurfaces,
668 dml_uint_t MALLAllocatedForDCN,
669 enum dml_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
670 dml_bool_t DCCEnable[],
671 dml_bool_t ViewportStationary[],
672 dml_uint_t ViewportXStartY[],
673 dml_uint_t ViewportYStartY[],
674 dml_uint_t ViewportXStartC[],
675 dml_uint_t ViewportYStartC[],
676 dml_uint_t ViewportWidthY[],
677 dml_uint_t ViewportHeightY[],
678 dml_uint_t BytesPerPixelY[],
679 dml_uint_t ViewportWidthC[],
680 dml_uint_t ViewportHeightC[],
681 dml_uint_t BytesPerPixelC[],
682 dml_uint_t SurfaceWidthY[],
683 dml_uint_t SurfaceWidthC[],
684 dml_uint_t SurfaceHeightY[],
685 dml_uint_t SurfaceHeightC[],
686 dml_uint_t Read256BytesBlockWidthY[],
687 dml_uint_t Read256BytesBlockWidthC[],
688 dml_uint_t Read256BytesBlockHeightY[],
689 dml_uint_t Read256BytesBlockHeightC[],
690 dml_uint_t ReadBlockWidthY[],
691 dml_uint_t ReadBlockWidthC[],
692 dml_uint_t ReadBlockHeightY[],
693 dml_uint_t ReadBlockHeightC[],
694
695 // Output
696 dml_uint_t SurfaceSizeInMALL[],
697 dml_bool_t *ExceededMALLSize);
698
699 static void CalculateDETBufferSize(
700 dml_uint_t DETSizeOverride[],
701 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
702 dml_bool_t ForceSingleDPP,
703 dml_uint_t NumberOfActiveSurfaces,
704 dml_bool_t UnboundedRequestEnabled,
705 dml_uint_t nomDETInKByte,
706 dml_uint_t MaxTotalDETInKByte,
707 dml_uint_t ConfigReturnBufferSizeInKByte,
708 dml_uint_t MinCompressedBufferSizeInKByte,
709 dml_uint_t ConfigReturnBufferSegmentSizeInkByte,
710 dml_uint_t CompressedBufferSegmentSizeInkByteFinal,
711 enum dml_source_format_class SourcePixelFormat[],
712 dml_float_t ReadBandwidthLuma[],
713 dml_float_t ReadBandwidthChroma[],
714 dml_uint_t RotesY[],
715 dml_uint_t RoundedUpMaxSwathSizeBytesC[],
716 dml_uint_t DPPPerSurface[],
717 // Output
718 dml_uint_t DETBufferSizeInKByte[],
719 dml_uint_t *CompressedBufferSizeInkByte);
720
721 static void CalculateMaxDETAndMinCompressedBufferSize(
722 dml_uint_t ConfigReturnBufferSizeInKByte,
723 dml_uint_t ConfigReturnBufferSegmentSizeInKByte,
724 dml_uint_t ROBBufferSizeInKByte,
725 dml_uint_t MaxNumDPP,
726 dml_bool_t nomDETInKByteOverrideEnable,
727 dml_uint_t nomDETInKByteOverrideValue,
728
729 // Output
730 dml_uint_t *MaxTotalDETInKByte,
731 dml_uint_t *nomDETInKByte,
732 dml_uint_t *MinCompressedBufferSizeInKByte);
733
734 static dml_uint_t DSCDelayRequirement(
735 dml_bool_t DSCEnabled,
736 enum dml_odm_mode ODMMode,
737 dml_uint_t DSCInputBitPerComponent,
738 dml_float_t OutputBpp,
739 dml_uint_t HActive,
740 dml_uint_t HTotal,
741 dml_uint_t NumberOfDSCSlices,
742 enum dml_output_format_class OutputFormat,
743 enum dml_output_encoder_class Output,
744 dml_float_t PixelClock,
745 dml_float_t PixelClockBackEnd);
746
747 static dml_bool_t CalculateVActiveBandwithSupport(
748 dml_uint_t NumberOfActiveSurfaces,
749 dml_float_t ReturnBW,
750 dml_bool_t NotUrgentLatencyHiding[],
751 dml_float_t ReadBandwidthLuma[],
752 dml_float_t ReadBandwidthChroma[],
753 dml_float_t cursor_bw[],
754 dml_float_t meta_row_bandwidth[],
755 dml_float_t dpte_row_bandwidth[],
756 dml_uint_t NumberOfDPP[],
757 dml_float_t UrgentBurstFactorLuma[],
758 dml_float_t UrgentBurstFactorChroma[],
759 dml_float_t UrgentBurstFactorCursor[]);
760
761 static void CalculatePrefetchBandwithSupport(
762 dml_uint_t NumberOfActiveSurfaces,
763 dml_float_t ReturnBW,
764 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
765 dml_bool_t NotUrgentLatencyHiding[],
766 dml_float_t ReadBandwidthLuma[],
767 dml_float_t ReadBandwidthChroma[],
768 dml_float_t PrefetchBandwidthLuma[],
769 dml_float_t PrefetchBandwidthChroma[],
770 dml_float_t cursor_bw[],
771 dml_float_t meta_row_bandwidth[],
772 dml_float_t dpte_row_bandwidth[],
773 dml_float_t cursor_bw_pre[],
774 dml_float_t prefetch_vmrow_bw[],
775 dml_uint_t NumberOfDPP[],
776 dml_float_t UrgentBurstFactorLuma[],
777 dml_float_t UrgentBurstFactorChroma[],
778 dml_float_t UrgentBurstFactorCursor[],
779 dml_float_t UrgentBurstFactorLumaPre[],
780 dml_float_t UrgentBurstFactorChromaPre[],
781 dml_float_t UrgentBurstFactorCursorPre[],
782
783 // Output
784 dml_float_t *PrefetchBandwidth,
785 dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch,
786 dml_float_t *FractionOfUrgentBandwidth,
787 dml_bool_t *PrefetchBandwidthSupport);
788
789 static dml_float_t CalculateBandwidthAvailableForImmediateFlip(
790 dml_uint_t NumberOfActiveSurfaces,
791 dml_float_t ReturnBW,
792 dml_float_t ReadBandwidthLuma[],
793 dml_float_t ReadBandwidthChroma[],
794 dml_float_t PrefetchBandwidthLuma[],
795 dml_float_t PrefetchBandwidthChroma[],
796 dml_float_t cursor_bw[],
797 dml_float_t cursor_bw_pre[],
798 dml_uint_t NumberOfDPP[],
799 dml_float_t UrgentBurstFactorLuma[],
800 dml_float_t UrgentBurstFactorChroma[],
801 dml_float_t UrgentBurstFactorCursor[],
802 dml_float_t UrgentBurstFactorLumaPre[],
803 dml_float_t UrgentBurstFactorChromaPre[],
804 dml_float_t UrgentBurstFactorCursorPre[]);
805
806 static void CalculateImmediateFlipBandwithSupport(
807 dml_uint_t NumberOfActiveSurfaces,
808 dml_float_t ReturnBW,
809 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
810 enum dml_immediate_flip_requirement ImmediateFlipRequirement[],
811 dml_float_t final_flip_bw[],
812 dml_float_t ReadBandwidthLuma[],
813 dml_float_t ReadBandwidthChroma[],
814 dml_float_t PrefetchBandwidthLuma[],
815 dml_float_t PrefetchBandwidthChroma[],
816 dml_float_t cursor_bw[],
817 dml_float_t meta_row_bandwidth[],
818 dml_float_t dpte_row_bandwidth[],
819 dml_float_t cursor_bw_pre[],
820 dml_float_t prefetch_vmrow_bw[],
821 dml_uint_t NumberOfDPP[],
822 dml_float_t UrgentBurstFactorLuma[],
823 dml_float_t UrgentBurstFactorChroma[],
824 dml_float_t UrgentBurstFactorCursor[],
825 dml_float_t UrgentBurstFactorLumaPre[],
826 dml_float_t UrgentBurstFactorChromaPre[],
827 dml_float_t UrgentBurstFactorCursorPre[],
828
829 // Output
830 dml_float_t *TotalBandwidth,
831 dml_float_t *TotalBandwidthNotIncludingMALLPrefetch,
832 dml_float_t *FractionOfUrgentBandwidth,
833 dml_bool_t *ImmediateFlipBandwidthSupport);
834
835 // ---------------------------
836 // Declaration Ends
837 // ---------------------------
838
/*
 * dscceComputeDelay - delay contributed by the DSC compression engine.
 *
 * bpc:         source bits per component; 8 and 10 select their own SSM
 *              delay, every other value uses the largest one.
 * BPP:         compressed bits per pixel, used to derive the initial
 *              transmit delay.
 * sliceWidth:  number of pixels per slice line.
 * numSlices:   number of slices in the horizontal direction per DSC engine.
 * pixelFormat: output pixel format; dml_420 and dml_n422 are handled at two
 *              pixels per clock, every other format at one.
 * Output:      encoder class; only reported in the debug trace.
 *
 * Expected input ranges, as stated by the original authors (not enforced):
 *   bpc in {8, 10, 12}; BPP in 1/16-bit increments, min 6/7/8 for
 *   N420/N422/444 and max such that compression is 1:1;
 *   sliceWidth <= 5184/numSlices (4096/numSlices in 420 mode);
 *   numSlices in {1, 2, 3, 4}; pixelFormat in {N444_RGB, S422, N422, N420}.
 *
 * Returns the delay expressed in pixels. Returns 0 for degenerate input
 * (BPP not positive, or a slice narrower than one clock's worth of pixels),
 * which would otherwise divide by zero or convert an out-of-range float to
 * an unsigned integer.
 */
static dml_uint_t dscceComputeDelay(
		dml_uint_t bpc,
		dml_float_t BPP,
		dml_uint_t sliceWidth,
		dml_uint_t numSlices,
		enum dml_output_format_class pixelFormat,
		enum dml_output_encoder_class Output)
{
	// fixed value
	dml_uint_t rcModelSize = 8192;

	dml_uint_t pixelsPerClock, lstall, D, initialXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
			Delay, pixels;

	// N420/N422 operate at 2 pixels per clock, all other modes at 1 pixel per clock
	if (pixelFormat == dml_420 || pixelFormat == dml_n422)
		pixelsPerClock = 2;
	else
		pixelsPerClock = 1;

	// divide by pixels per cycle to compute the slice width as seen by DSC
	w = sliceWidth / pixelsPerClock;

	// w is used as a divisor (directly and through wx) and BPP divides the
	// rate-control model size; bail out before either can misbehave
	if (w == 0 || !(BPP > 0)) {
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: invalid input, sliceWidth: %u, BPP: %f\n", __func__, sliceWidth, BPP);
#endif
		return 0;
	}

	// initial transmit delay as per PPS
	initialXmitDelay = (dml_uint_t)(dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock, 1));

	// compute ssm delay
	if (bpc == 8)
		D = 81;
	else if (bpc == 10)
		D = 89;
	else
		D = 113;

	// every format other than 420, 444 and native 422 has an additional cycle of delay
	if (pixelFormat == dml_420 || pixelFormat == dml_444 || pixelFormat == dml_n422)
		s = 0;
	else
		s = 1;

	// main calculation for the dscce
	ix = initialXmitDelay + 45;
	wx = (w + 2) / 3;		// slice width in groups of 3, rounded up
	p = 3 * wx - w;			// padding needed to fill the last group
	l0 = ix / w;
	a = ix + p * l0;
	ax = (a + 2) / 3 + D + 6 + 1;
	L = (ax + wx - 1) / wx;		// ax / wx, rounded up
	if ((ix % w) == 0 && p != 0)
		lstall = 1;
	else
		lstall = 0;
	Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;

	// dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
	pixels = Delay * 3 * pixelsPerClock;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: bpc: %u\n", __func__, bpc);
	dml_print("DML::%s: BPP: %f\n", __func__, BPP);
	dml_print("DML::%s: sliceWidth: %u\n", __func__, sliceWidth);
	dml_print("DML::%s: numSlices: %u\n", __func__, numSlices);
	dml_print("DML::%s: pixelFormat: %u\n", __func__, pixelFormat);
	dml_print("DML::%s: Output: %u\n", __func__, Output);
	dml_print("DML::%s: pixels: %u\n", __func__, pixels);
#endif
	return pixels;
}
920
dscComputeDelay(enum dml_output_format_class pixelFormat,enum dml_output_encoder_class Output)921 static dml_uint_t dscComputeDelay(enum dml_output_format_class pixelFormat, enum dml_output_encoder_class Output)
922 {
923 dml_uint_t Delay = 0;
924
925 if (pixelFormat == dml_420) {
926 // sfr
927 Delay = Delay + 2;
928 // dsccif
929 Delay = Delay + 0;
930 // dscc - input deserializer
931 Delay = Delay + 3;
932 // dscc gets pixels every other cycle
933 Delay = Delay + 2;
934 // dscc - input cdc fifo
935 Delay = Delay + 12;
936 // dscc gets pixels every other cycle
937 Delay = Delay + 13;
938 // dscc - cdc uncertainty
939 Delay = Delay + 2;
940 // dscc - output cdc fifo
941 Delay = Delay + 7;
942 // dscc gets pixels every other cycle
943 Delay = Delay + 3;
944 // dscc - cdc uncertainty
945 Delay = Delay + 2;
946 // dscc - output serializer
947 Delay = Delay + 1;
948 // sft
949 Delay = Delay + 1;
950 } else if (pixelFormat == dml_n422) {
951 // sfr
952 Delay = Delay + 2;
953 // dsccif
954 Delay = Delay + 1;
955 // dscc - input deserializer
956 Delay = Delay + 5;
957 // dscc - input cdc fifo
958 Delay = Delay + 25;
959 // dscc - cdc uncertainty
960 Delay = Delay + 2;
961 // dscc - output cdc fifo
962 Delay = Delay + 10;
963 // dscc - cdc uncertainty
964 Delay = Delay + 2;
965 // dscc - output serializer
966 Delay = Delay + 1;
967 // sft
968 Delay = Delay + 1;
969 } else {
970 // sfr
971 Delay = Delay + 2;
972 // dsccif
973 Delay = Delay + 0;
974 // dscc - input deserializer
975 Delay = Delay + 3;
976 // dscc - input cdc fifo
977 Delay = Delay + 12;
978 // dscc - cdc uncertainty
979 Delay = Delay + 2;
980 // dscc - output cdc fifo
981 Delay = Delay + 7;
982 // dscc - output serializer
983 Delay = Delay + 1;
984 // dscc - cdc uncertainty
985 Delay = Delay + 2;
986 // sft
987 Delay = Delay + 1;
988 }
989 #ifdef __DML_VBA_DEBUG__
990 dml_print("DML::%s: pixelFormat = %u\n", __func__, pixelFormat);
991 dml_print("DML::%s: Delay = %u\n", __func__, Delay);
992 #endif
993
994 return Delay;
995 }
996
CalculatePrefetchSchedule(struct display_mode_lib_scratch_st * scratch,struct CalculatePrefetchSchedule_params_st * p)997 static dml_bool_t CalculatePrefetchSchedule(struct display_mode_lib_scratch_st *scratch,
998 struct CalculatePrefetchSchedule_params_st *p)
999 {
1000 struct CalculatePrefetchSchedule_locals_st *s = &scratch->CalculatePrefetchSchedule_locals;
1001
1002 s->MyError = false;
1003 s->DPPCycles = 0;
1004 s->DISPCLKCycles = 0;
1005 s->DSTTotalPixelsAfterScaler = 0.0;
1006 s->LineTime = 0.0;
1007 s->dst_y_prefetch_equ = 0.0;
1008 s->prefetch_bw_oto = 0.0;
1009 s->Tvm_oto = 0.0;
1010 s->Tr0_oto = 0.0;
1011 s->Tvm_oto_lines = 0.0;
1012 s->Tr0_oto_lines = 0.0;
1013 s->dst_y_prefetch_oto = 0.0;
1014 s->TimeForFetchingMetaPTE = 0.0;
1015 s->TimeForFetchingRowInVBlank = 0.0;
1016 s->LinesToRequestPrefetchPixelData = 0.0;
1017 s->HostVMDynamicLevelsTrips = 0;
1018 s->trip_to_mem = 0.0;
1019 s->Tvm_trips = 0.0;
1020 s->Tr0_trips = 0.0;
1021 s->Tvm_trips_rounded = 0.0;
1022 s->Tr0_trips_rounded = 0.0;
1023 s->max_Tsw = 0.0;
1024 s->Lsw_oto = 0.0;
1025 s->Tpre_rounded = 0.0;
1026 s->prefetch_bw_equ = 0.0;
1027 s->Tvm_equ = 0.0;
1028 s->Tr0_equ = 0.0;
1029 s->Tdmbf = 0.0;
1030 s->Tdmec = 0.0;
1031 s->Tdmsks = 0.0;
1032 s->prefetch_sw_bytes = 0.0;
1033 s->prefetch_bw_pr = 0.0;
1034 s->bytes_pp = 0.0;
1035 s->dep_bytes = 0.0;
1036 s->min_Lsw_oto = 0.0;
1037 s->Tsw_est1 = 0.0;
1038 s->Tsw_est3 = 0.0;
1039
1040 if (p->GPUVMEnable == true && p->HostVMEnable == true) {
1041 s->HostVMDynamicLevelsTrips = p->HostVMMaxNonCachedPageTableLevels;
1042 } else {
1043 s->HostVMDynamicLevelsTrips = 0;
1044 }
1045 #ifdef __DML_VBA_DEBUG__
1046 dml_print("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable);
1047 dml_print("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->GPUVMPageTableLevels);
1048 dml_print("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable);
1049 dml_print("DML::%s: VStartup = %u\n", __func__, p->VStartup);
1050 dml_print("DML::%s: MaxVStartup = %u\n", __func__, p->MaxVStartup);
1051 dml_print("DML::%s: HostVMEnable = %u\n", __func__, p->HostVMEnable);
1052 dml_print("DML::%s: HostVMInefficiencyFactor= %f\n", __func__, p->HostVMInefficiencyFactor);
1053 dml_print("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk);
1054 #endif
1055 CalculateVUpdateAndDynamicMetadataParameters(
1056 p->MaxInterDCNTileRepeaters,
1057 p->myPipe->Dppclk,
1058 p->myPipe->Dispclk,
1059 p->myPipe->DCFClkDeepSleep,
1060 p->myPipe->PixelClock,
1061 p->myPipe->HTotal,
1062 p->myPipe->VBlank,
1063 p->DynamicMetadataTransmittedBytes,
1064 p->DynamicMetadataLinesBeforeActiveRequired,
1065 p->myPipe->InterlaceEnable,
1066 p->myPipe->ProgressiveToInterlaceUnitInOPP,
1067 p->TSetup,
1068
1069 // Output
1070 &s->Tdmbf,
1071 &s->Tdmec,
1072 &s->Tdmsks,
1073 p->VUpdateOffsetPix,
1074 p->VUpdateWidthPix,
1075 p->VReadyOffsetPix);
1076
1077 s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock;
1078 s->trip_to_mem = p->UrgentLatency;
1079 s->Tvm_trips = p->UrgentExtraLatency + s->trip_to_mem * (p->GPUVMPageTableLevels * (s->HostVMDynamicLevelsTrips + 1) - 1);
1080
1081 if (p->DynamicMetadataVMEnabled == true) {
1082 *p->Tdmdl = p->TWait + s->Tvm_trips + s->trip_to_mem;
1083 } else {
1084 *p->Tdmdl = p->TWait + p->UrgentExtraLatency;
1085 }
1086
1087 #ifdef __DML_VBA_ALLOW_DELTA__
1088 if (DynamicMetadataEnable == false) {
1089 *Tdmdl = 0.0;
1090 }
1091 #endif
1092
1093 if (p->DynamicMetadataEnable == true) {
1094 if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) {
1095 *p->NotEnoughTimeForDynamicMetadata = true;
1096 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
1097 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
1098 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
1099 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
1100 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
1101 } else {
1102 *p->NotEnoughTimeForDynamicMetadata = false;
1103 }
1104 } else {
1105 *p->NotEnoughTimeForDynamicMetadata = false;
1106 }
1107
1108 *p->Tdmdl_vm = (p->DynamicMetadataEnable == true && p->DynamicMetadataVMEnabled == true && p->GPUVMEnable == true ? p->TWait + s->Tvm_trips : 0);
1109
1110 if (p->myPipe->ScalerEnabled)
1111 s->DPPCycles = (dml_uint_t)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCL);
1112 else
1113 s->DPPCycles = (dml_uint_t)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCLLBOnly);
1114
1115 s->DPPCycles = (dml_uint_t)(s->DPPCycles + p->myPipe->NumberOfCursors * p->DPPCLKDelayCNVCCursor);
1116
1117 s->DISPCLKCycles = (dml_uint_t)p->DISPCLKDelaySubtotal;
1118
1119 if (p->myPipe->Dppclk == 0.0 || p->myPipe->Dispclk == 0.0)
1120 return true;
1121
1122 *p->DSTXAfterScaler = (dml_uint_t) dml_round(s->DPPCycles * p->myPipe->PixelClock / p->myPipe->Dppclk + s->DISPCLKCycles * p->myPipe->PixelClock / p->myPipe->Dispclk + p->DSCDelay, 1.0);
1123 *p->DSTXAfterScaler = (dml_uint_t) dml_round(*p->DSTXAfterScaler + (p->myPipe->ODMMode != dml_odm_mode_bypass ? 18 : 0) + (p->myPipe->DPPPerSurface - 1) * p->DPP_RECOUT_WIDTH +
1124 ((p->myPipe->ODMMode == dml_odm_mode_split_1to2 || p->myPipe->ODMMode == dml_odm_mode_mso_1to2) ? (dml_float_t)p->myPipe->HActive / 2.0 : 0) +
1125 ((p->myPipe->ODMMode == dml_odm_mode_mso_1to4) ? (dml_float_t)p->myPipe->HActive * 3.0 / 4.0 : 0), 1.0);
1126
1127 #ifdef __DML_VBA_DEBUG__
1128 dml_print("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles);
1129 dml_print("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock);
1130 dml_print("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk);
1131 dml_print("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles);
1132 dml_print("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk);
1133 dml_print("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay);
1134 dml_print("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode);
1135 dml_print("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH);
1136 dml_print("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler);
1137 #endif
1138
1139 if (p->OutputFormat == dml_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP))
1140 *p->DSTYAfterScaler = 1;
1141 else
1142 *p->DSTYAfterScaler = 0;
1143
1144 s->DSTTotalPixelsAfterScaler = *p->DSTYAfterScaler * p->myPipe->HTotal + *p->DSTXAfterScaler;
1145 *p->DSTYAfterScaler = (dml_uint_t)(dml_floor(s->DSTTotalPixelsAfterScaler / p->myPipe->HTotal, 1));
1146 *p->DSTXAfterScaler = (dml_uint_t)(s->DSTTotalPixelsAfterScaler - ((dml_float_t) (*p->DSTYAfterScaler * p->myPipe->HTotal)));
1147 #ifdef __DML_VBA_DEBUG__
1148 dml_print("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler);
1149 dml_print("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler);
1150 #endif
1151
1152 s->MyError = false;
1153
1154 s->Tr0_trips = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1);
1155
1156 if (p->GPUVMEnable == true) {
1157 s->Tvm_trips_rounded = dml_ceil(4.0 * s->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
1158 s->Tr0_trips_rounded = dml_ceil(4.0 * s->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
1159 if (p->GPUVMPageTableLevels >= 3) {
1160 *p->Tno_bw = p->UrgentExtraLatency + s->trip_to_mem * (dml_float_t) ((p->GPUVMPageTableLevels - 2) * (s->HostVMDynamicLevelsTrips + 1) - 1);
1161 } else if (p->GPUVMPageTableLevels == 1 && p->myPipe->DCCEnable != true) {
1162 s->Tr0_trips_rounded = dml_ceil(4.0 * p->UrgentExtraLatency / s->LineTime, 1.0) / 4.0 * s->LineTime;
1163 *p->Tno_bw = p->UrgentExtraLatency;
1164 } else {
1165 *p->Tno_bw = 0;
1166 }
1167 } else if (p->myPipe->DCCEnable == true) {
1168 s->Tvm_trips_rounded = s->LineTime / 4.0;
1169 s->Tr0_trips_rounded = dml_ceil(4.0 * s->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
1170 *p->Tno_bw = 0;
1171 } else {
1172 s->Tvm_trips_rounded = s->LineTime / 4.0;
1173 s->Tr0_trips_rounded = s->LineTime / 2.0;
1174 *p->Tno_bw = 0;
1175 }
1176 s->Tvm_trips_rounded = dml_max(s->Tvm_trips_rounded, s->LineTime / 4.0);
1177 s->Tr0_trips_rounded = dml_max(s->Tr0_trips_rounded, s->LineTime / 4.0);
1178
1179 if (p->myPipe->SourcePixelFormat == dml_420_8 || p->myPipe->SourcePixelFormat == dml_420_10 || p->myPipe->SourcePixelFormat == dml_420_12) {
1180 s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4;
1181 } else {
1182 s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC;
1183 }
1184
1185 s->prefetch_bw_pr = s->bytes_pp * p->myPipe->PixelClock / (dml_float_t)p->myPipe->DPPPerSurface;
1186 if (p->myPipe->VRatio < 1.0)
1187 s->prefetch_bw_pr = p->myPipe->VRatio * s->prefetch_bw_pr;
1188
1189 s->max_Tsw = (dml_max(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) * s->LineTime);
1190
1191 s->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC;
1192 s->prefetch_bw_oto = dml_max(s->prefetch_bw_pr, s->prefetch_sw_bytes / s->max_Tsw);
1193
1194 s->min_Lsw_oto = dml_max(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML_MAX_VRATIO_PRE_OTO__;
1195 s->min_Lsw_oto = dml_max(s->min_Lsw_oto, 1.0);
1196 s->Lsw_oto = dml_ceil(4.0 * dml_max(s->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, s->min_Lsw_oto), 1.0) / 4.0;
1197
1198 if (p->GPUVMEnable == true) {
1199 s->Tvm_oto = dml_max3(
1200 s->Tvm_trips,
1201 *p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->prefetch_bw_oto,
1202 s->LineTime / 4.0);
1203 } else
1204 s->Tvm_oto = s->LineTime / 4.0;
1205
1206 if ((p->GPUVMEnable == true || p->myPipe->DCCEnable == true)) {
1207 s->Tr0_oto = dml_max4(
1208 s->Tr0_trips,
1209 (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_oto,
1210 (s->LineTime - s->Tvm_oto)/2.0,
1211 s->LineTime / 4.0);
1212 #ifdef __DML_VBA_DEBUG__
1213 dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__, (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_oto);
1214 dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, s->Tr0_trips);
1215 dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime - s->Tvm_oto);
1216 dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, s->LineTime / 4);
1217 #endif
1218 } else
1219 s->Tr0_oto = (s->LineTime - s->Tvm_oto) / 2.0;
1220
1221 s->Tvm_oto_lines = dml_ceil(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0;
1222 s->Tr0_oto_lines = dml_ceil(4.0 * s->Tr0_oto / s->LineTime, 1) / 4.0;
1223 s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto;
1224
1225 s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + dml_max(p->TWait + p->TCalc, *p->Tdmdl)) / s->LineTime - (*p->DSTYAfterScaler + (dml_float_t) *p->DSTXAfterScaler / (dml_float_t)p->myPipe->HTotal);
1226 s->dst_y_prefetch_equ = dml_min(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
1227
1228 #ifdef __DML_VBA_DEBUG__
1229 dml_print("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal);
1230 dml_print("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto);
1231 dml_print("DML::%s: *Tno_bw = %f\n", __func__, *p->Tno_bw);
1232 dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, p->UrgentExtraLatency);
1233 dml_print("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem);
1234 dml_print("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
1235 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
1236 dml_print("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
1237 dml_print("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC);
1238 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
1239 dml_print("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub);
1240 dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, s->prefetch_sw_bytes);
1241 dml_print("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp);
1242 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, p->PDEAndMetaPTEBytesFrame);
1243 dml_print("DML::%s: MetaRowByte = %u\n", __func__, p->MetaRowByte);
1244 dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
1245 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
1246 dml_print("DML::%s: Tvm_trips = %f\n", __func__, s->Tvm_trips);
1247 dml_print("DML::%s: Tr0_trips = %f\n", __func__, s->Tr0_trips);
1248 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto);
1249 dml_print("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto);
1250 dml_print("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto);
1251 dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines);
1252 dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines);
1253 dml_print("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto);
1254 dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto);
1255 dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ);
1256 #endif
1257
1258 s->dst_y_prefetch_equ = dml_floor(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0;
1259 s->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime;
1260
1261 dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ);
1262
1263 dml_print("DML::%s: LineTime: %f\n", __func__, s->LineTime);
1264 dml_print("DML::%s: VStartup: %u\n", __func__, p->VStartup);
1265 dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime);
1266 dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup);
1267 dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc);
1268 dml_print("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait);
1269 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
1270 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
1271 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
1272 dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm);
1273 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
1274 dml_print("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler);
1275 dml_print("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler);
1276
1277 s->dep_bytes = dml_max(p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor, p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor);
1278
1279 if (s->prefetch_sw_bytes < s->dep_bytes) {
1280 s->prefetch_sw_bytes = 2 * s->dep_bytes;
1281 }
1282
1283 *p->DestinationLinesToRequestVMInVBlank = 0;
1284 *p->DestinationLinesToRequestRowInVBlank = 0;
1285 *p->VRatioPrefetchY = 0;
1286 *p->VRatioPrefetchC = 0;
1287 *p->RequiredPrefetchPixDataBWLuma = 0;
1288 if (s->dst_y_prefetch_equ > 1) {
1289
1290 if (s->Tpre_rounded - *p->Tno_bw > 0) {
1291 s->PrefetchBandwidth1 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + 2 * p->MetaRowByte
1292 + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor
1293 + s->prefetch_sw_bytes)
1294 / (s->Tpre_rounded - *p->Tno_bw);
1295 s->Tsw_est1 = s->prefetch_sw_bytes / s->PrefetchBandwidth1;
1296 } else
1297 s->PrefetchBandwidth1 = 0;
1298
1299 if (p->VStartup == p->MaxVStartup && (s->Tsw_est1 / s->LineTime < s->min_Lsw_oto) && s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0) {
1300 s->PrefetchBandwidth1 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + 2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) /
1301 (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw);
1302 }
1303
1304 if (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded > 0)
1305 s->PrefetchBandwidth2 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) /
1306 (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded);
1307 else
1308 s->PrefetchBandwidth2 = 0;
1309
1310 if (s->Tpre_rounded - s->Tvm_trips_rounded > 0) {
1311 s->PrefetchBandwidth3 = (2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) /
1312 (s->Tpre_rounded - s->Tvm_trips_rounded);
1313 s->Tsw_est3 = s->prefetch_sw_bytes / s->PrefetchBandwidth3;
1314 }
1315 else
1316 s->PrefetchBandwidth3 = 0;
1317
1318
1319 if (p->VStartup == p->MaxVStartup && (s->Tsw_est3 / s->LineTime < s->min_Lsw_oto) && s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded > 0) {
1320 s->PrefetchBandwidth3 = (2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded);
1321 }
1322
1323 if (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0)
1324 s->PrefetchBandwidth4 = s->prefetch_sw_bytes / (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded);
1325 else
1326 s->PrefetchBandwidth4 = 0;
1327
1328 #ifdef __DML_VBA_DEBUG__
1329 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, s->Tpre_rounded);
1330 dml_print("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
1331 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, s->Tvm_trips_rounded);
1332 dml_print("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1);
1333 dml_print("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3);
1334 dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, s->PrefetchBandwidth1);
1335 dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, s->PrefetchBandwidth2);
1336 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, s->PrefetchBandwidth3);
1337 dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, s->PrefetchBandwidth4);
1338 #endif
1339 {
1340 dml_bool_t Case1OK;
1341 dml_bool_t Case2OK;
1342 dml_bool_t Case3OK;
1343
1344 if (s->PrefetchBandwidth1 > 0) {
1345 if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth1 >= s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth1 >= s->Tr0_trips_rounded) {
1346 Case1OK = true;
1347 } else {
1348 Case1OK = false;
1349 }
1350 } else {
1351 Case1OK = false;
1352 }
1353
1354 if (s->PrefetchBandwidth2 > 0) {
1355 if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth2 >= s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth2 < s->Tr0_trips_rounded) {
1356 Case2OK = true;
1357 } else {
1358 Case2OK = false;
1359 }
1360 } else {
1361 Case2OK = false;
1362 }
1363
1364 if (s->PrefetchBandwidth3 > 0) {
1365 if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth3 < s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth3 >= s->Tr0_trips_rounded) {
1366 Case3OK = true;
1367 } else {
1368 Case3OK = false;
1369 }
1370 } else {
1371 Case3OK = false;
1372 }
1373
1374 if (Case1OK) {
1375 s->prefetch_bw_equ = s->PrefetchBandwidth1;
1376 } else if (Case2OK) {
1377 s->prefetch_bw_equ = s->PrefetchBandwidth2;
1378 } else if (Case3OK) {
1379 s->prefetch_bw_equ = s->PrefetchBandwidth3;
1380 } else {
1381 s->prefetch_bw_equ = s->PrefetchBandwidth4;
1382 }
1383
1384 #ifdef __DML_VBA_DEBUG__
1385 dml_print("DML::%s: Case1OK: %u\n", __func__, Case1OK);
1386 dml_print("DML::%s: Case2OK: %u\n", __func__, Case2OK);
1387 dml_print("DML::%s: Case3OK: %u\n", __func__, Case3OK);
1388 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ);
1389 #endif
1390
1391 if (s->prefetch_bw_equ > 0) {
1392 if (p->GPUVMEnable == true) {
1393 s->Tvm_equ = dml_max3(*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->prefetch_bw_equ, s->Tvm_trips, s->LineTime / 4);
1394 } else {
1395 s->Tvm_equ = s->LineTime / 4;
1396 }
1397
1398 if ((p->GPUVMEnable == true || p->myPipe->DCCEnable == true)) {
1399 s->Tr0_equ = dml_max4((p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_equ, s->Tr0_trips, (s->LineTime - s->Tvm_equ) / 2, s->LineTime / 4);
1400 } else {
1401 s->Tr0_equ = (s->LineTime - s->Tvm_equ) / 2;
1402 }
1403 } else {
1404 s->Tvm_equ = 0;
1405 s->Tr0_equ = 0;
1406 dml_print("DML::%s: prefetch_bw_equ equals 0!\n", __func__);
1407 }
1408 }
1409
1410
1411 if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) {
1412 *p->DestinationLinesForPrefetch = s->dst_y_prefetch_oto;
1413 s->TimeForFetchingMetaPTE = s->Tvm_oto;
1414 s->TimeForFetchingRowInVBlank = s->Tr0_oto;
1415
1416 *p->DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * s->TimeForFetchingMetaPTE / s->LineTime, 1.0) / 4.0;
1417 *p->DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
1418 } else {
1419 *p->DestinationLinesForPrefetch = s->dst_y_prefetch_equ;
1420 s->TimeForFetchingMetaPTE = s->Tvm_equ;
1421 s->TimeForFetchingRowInVBlank = s->Tr0_equ;
1422
1423 if (p->VStartup == p->MaxVStartup && p->EnhancedPrefetchScheduleAccelerationFinal != 0) {
1424 *p->DestinationLinesToRequestVMInVBlank = dml_floor(4.0 * s->TimeForFetchingMetaPTE / s->LineTime, 1.0) / 4.0;
1425 *p->DestinationLinesToRequestRowInVBlank = dml_floor(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
1426 } else {
1427 *p->DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * s->TimeForFetchingMetaPTE / s->LineTime, 1.0) / 4.0;
1428 *p->DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
1429 }
1430 }
1431
1432 s->LinesToRequestPrefetchPixelData = *p->DestinationLinesForPrefetch - *p->DestinationLinesToRequestVMInVBlank - 2 * *p->DestinationLinesToRequestRowInVBlank;
1433
1434 #ifdef __DML_VBA_DEBUG__
1435 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *p->DestinationLinesForPrefetch);
1436 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *p->DestinationLinesToRequestVMInVBlank);
1437 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank);
1438 dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime);
1439 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *p->DestinationLinesToRequestRowInVBlank);
1440 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
1441 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData);
1442 #endif
1443
1444 if (s->LinesToRequestPrefetchPixelData >= 1 && s->prefetch_bw_equ > 0) {
1445 *p->VRatioPrefetchY = (dml_float_t)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData;
1446 *p->VRatioPrefetchY = dml_max(*p->VRatioPrefetchY, 1.0);
1447 #ifdef __DML_VBA_DEBUG__
1448 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
1449 dml_print("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY);
1450 dml_print("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY);
1451 #endif
1452 if ((p->SwathHeightY > 4) && (p->VInitPreFillY > 3)) {
1453 if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillY - 3.0) / 2.0) {
1454 *p->VRatioPrefetchY = dml_max(*p->VRatioPrefetchY,
1455 (dml_float_t)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0));
1456 } else {
1457 s->MyError = true;
1458 dml_print("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY);
1459 *p->VRatioPrefetchY = 0;
1460 }
1461 #ifdef __DML_VBA_DEBUG__
1462 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
1463 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
1464 dml_print("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY);
1465 #endif
1466 }
1467
1468 *p->VRatioPrefetchC = (dml_float_t)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData;
1469 *p->VRatioPrefetchC = dml_max(*p->VRatioPrefetchC, 1.0);
1470
1471 #ifdef __DML_VBA_DEBUG__
1472 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
1473 dml_print("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC);
1474 dml_print("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC);
1475 #endif
1476 if ((p->SwathHeightC > 4) && (p->VInitPreFillC > 3)) {
1477 if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillC - 3.0) / 2.0) {
1478 *p->VRatioPrefetchC = dml_max(*p->VRatioPrefetchC, (dml_float_t)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0));
1479 } else {
1480 s->MyError = true;
1481 dml_print("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC);
1482 *p->VRatioPrefetchC = 0;
1483 }
1484 #ifdef __DML_VBA_DEBUG__
1485 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
1486 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
1487 dml_print("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC);
1488 #endif
1489 }
1490
1491 *p->RequiredPrefetchPixDataBWLuma = (dml_float_t)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData
1492 * p->myPipe->BytePerPixelY
1493 * p->swath_width_luma_ub / s->LineTime;
1494
1495 #ifdef __DML_VBA_DEBUG__
1496 dml_print("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
1497 dml_print("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
1498 dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime);
1499 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixDataBWLuma);
1500 #endif
1501 *p->RequiredPrefetchPixDataBWChroma = (dml_float_t)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData
1502 *p->myPipe->BytePerPixelC
1503 *p->swath_width_chroma_ub / s->LineTime;
1504 } else {
1505 s->MyError = true;
1506 dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n", __func__, s->LinesToRequestPrefetchPixelData);
1507 *p->VRatioPrefetchY = 0;
1508 *p->VRatioPrefetchC = 0;
1509 *p->RequiredPrefetchPixDataBWLuma = 0;
1510 *p->RequiredPrefetchPixDataBWChroma = 0;
1511 }
1512
1513 dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", (dml_float_t)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingMetaPTE);
1514 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", s->TimeForFetchingMetaPTE);
1515 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", s->TimeForFetchingRowInVBlank);
1516 dml_print("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (dml_float_t)s->LinesToRequestPrefetchPixelData * s->LineTime);
1517 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((dml_float_t) (*p->DSTXAfterScaler) / (dml_float_t)p->myPipe->HTotal)) * s->LineTime);
1518 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
1519 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingMetaPTE - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((dml_float_t) (*p->DSTXAfterScaler) / (dml_float_t)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup);
1520 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow);
1521
1522 } else {
1523 s->MyError = true;
1524 dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ);
1525 s->TimeForFetchingMetaPTE = 0;
1526 s->TimeForFetchingRowInVBlank = 0;
1527 *p->DestinationLinesToRequestVMInVBlank = 0;
1528 *p->DestinationLinesToRequestRowInVBlank = 0;
1529 s->LinesToRequestPrefetchPixelData = 0;
1530 *p->VRatioPrefetchY = 0;
1531 *p->VRatioPrefetchC = 0;
1532 *p->RequiredPrefetchPixDataBWLuma = 0;
1533 *p->RequiredPrefetchPixDataBWChroma = 0;
1534 }
1535
1536 {
1537 dml_float_t prefetch_vm_bw;
1538 dml_float_t prefetch_row_bw;
1539
1540 if (p->PDEAndMetaPTEBytesFrame == 0) {
1541 prefetch_vm_bw = 0;
1542 } else if (*p->DestinationLinesToRequestVMInVBlank > 0) {
1543 #ifdef __DML_VBA_DEBUG__
1544 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, p->PDEAndMetaPTEBytesFrame);
1545 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
1546 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *p->DestinationLinesToRequestVMInVBlank);
1547 dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime);
1548 #endif
1549 prefetch_vm_bw = p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / (*p->DestinationLinesToRequestVMInVBlank * s->LineTime);
1550 #ifdef __DML_VBA_DEBUG__
1551 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
1552 #endif
1553 } else {
1554 prefetch_vm_bw = 0;
1555 s->MyError = true;
1556 dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n", __func__, *p->DestinationLinesToRequestVMInVBlank);
1557 }
1558
1559 if (p->MetaRowByte + p->PixelPTEBytesPerRow == 0) {
1560 prefetch_row_bw = 0;
1561 } else if (*p->DestinationLinesToRequestRowInVBlank > 0) {
1562 prefetch_row_bw = (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / (*p->DestinationLinesToRequestRowInVBlank * s->LineTime);
1563
1564 #ifdef __DML_VBA_DEBUG__
1565 dml_print("DML::%s: MetaRowByte = %u\n", __func__, p->MetaRowByte);
1566 dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
1567 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *p->DestinationLinesToRequestRowInVBlank);
1568 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
1569 #endif
1570 } else {
1571 prefetch_row_bw = 0;
1572 s->MyError = true;
1573 dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n", __func__, *p->DestinationLinesToRequestRowInVBlank);
1574 }
1575
1576 *p->prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1577 }
1578
1579 if (s->MyError) {
1580 s->TimeForFetchingMetaPTE = 0;
1581 s->TimeForFetchingRowInVBlank = 0;
1582 *p->DestinationLinesToRequestVMInVBlank = 0;
1583 *p->DestinationLinesToRequestRowInVBlank = 0;
1584 *p->DestinationLinesForPrefetch = 0;
1585 s->LinesToRequestPrefetchPixelData = 0;
1586 *p->VRatioPrefetchY = 0;
1587 *p->VRatioPrefetchC = 0;
1588 *p->RequiredPrefetchPixDataBWLuma = 0;
1589 *p->RequiredPrefetchPixDataBWChroma = 0;
1590 }
1591
1592 return s->MyError;
1593 } // CalculatePrefetchSchedule
1594
/// @brief Derive the per-plane bytes per pixel, the 256-byte block dimensions and the macro tile
/// dimensions from the source pixel format and the swizzle mode.
/// The "Y" outputs describe the first plane and the "C" outputs the second plane. For the formats
/// handled as single plane below, BytePerPixelC, BlockHeight256BytesC, BlockWidth256BytesC,
/// MacroTileHeightC and MacroTileWidthC are all written as 0.
static void CalculateBytePerPixelAndBlockSizes(
		enum dml_source_format_class SourcePixelFormat,
		enum dml_swizzle_mode SurfaceTiling,

		// Output
		dml_uint_t *BytePerPixelY,
		dml_uint_t *BytePerPixelC,
		dml_float_t *BytePerPixelDETY,
		dml_float_t *BytePerPixelDETC,
		dml_uint_t *BlockHeight256BytesY,
		dml_uint_t *BlockHeight256BytesC,
		dml_uint_t *BlockWidth256BytesY,
		dml_uint_t *BlockWidth256BytesC,
		dml_uint_t *MacroTileHeightY,
		dml_uint_t *MacroTileHeightC,
		dml_uint_t *MacroTileWidthY,
		dml_uint_t *MacroTileWidthC)
{
	dml_uint_t macro_tile_bytes;        // byte size of one macro tile for the selected swizzle mode
	dml_uint_t macro_tile_height_scale; // macro tile height expressed in 256-byte block heights

	// Step 1: bytes per pixel of each plane (integer size and DET size).
	switch (SourcePixelFormat) {
	case dml_444_64:
		*BytePerPixelDETY = 8;
		*BytePerPixelDETC = 0;
		*BytePerPixelY = 8;
		*BytePerPixelC = 0;
		break;
	case dml_444_32:
	case dml_rgbe:
		*BytePerPixelDETY = 4;
		*BytePerPixelDETC = 0;
		*BytePerPixelY = 4;
		*BytePerPixelC = 0;
		break;
	case dml_444_16:
	case dml_mono_16:
		*BytePerPixelDETY = 2;
		*BytePerPixelDETC = 0;
		*BytePerPixelY = 2;
		*BytePerPixelC = 0;
		break;
	case dml_444_8:
	case dml_mono_8:
		*BytePerPixelDETY = 1;
		*BytePerPixelDETC = 0;
		*BytePerPixelY = 1;
		*BytePerPixelC = 0;
		break;
	case dml_rgbe_alpha:
		*BytePerPixelDETY = 4;
		*BytePerPixelDETC = 1;
		*BytePerPixelY = 4;
		*BytePerPixelC = 1;
		break;
	case dml_420_8:
		*BytePerPixelDETY = 1;
		*BytePerPixelDETC = 2;
		*BytePerPixelY = 1;
		*BytePerPixelC = 2;
		break;
	case dml_420_12:
		*BytePerPixelDETY = 2;
		*BytePerPixelDETC = 4;
		*BytePerPixelY = 2;
		*BytePerPixelC = 4;
		break;
	default:
		// Every other format: fractional DET sizes, 2 and 4 bytes per pixel for the two planes.
		*BytePerPixelDETY = (dml_float_t) (4.0 / 3);
		*BytePerPixelDETC = (dml_float_t) (8.0 / 3);
		*BytePerPixelY = 2;
		*BytePerPixelC = 4;
		break;
	}

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat);
	dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
	dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
	dml_print("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY);
	dml_print("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC);
#endif

	// Step 2: height/width of a 256-byte block. The width always follows from 256 / bpp / height.
	switch (SourcePixelFormat) {
	case dml_444_64:
	case dml_444_32:
	case dml_444_16:
	case dml_444_8:
	case dml_mono_16:
	case dml_mono_8:
	case dml_rgbe:
		// Single plane formats: the second plane gets zero sized blocks.
		if (SurfaceTiling == dml_sw_linear)
			*BlockHeight256BytesY = 1;
		else if (SourcePixelFormat == dml_444_64)
			*BlockHeight256BytesY = 4;
		else if (SourcePixelFormat == dml_444_8)
			*BlockHeight256BytesY = 16;
		else
			*BlockHeight256BytesY = 8;

		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
		*BlockHeight256BytesC = 0;
		*BlockWidth256BytesC = 0;
		break;
	default:
		// Two plane formats: both planes get a block size.
		if (SurfaceTiling == dml_sw_linear) {
			*BlockHeight256BytesY = 1;
			*BlockHeight256BytesC = 1;
		} else if (SourcePixelFormat == dml_rgbe_alpha) {
			*BlockHeight256BytesY = 8;
			*BlockHeight256BytesC = 16;
		} else if (SourcePixelFormat == dml_420_8) {
			*BlockHeight256BytesY = 16;
			*BlockHeight256BytesC = 8;
		} else {
			*BlockHeight256BytesY = 8;
			*BlockHeight256BytesC = 8;
		}

		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
		*BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
		break;
	}

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY);
	dml_print("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY);
	dml_print("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC);
	dml_print("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC);
#endif

	// Step 3: macro tile size. Only the tile byte size and the height multiplier depend on the
	// swizzle mode; the per-plane formulas are shared.
	if (SurfaceTiling == dml_sw_linear) {
		macro_tile_bytes = 256;
		macro_tile_height_scale = 1;
	} else if (SurfaceTiling == dml_sw_64kb_d || SurfaceTiling == dml_sw_64kb_d_t || SurfaceTiling == dml_sw_64kb_d_x || SurfaceTiling == dml_sw_64kb_r_x) {
		macro_tile_bytes = 65536;
		macro_tile_height_scale = 16;
	} else {
		macro_tile_bytes = 65536 * 4;
		macro_tile_height_scale = 32;
	}

	*MacroTileHeightY = macro_tile_height_scale * *BlockHeight256BytesY;
	*MacroTileWidthY = macro_tile_bytes / *BytePerPixelY / *MacroTileHeightY;
	*MacroTileHeightC = macro_tile_height_scale * *BlockHeight256BytesC;
	if (*MacroTileHeightC == 0) {
		// No second plane: skip the division, which would otherwise divide by zero.
		*MacroTileWidthC = 0;
	} else {
		*MacroTileWidthC = macro_tile_bytes / *BytePerPixelC / *MacroTileHeightC;
	}

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY);
	dml_print("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY);
	dml_print("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC);
	dml_print("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC);
#endif
} // CalculateBytePerPixelAndBlockSizes
1739
CalculateTWait(dml_uint_t PrefetchMode,enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal,dml_bool_t DRRDisplay,dml_float_t DRAMClockChangeLatency,dml_float_t FCLKChangeLatency,dml_float_t UrgentLatency,dml_float_t SREnterPlusExitTime)1740 static noinline_for_stack dml_float_t CalculateTWait(
1741 dml_uint_t PrefetchMode,
1742 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,
1743 dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
1744 dml_bool_t DRRDisplay,
1745 dml_float_t DRAMClockChangeLatency,
1746 dml_float_t FCLKChangeLatency,
1747 dml_float_t UrgentLatency,
1748 dml_float_t SREnterPlusExitTime)
1749 {
1750 dml_float_t TWait = 0.0;
1751
1752 if (PrefetchMode == 0 &&
1753 !(UseMALLForPStateChange == dml_use_mall_pstate_change_full_frame) && !(UseMALLForPStateChange == dml_use_mall_pstate_change_sub_viewport) &&
1754 !(UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe) && !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
1755 TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
1756 } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe)) {
1757 TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
1758 } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe)) {
1759 TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
1760 } else {
1761 TWait = UrgentLatency;
1762 }
1763
1764 #ifdef __DML_VBA_DEBUG__
1765 dml_print("DML::%s: PrefetchMode = %u\n", __func__, PrefetchMode);
1766 dml_print("DML::%s: TWait = %f\n", __func__, TWait);
1767 #endif
1768 return TWait;
1769 } // CalculateTWait
1770
1771
1772 /// @brief Calculate the "starting point" for prefetch calculation
1773 /// if AllowForPStateChangeOrStutterInVBlank is set as a particular requirement, then the mode evalulation
1774 /// will only be done at the given mode. If no specific requirement (i.e. *_if_possible), then will just go from
1775 /// try all the prefetch mode in decreasing order of "difficulty" (start from 0 which means all power saving
1776 /// features).
CalculatePrefetchMode(enum dml_prefetch_modes AllowForPStateChangeOrStutterInVBlank,dml_uint_t * MinPrefetchMode,dml_uint_t * MaxPrefetchMode)1777 static void CalculatePrefetchMode(
1778 enum dml_prefetch_modes AllowForPStateChangeOrStutterInVBlank,
1779 dml_uint_t *MinPrefetchMode,
1780 dml_uint_t *MaxPrefetchMode)
1781 {
1782 if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_uclk_fclk_and_stutter_if_possible) {
1783 *MinPrefetchMode = 0; // consider all pwr saving features
1784 *MaxPrefetchMode = 3; // consider just urgent latency
1785 } else {
1786 if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_none) {
1787 *MinPrefetchMode = 3;
1788 } else if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_stutter) {
1789 *MinPrefetchMode = 2;
1790 } else if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_fclk_and_stutter) {
1791 *MinPrefetchMode = 1;
1792 } else if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_uclk_fclk_and_stutter) {
1793 *MinPrefetchMode = 0;
1794 } else {
1795 dml_print("ERROR: Invalid AllowForPStateChangeOrStutterInVBlank setting! val=%u\n", AllowForPStateChangeOrStutterInVBlank);
1796 ASSERT(0);
1797 }
1798 *MaxPrefetchMode = *MinPrefetchMode;
1799 }
1800 } // CalculatePrefetchMode
1801
/// @brief Compute the DISPCLK required by a writeback configuration.
/// Three candidate clocks are derived from the horizontal taps, the vertical taps and the writeback
/// line buffer size; the largest one is rounded up with RoundToDFSGranularity().
/// WritebackPixelFormat and WritebackVRatio are accepted but not used.
/// @return The largest candidate clock after rounding up to the DFS granularity.
static dml_float_t CalculateWriteBackDISPCLK(
		enum dml_source_format_class WritebackPixelFormat,
		dml_float_t PixelClock,
		dml_float_t WritebackHRatio,
		dml_float_t WritebackVRatio,
		dml_uint_t WritebackHTaps,
		dml_uint_t WritebackVTaps,
		dml_uint_t WritebackSourceWidth,
		dml_uint_t WritebackDestinationWidth,
		dml_uint_t HTotal,
		dml_uint_t WritebackLineBufferSize,
		dml_float_t DISPCLKDPPCLKVCOSpeed)
{
	dml_float_t horz_clk;
	dml_float_t vert_clk;
	dml_float_t line_buffer_clk;
	dml_float_t required_clk;

	(void)WritebackPixelFormat;
	(void)WritebackVRatio;

	// Candidate driven by the horizontal tap count
	horz_clk = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;

	// Candidate driven by the vertical tap count and the destination width
	vert_clk = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / (dml_float_t) HTotal;

	// Candidate driven by the writeback line buffer size
	line_buffer_clk = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / (dml_float_t) WritebackSourceWidth;

	required_clk = dml_max3(horz_clk, vert_clk, line_buffer_clk);
	return RoundToDFSGranularity(required_clk, 1, DISPCLKDPPCLKVCOSpeed);
}
1824
/// @brief Compute the writeback delay from the writeback scaling setup and the timing.
/// WritebackPixelFormat and WritebackHRatio are accepted but not used.
/// @return 0 when the unclamped count of trailing output lines is negative, otherwise that count
/// times the line length, plus (HTotal - WritebackDestinationWidth), plus 80.
static dml_float_t CalculateWriteBackDelay(
		enum dml_source_format_class WritebackPixelFormat,
		dml_float_t WritebackHRatio,
		dml_float_t WritebackVRatio,
		dml_uint_t WritebackVTaps,
		dml_uint_t WritebackDestinationWidth,
		dml_uint_t WritebackDestinationHeight,
		dml_uint_t WritebackSourceHeight,
		dml_uint_t HTotal)
{
	dml_float_t v_init;
	dml_float_t line_length;
	dml_float_t trailing_lines;

	(void)WritebackPixelFormat;
	(void)WritebackHRatio;

	v_init = (WritebackVRatio + WritebackVTaps + 1) / 2;

	// A line is at least the destination width, or ceil(width / 6) per vertical tap if that is longer
	line_length = dml_max((dml_float_t) WritebackDestinationWidth, dml_ceil((dml_float_t)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);

	trailing_lines = WritebackDestinationHeight - 1 - dml_ceil(((dml_float_t)WritebackSourceHeight - (dml_float_t) v_init) / (dml_float_t)WritebackVRatio, 1.0);

	// Negative line counts are clamped: they contribute no delay at all
	if (trailing_lines < 0)
		return 0;

	return trailing_lines * line_length + (HTotal - WritebackDestinationWidth) + 80;
}
1852
CalculateVUpdateAndDynamicMetadataParameters(dml_uint_t MaxInterDCNTileRepeaters,dml_float_t Dppclk,dml_float_t Dispclk,dml_float_t DCFClkDeepSleep,dml_float_t PixelClock,dml_uint_t HTotal,dml_uint_t VBlank,dml_uint_t DynamicMetadataTransmittedBytes,dml_uint_t DynamicMetadataLinesBeforeActiveRequired,dml_uint_t InterlaceEnable,dml_bool_t ProgressiveToInterlaceUnitInOPP,dml_float_t * TSetup,dml_float_t * Tdmbf,dml_float_t * Tdmec,dml_float_t * Tdmsks,dml_uint_t * VUpdateOffsetPix,dml_uint_t * VUpdateWidthPix,dml_uint_t * VReadyOffsetPix)1853 static void CalculateVUpdateAndDynamicMetadataParameters(
1854 dml_uint_t MaxInterDCNTileRepeaters,
1855 dml_float_t Dppclk,
1856 dml_float_t Dispclk,
1857 dml_float_t DCFClkDeepSleep,
1858 dml_float_t PixelClock,
1859 dml_uint_t HTotal,
1860 dml_uint_t VBlank,
1861 dml_uint_t DynamicMetadataTransmittedBytes,
1862 dml_uint_t DynamicMetadataLinesBeforeActiveRequired,
1863 dml_uint_t InterlaceEnable,
1864 dml_bool_t ProgressiveToInterlaceUnitInOPP,
1865
1866 // Output
1867 dml_float_t *TSetup,
1868 dml_float_t *Tdmbf,
1869 dml_float_t *Tdmec,
1870 dml_float_t *Tdmsks,
1871 dml_uint_t *VUpdateOffsetPix,
1872 dml_uint_t *VUpdateWidthPix,
1873 dml_uint_t *VReadyOffsetPix)
1874 {
1875 dml_float_t TotalRepeaterDelayTime;
1876 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
1877 *VUpdateWidthPix = (dml_uint_t)(dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0));
1878 *VReadyOffsetPix = (dml_uint_t)(dml_ceil(dml_max(150.0 / Dppclk, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0));
1879 *VUpdateOffsetPix = (dml_uint_t)(dml_ceil(HTotal / 4.0, 1.0));
1880 *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
1881 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
1882 *Tdmec = HTotal / PixelClock;
1883
1884 if (DynamicMetadataLinesBeforeActiveRequired == 0) {
1885 *Tdmsks = VBlank * HTotal / PixelClock / 2.0;
1886 } else {
1887 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
1888 }
1889 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
1890 *Tdmsks = *Tdmsks / 2;
1891 }
1892 #ifdef __DML_VBA_DEBUG__
1893 dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired);
1894 dml_print("DML::%s: VBlank = %u\n", __func__, VBlank);
1895 dml_print("DML::%s: HTotal = %u\n", __func__, HTotal);
1896 dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
1897 dml_print("DML::%s: Dppclk = %f\n", __func__, Dppclk);
1898 dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep);
1899 dml_print("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters);
1900 dml_print("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, TotalRepeaterDelayTime);
1901
1902 dml_print("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix);
1903 dml_print("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix);
1904 dml_print("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix);
1905
1906 dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
1907 #endif
1908 }
1909
/// @brief Compute the meta row and dpte row bandwidth of one surface.
/// Each bandwidth is VRatio * row bytes / (row height * LineTime) for the luma plane; for the
/// dml_420_8, dml_420_10, dml_420_12 and dml_rgbe_alpha formats the same term for the chroma plane
/// is added.
/// @param meta_row_bw Output: meta row bandwidth, 0 unless DCCEnable is true
/// @param dpte_row_bw Output: dpte row bandwidth, 0 unless GPUVMEnable is true
static void CalculateRowBandwidth(
		dml_bool_t GPUVMEnable,
		enum dml_source_format_class SourcePixelFormat,
		dml_float_t VRatio,
		dml_float_t VRatioChroma,
		dml_bool_t DCCEnable,
		dml_float_t LineTime,
		dml_uint_t MetaRowByteLuma,
		dml_uint_t MetaRowByteChroma,
		dml_uint_t meta_row_height_luma,
		dml_uint_t meta_row_height_chroma,
		dml_uint_t PixelPTEBytesPerRowLuma,
		dml_uint_t PixelPTEBytesPerRowChroma,
		dml_uint_t dpte_row_height_luma,
		dml_uint_t dpte_row_height_chroma,
		// Output
		dml_float_t *meta_row_bw,
		dml_float_t *dpte_row_bw)
{
	// Formats for which the chroma plane contributes its own row traffic
	const dml_bool_t add_chroma = (SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 ||
			SourcePixelFormat == dml_420_12 || SourcePixelFormat == dml_rgbe_alpha);
	dml_float_t meta_bw = 0;
	dml_float_t dpte_bw = 0;

	if (DCCEnable == true) {
		meta_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
		if (add_chroma)
			meta_bw = meta_bw + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
	}
	*meta_row_bw = meta_bw;

	if (GPUVMEnable == true) {
		dpte_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
		if (add_chroma)
			dpte_bw = dpte_bw + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
	}
	*dpte_row_bw = dpte_bw;
}
1949
1950 /// @brief Determine immediate flip schedule given bw remaining after considering the prefetch schedule
1951 /// @param BandwidthAvailableForImmediateFlip Bandwidth available for iflip for all planes
CalculateFlipSchedule(dml_float_t HostVMInefficiencyFactor,dml_float_t UrgentExtraLatency,dml_float_t UrgentLatency,dml_uint_t GPUVMMaxPageTableLevels,dml_bool_t HostVMEnable,dml_uint_t HostVMMaxNonCachedPageTableLevels,dml_bool_t GPUVMEnable,dml_uint_t HostVMMinPageSize,dml_float_t PDEAndMetaPTEBytesPerFrame,dml_float_t MetaRowBytes,dml_float_t DPTEBytesPerRow,dml_float_t BandwidthAvailableForImmediateFlip,dml_uint_t TotImmediateFlipBytes,enum dml_source_format_class SourcePixelFormat,dml_float_t LineTime,dml_float_t VRatio,dml_float_t VRatioChroma,dml_float_t Tno_bw,dml_bool_t DCCEnable,dml_uint_t dpte_row_height,dml_uint_t meta_row_height,dml_uint_t dpte_row_height_chroma,dml_uint_t meta_row_height_chroma,dml_bool_t use_one_row_for_frame_flip,dml_float_t * DestinationLinesToRequestVMInImmediateFlip,dml_float_t * DestinationLinesToRequestRowInImmediateFlip,dml_float_t * final_flip_bw,dml_bool_t * ImmediateFlipSupportedForPipe)1952 static void CalculateFlipSchedule(
1953 dml_float_t HostVMInefficiencyFactor,
1954 dml_float_t UrgentExtraLatency,
1955 dml_float_t UrgentLatency,
1956 dml_uint_t GPUVMMaxPageTableLevels,
1957 dml_bool_t HostVMEnable,
1958 dml_uint_t HostVMMaxNonCachedPageTableLevels,
1959 dml_bool_t GPUVMEnable,
1960 dml_uint_t HostVMMinPageSize,
1961 dml_float_t PDEAndMetaPTEBytesPerFrame,
1962 dml_float_t MetaRowBytes,
1963 dml_float_t DPTEBytesPerRow,
1964 dml_float_t BandwidthAvailableForImmediateFlip,
1965 dml_uint_t TotImmediateFlipBytes,
1966 enum dml_source_format_class SourcePixelFormat,
1967 dml_float_t LineTime,
1968 dml_float_t VRatio,
1969 dml_float_t VRatioChroma,
1970 dml_float_t Tno_bw,
1971 dml_bool_t DCCEnable,
1972 dml_uint_t dpte_row_height,
1973 dml_uint_t meta_row_height,
1974 dml_uint_t dpte_row_height_chroma,
1975 dml_uint_t meta_row_height_chroma,
1976 dml_bool_t use_one_row_for_frame_flip,
1977
1978 // Output
1979 dml_float_t *DestinationLinesToRequestVMInImmediateFlip,
1980 dml_float_t *DestinationLinesToRequestRowInImmediateFlip,
1981 dml_float_t *final_flip_bw,
1982 dml_bool_t *ImmediateFlipSupportedForPipe)
1983 {
1984 (void)HostVMMinPageSize;
1985 dml_float_t min_row_time = 0.0;
1986 dml_uint_t HostVMDynamicLevelsTrips = 0;
1987 dml_float_t TimeForFetchingMetaPTEImmediateFlip = 0;
1988 dml_float_t TimeForFetchingRowInVBlankImmediateFlip = 0;
1989 dml_float_t ImmediateFlipBW = 0; // @brief The immediate flip bandwidth for this pipe
1990
1991 if (GPUVMEnable == true && HostVMEnable == true) {
1992 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
1993 } else {
1994 HostVMDynamicLevelsTrips = 0;
1995 }
1996
1997 #ifdef __DML_VBA_DEBUG__
1998 dml_print("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes);
1999 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
2000 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
2001 dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
2002 #endif
2003
2004 if (TotImmediateFlipBytes > 0) {
2005 if (use_one_row_for_frame_flip) {
2006 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2.0 * DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / (dml_float_t) TotImmediateFlipBytes;
2007 } else {
2008 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / (dml_float_t) TotImmediateFlipBytes;
2009 }
2010 if (GPUVMEnable == true) {
2011 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
2012 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
2013 LineTime / 4.0);
2014 } else {
2015 TimeForFetchingMetaPTEImmediateFlip = 0;
2016 }
2017 if ((GPUVMEnable == true || DCCEnable == true)) {
2018 TimeForFetchingRowInVBlankImmediateFlip = dml_max3((MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
2019 } else {
2020 TimeForFetchingRowInVBlankImmediateFlip = 0;
2021 }
2022
2023 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
2024 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
2025
2026 if (GPUVMEnable == true) {
2027 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
2028 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
2029 } else if ((GPUVMEnable == true || DCCEnable == true)) {
2030 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
2031 } else {
2032 *final_flip_bw = 0;
2033 }
2034 } else {
2035 TimeForFetchingMetaPTEImmediateFlip = 0;
2036 TimeForFetchingRowInVBlankImmediateFlip = 0;
2037 *DestinationLinesToRequestVMInImmediateFlip = 0;
2038 *DestinationLinesToRequestRowInImmediateFlip = 0;
2039 *final_flip_bw = 0;
2040 }
2041
2042 if (SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 || SourcePixelFormat == dml_rgbe_alpha) {
2043 if (GPUVMEnable == true && DCCEnable != true) {
2044 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
2045 } else if (GPUVMEnable != true && DCCEnable == true) {
2046 min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
2047 } else {
2048 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
2049 }
2050 } else {
2051 if (GPUVMEnable == true && DCCEnable != true) {
2052 min_row_time = dpte_row_height * LineTime / VRatio;
2053 } else if (GPUVMEnable != true && DCCEnable == true) {
2054 min_row_time = meta_row_height * LineTime / VRatio;
2055 } else {
2056 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
2057 }
2058 }
2059
2060 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
2061 *ImmediateFlipSupportedForPipe = false;
2062 } else {
2063 *ImmediateFlipSupportedForPipe = true;
2064 }
2065
2066 #ifdef __DML_VBA_DEBUG__
2067 dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
2068 dml_print("DML::%s: DCCEnable = %u\n", __func__, DCCEnable);
2069
2070 dml_print("DML::%s: MetaRowBytes = %f\n", __func__, MetaRowBytes);
2071 dml_print("DML::%s: DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow);
2072 dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
2073 dml_print("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes);
2074 dml_print("DML::%s: ImmediateFlipBW = %f\n", __func__, ImmediateFlipBW);
2075 dml_print("DML::%s: PDEAndMetaPTEBytesPerFrame = %f\n", __func__, PDEAndMetaPTEBytesPerFrame);
2076 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
2077 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
2078 dml_print("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw);
2079
2080 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip);
2081 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip);
2082 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
2083 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
2084 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
2085 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe);
2086 #endif
2087 } // CalculateFlipSchedule
2088
/// @brief Snap a clock to a value of the form VCOSpeed * 4 / N with N a whole number.
/// @param Clock Clock to round; values <= 0 yield 0
/// @param round_up When set, N is rounded down so the result is not below Clock; otherwise N is
///        rounded up so the result is not above Clock
/// @param VCOSpeed VCO speed the result is derived from
/// @return The rounded clock, or 0.0 for a non-positive Clock
static dml_float_t RoundToDFSGranularity(dml_float_t Clock, dml_bool_t round_up, dml_float_t VCOSpeed)
{
	dml_float_t divider;

	if (Clock <= 0.0)
		return 0.0;

	// A smaller divider gives a higher clock, so rounding the clock up means flooring the divider
	if (round_up)
		divider = dml_floor(VCOSpeed * 4.0 / Clock, 1.0);
	else
		divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1.0);

	return VCOSpeed * 4.0 / divider;
}
2100
CalculateDCCConfiguration(dml_bool_t DCCEnabled,dml_bool_t DCCProgrammingAssumesScanDirectionUnknown,enum dml_source_format_class SourcePixelFormat,dml_uint_t SurfaceWidthLuma,dml_uint_t SurfaceWidthChroma,dml_uint_t SurfaceHeightLuma,dml_uint_t SurfaceHeightChroma,dml_uint_t nomDETInKByte,dml_uint_t RequestHeight256ByteLuma,dml_uint_t RequestHeight256ByteChroma,enum dml_swizzle_mode TilingFormat,dml_uint_t BytePerPixelY,dml_uint_t BytePerPixelC,dml_float_t BytePerPixelDETY,dml_float_t BytePerPixelDETC,enum dml_rotation_angle SourceScan,dml_uint_t * MaxUncompressedBlockLuma,dml_uint_t * MaxUncompressedBlockChroma,dml_uint_t * MaxCompressedBlockLuma,dml_uint_t * MaxCompressedBlockChroma,dml_uint_t * IndependentBlockLuma,dml_uint_t * IndependentBlockChroma)2101 static void CalculateDCCConfiguration(
2102 dml_bool_t DCCEnabled,
2103 dml_bool_t DCCProgrammingAssumesScanDirectionUnknown,
2104 enum dml_source_format_class SourcePixelFormat,
2105 dml_uint_t SurfaceWidthLuma,
2106 dml_uint_t SurfaceWidthChroma,
2107 dml_uint_t SurfaceHeightLuma,
2108 dml_uint_t SurfaceHeightChroma,
2109 dml_uint_t nomDETInKByte,
2110 dml_uint_t RequestHeight256ByteLuma,
2111 dml_uint_t RequestHeight256ByteChroma,
2112 enum dml_swizzle_mode TilingFormat,
2113 dml_uint_t BytePerPixelY,
2114 dml_uint_t BytePerPixelC,
2115 dml_float_t BytePerPixelDETY,
2116 dml_float_t BytePerPixelDETC,
2117 enum dml_rotation_angle SourceScan,
2118 // Output
2119 dml_uint_t *MaxUncompressedBlockLuma,
2120 dml_uint_t *MaxUncompressedBlockChroma,
2121 dml_uint_t *MaxCompressedBlockLuma,
2122 dml_uint_t *MaxCompressedBlockChroma,
2123 dml_uint_t *IndependentBlockLuma,
2124 dml_uint_t *IndependentBlockChroma)
2125 {
2126 (void)SurfaceWidthChroma;
2127 (void)SurfaceHeightChroma;
2128 (void)TilingFormat;
2129 (void)BytePerPixelDETY;
2130 (void)BytePerPixelDETC;
2131 dml_uint_t DETBufferSizeForDCC = nomDETInKByte * 1024;
2132
2133 dml_uint_t yuv420;
2134 dml_uint_t horz_div_l;
2135 dml_uint_t horz_div_c;
2136 dml_uint_t vert_div_l;
2137 dml_uint_t vert_div_c;
2138
2139 dml_uint_t swath_buf_size;
2140 dml_float_t detile_buf_vp_horz_limit;
2141 dml_float_t detile_buf_vp_vert_limit;
2142
2143 dml_uint_t MAS_vp_horz_limit;
2144 dml_uint_t MAS_vp_vert_limit;
2145 dml_uint_t max_vp_horz_width;
2146 dml_uint_t max_vp_vert_height;
2147 dml_uint_t eff_surf_width_l;
2148 dml_uint_t eff_surf_width_c;
2149 dml_uint_t eff_surf_height_l;
2150 dml_uint_t eff_surf_height_c;
2151
2152 dml_uint_t full_swath_bytes_horz_wc_l;
2153 dml_uint_t full_swath_bytes_horz_wc_c;
2154 dml_uint_t full_swath_bytes_vert_wc_l;
2155 dml_uint_t full_swath_bytes_vert_wc_c;
2156
2157 dml_uint_t req128_horz_wc_l;
2158 dml_uint_t req128_horz_wc_c;
2159 dml_uint_t req128_vert_wc_l;
2160 dml_uint_t req128_vert_wc_c;
2161
2162 dml_uint_t segment_order_horz_contiguous_luma;
2163 dml_uint_t segment_order_horz_contiguous_chroma;
2164 dml_uint_t segment_order_vert_contiguous_luma;
2165 dml_uint_t segment_order_vert_contiguous_chroma;
2166
2167 typedef enum{
2168 REQ_256Bytes,
2169 REQ_128BytesNonContiguous,
2170 REQ_128BytesContiguous,
2171 REQ_NA
2172 } RequestType;
2173
2174 RequestType RequestLuma;
2175 RequestType RequestChroma;
2176
2177 yuv420 = ((SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 || SourcePixelFormat == dml_420_12) ? 1 : 0);
2178 horz_div_l = 1;
2179 horz_div_c = 1;
2180 vert_div_l = 1;
2181 vert_div_c = 1;
2182
2183 if (BytePerPixelY == 1)
2184 vert_div_l = 0;
2185 if (BytePerPixelC == 1)
2186 vert_div_c = 0;
2187
2188 if (BytePerPixelC == 0) {
2189 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
2190 detile_buf_vp_horz_limit = (dml_float_t) swath_buf_size / ((dml_float_t) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
2191 detile_buf_vp_vert_limit = (dml_float_t) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
2192 } else {
2193 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
2194 detile_buf_vp_horz_limit = (dml_float_t) swath_buf_size / ((dml_float_t) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) + (dml_float_t) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
2195 detile_buf_vp_vert_limit = (dml_float_t) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
2196 }
2197
2198 if (SourcePixelFormat == dml_420_10) {
2199 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
2200 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
2201 }
2202
2203 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
2204 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
2205
2206 MAS_vp_horz_limit = SourcePixelFormat == dml_rgbe_alpha ? 3840 : 6144;
2207 MAS_vp_vert_limit = SourcePixelFormat == dml_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
2208 max_vp_horz_width = (dml_uint_t)(dml_min((dml_float_t) MAS_vp_horz_limit, detile_buf_vp_horz_limit));
2209 max_vp_vert_height = (dml_uint_t)(dml_min((dml_float_t) MAS_vp_vert_limit, detile_buf_vp_vert_limit));
2210 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
2211 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
2212 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
2213 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
2214
2215 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
2216 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
2217 if (BytePerPixelC > 0) {
2218 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
2219 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
2220 } else {
2221 full_swath_bytes_horz_wc_c = 0;
2222 full_swath_bytes_vert_wc_c = 0;
2223 }
2224
2225 if (SourcePixelFormat == dml_420_10) {
2226 full_swath_bytes_horz_wc_l = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0));
2227 full_swath_bytes_horz_wc_c = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0));
2228 full_swath_bytes_vert_wc_l = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0));
2229 full_swath_bytes_vert_wc_c = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0));
2230 }
2231
2232 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
2233 req128_horz_wc_l = 0;
2234 req128_horz_wc_c = 0;
2235 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
2236 req128_horz_wc_l = 0;
2237 req128_horz_wc_c = 1;
2238 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
2239 req128_horz_wc_l = 1;
2240 req128_horz_wc_c = 0;
2241 } else {
2242 req128_horz_wc_l = 1;
2243 req128_horz_wc_c = 1;
2244 }
2245
2246 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
2247 req128_vert_wc_l = 0;
2248 req128_vert_wc_c = 0;
2249 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
2250 req128_vert_wc_l = 0;
2251 req128_vert_wc_c = 1;
2252 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
2253 req128_vert_wc_l = 1;
2254 req128_vert_wc_c = 0;
2255 } else {
2256 req128_vert_wc_l = 1;
2257 req128_vert_wc_c = 1;
2258 }
2259
2260 if (BytePerPixelY == 2) {
2261 segment_order_horz_contiguous_luma = 0;
2262 segment_order_vert_contiguous_luma = 1;
2263 } else {
2264 segment_order_horz_contiguous_luma = 1;
2265 segment_order_vert_contiguous_luma = 0;
2266 }
2267
2268 if (BytePerPixelC == 2) {
2269 segment_order_horz_contiguous_chroma = 0;
2270 segment_order_vert_contiguous_chroma = 1;
2271 } else {
2272 segment_order_horz_contiguous_chroma = 1;
2273 segment_order_vert_contiguous_chroma = 0;
2274 }
2275 #ifdef __DML_VBA_DEBUG__
2276 dml_print("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled);
2277 dml_print("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
2278 dml_print("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC);
2279 dml_print("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l);
2280 dml_print("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c);
2281 dml_print("DML::%s: full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l);
2282 dml_print("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c);
2283 dml_print("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma);
2284 dml_print("DML::%s: segment_order_horz_contiguous_chroma = %u\n", __func__, segment_order_horz_contiguous_chroma);
2285 #endif
2286
2287 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
2288 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
2289 RequestLuma = REQ_256Bytes;
2290 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
2291 RequestLuma = REQ_128BytesNonContiguous;
2292 } else {
2293 RequestLuma = REQ_128BytesContiguous;
2294 }
2295 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
2296 RequestChroma = REQ_256Bytes;
2297 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
2298 RequestChroma = REQ_128BytesNonContiguous;
2299 } else {
2300 RequestChroma = REQ_128BytesContiguous;
2301 }
2302 } else if (!dml_is_vertical_rotation(SourceScan)) {
2303 if (req128_horz_wc_l == 0) {
2304 RequestLuma = REQ_256Bytes;
2305 } else if (segment_order_horz_contiguous_luma == 0) {
2306 RequestLuma = REQ_128BytesNonContiguous;
2307 } else {
2308 RequestLuma = REQ_128BytesContiguous;
2309 }
2310 if (req128_horz_wc_c == 0) {
2311 RequestChroma = REQ_256Bytes;
2312 } else if (segment_order_horz_contiguous_chroma == 0) {
2313 RequestChroma = REQ_128BytesNonContiguous;
2314 } else {
2315 RequestChroma = REQ_128BytesContiguous;
2316 }
2317 } else {
2318 if (req128_vert_wc_l == 0) {
2319 RequestLuma = REQ_256Bytes;
2320 } else if (segment_order_vert_contiguous_luma == 0) {
2321 RequestLuma = REQ_128BytesNonContiguous;
2322 } else {
2323 RequestLuma = REQ_128BytesContiguous;
2324 }
2325 if (req128_vert_wc_c == 0) {
2326 RequestChroma = REQ_256Bytes;
2327 } else if (segment_order_vert_contiguous_chroma == 0) {
2328 RequestChroma = REQ_128BytesNonContiguous;
2329 } else {
2330 RequestChroma = REQ_128BytesContiguous;
2331 }
2332 }
2333
2334 if (RequestLuma == REQ_256Bytes) {
2335 *MaxUncompressedBlockLuma = 256;
2336 *MaxCompressedBlockLuma = 256;
2337 *IndependentBlockLuma = 0;
2338 } else if (RequestLuma == REQ_128BytesContiguous) {
2339 *MaxUncompressedBlockLuma = 256;
2340 *MaxCompressedBlockLuma = 128;
2341 *IndependentBlockLuma = 128;
2342 } else {
2343 *MaxUncompressedBlockLuma = 256;
2344 *MaxCompressedBlockLuma = 64;
2345 *IndependentBlockLuma = 64;
2346 }
2347
2348 if (RequestChroma == REQ_256Bytes) {
2349 *MaxUncompressedBlockChroma = 256;
2350 *MaxCompressedBlockChroma = 256;
2351 *IndependentBlockChroma = 0;
2352 } else if (RequestChroma == REQ_128BytesContiguous) {
2353 *MaxUncompressedBlockChroma = 256;
2354 *MaxCompressedBlockChroma = 128;
2355 *IndependentBlockChroma = 128;
2356 } else {
2357 *MaxUncompressedBlockChroma = 256;
2358 *MaxCompressedBlockChroma = 64;
2359 *IndependentBlockChroma = 64;
2360 }
2361
2362 if (DCCEnabled != true || BytePerPixelC == 0) {
2363 *MaxUncompressedBlockChroma = 0;
2364 *MaxCompressedBlockChroma = 0;
2365 *IndependentBlockChroma = 0;
2366 }
2367
2368 if (DCCEnabled != true) {
2369 *MaxUncompressedBlockLuma = 0;
2370 *MaxCompressedBlockLuma = 0;
2371 *IndependentBlockLuma = 0;
2372 }
2373
2374 #ifdef __DML_VBA_DEBUG__
2375 dml_print("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma);
2376 dml_print("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma);
2377 dml_print("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma);
2378 dml_print("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma);
2379 dml_print("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma);
2380 dml_print("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma);
2381 #endif
2382
2383 } // CalculateDCCConfiguration
2384
/*
 * Work out how many source lines have to be fetched during prefetch.
 *
 * Outputs:
 *   *VInitPreFill - dml_floor((VRatio + VTaps + 1) / 2, 1); when
 *                   ProgressiveToInterlaceUnitInOPP is not set the numerator
 *                   additionally contains Interlace * 0.5 * VRatio.
 *   *MaxNumSwath  - number of whole swaths counted toward the prefetch.
 *
 * Returns *MaxNumSwath * SwathHeight plus a partial-swath line count that is
 * clamped through dml_max(1, ...).
 *
 * For a stationary viewport the starting offset inside the first swath is
 * derived from the viewport position and the scan direction; otherwise the
 * swath count is derived from *VInitPreFill alone.
 */
static dml_uint_t CalculatePrefetchSourceLines(
		dml_float_t VRatio,
		dml_uint_t VTaps,
		dml_bool_t Interlace,
		dml_bool_t ProgressiveToInterlaceUnitInOPP,
		dml_uint_t SwathHeight,
		enum dml_rotation_angle SourceScan,
		dml_bool_t ViewportStationary,
		dml_uint_t SwathWidth,
		dml_uint_t ViewportHeight,
		dml_uint_t ViewportXStart,
		dml_uint_t ViewportYStart,

		// Output
		dml_uint_t *VInitPreFill,
		dml_uint_t *MaxNumSwath)
{
	dml_uint_t start_in_scan_dir = 0;	// stays 0 when the viewport is not stationary
	dml_uint_t partial_swath_lines = 0;
	dml_float_t line_count = 0;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
	dml_print("DML::%s: VTaps = %u\n", __func__, VTaps);
	dml_print("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart);
	dml_print("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart);
	dml_print("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary);
	dml_print("DML::%s: SwathHeight = %u\n", __func__, SwathHeight);
#endif

	// The interlace contribution is only added when the OPP does not do the conversion.
	if (!ProgressiveToInterlaceUnitInOPP)
		*VInitPreFill = (dml_uint_t)(dml_floor((VRatio + (dml_float_t) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1));
	else
		*VInitPreFill = (dml_uint_t)(dml_floor((VRatio + (dml_float_t) VTaps + 1) / 2.0, 1));

	if (!ViewportStationary) {
		*MaxNumSwath = (dml_uint_t)(dml_ceil((*VInitPreFill - 1.0) / (dml_float_t) SwathHeight, 1) + 1);

		// Adding SwathHeight keeps the unsigned subtraction from wrapping when VInitPreFill <= 1.
		if (*VInitPreFill <= 1)
			partial_swath_lines = (dml_uint_t)(dml_max(1, (dml_uint_t) (*VInitPreFill + SwathHeight - 2) % SwathHeight));
		else
			partial_swath_lines = (dml_uint_t)(dml_max(1, (dml_uint_t) (*VInitPreFill - 2) % SwathHeight));
	} else {
		dml_uint_t first_swath_lines;

		switch (SourceScan) {
		case dml_rotation_180:
		case dml_rotation_180m:
			start_in_scan_dir = SwathHeight - (((dml_uint_t) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
			break;
		case dml_rotation_270:
		case dml_rotation_90m:
			start_in_scan_dir = ViewportXStart;
			break;
		case dml_rotation_90:
		case dml_rotation_270m:
			start_in_scan_dir = SwathHeight - (((dml_uint_t)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
			break;
		default:
			start_in_scan_dir = ViewportYStart;
			break;
		}

		// Lines left in the first swath after the starting offset.
		first_swath_lines = SwathHeight - (start_in_scan_dir % SwathHeight);

		if (first_swath_lines >= *VInitPreFill)
			*MaxNumSwath = 1;
		else
			*MaxNumSwath = (dml_uint_t)(dml_ceil((*VInitPreFill - first_swath_lines) / (dml_float_t) SwathHeight, 1) + 1);

		partial_swath_lines = (dml_uint_t)(dml_max(1, (dml_uint_t) (start_in_scan_dir + *VInitPreFill - 1) % SwathHeight));
	}

	line_count = *MaxNumSwath * SwathHeight + partial_swath_lines;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: vp_start_rot = %u\n", __func__, start_in_scan_dir);
	dml_print("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill);
	dml_print("DML::%s: MaxPartialSwath = %u\n", __func__, partial_swath_lines);
	dml_print("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath);
	dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, line_count);
#endif

	return (dml_uint_t)(line_count);
} // CalculatePrefetchSourceLines
2458
/*
 * Compute the per-surface page-table and DCC-meta fetch sizes.
 *
 * Returns the per-frame byte count
 *   *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame.
 *
 * Outputs (all written on every path unless noted):
 *   MetaRowByte / meta_row_width / meta_row_height /
 *   MetaRequestWidth / MetaRequestHeight
 *       DCC meta row geometry; *MetaRowByte is forced to 0 when DCCEnable is
 *       not set.
 *   PixelPTEBytesPerRow        - forced to 0 when GPUVMEnable is not set.
 *   PixelPTEBytesPerRowStorage - copy of *PixelPTEBytesPerRow.
 *   dpte_row_width_ub / dpte_row_height
 *       PTE row geometry for the selected tiling / scan direction.
 *   dpte_row_height_linear     - written ONLY for dml_sw_linear surfaces; left
 *                                untouched otherwise.
 *   *_one_row_per_frame        - same quantities when one PTE row covers the
 *                                whole (upper-bounded) viewport height.
 *   PixelPTEReqWidth / PixelPTEReqHeight / PTERequestSize
 *       PTE request shape, chosen from tiling and GPUVMMinPageSizeKBytes.
 *   DPDE0BytesFrame            - 0 unless GPUVM is enabled with more than one
 *                                page table level.
 *   MetaPTEBytesFrame          - 0 unless both GPUVM and DCC are enabled.
 *
 * SourcePixelFormat is accepted for interface compatibility but not used.
 */
static dml_uint_t CalculateVMAndRowBytes(
		dml_bool_t ViewportStationary,
		dml_bool_t DCCEnable,
		dml_uint_t NumberOfDPPs,
		dml_uint_t BlockHeight256Bytes,
		dml_uint_t BlockWidth256Bytes,
		enum dml_source_format_class SourcePixelFormat,
		dml_uint_t SurfaceTiling,
		dml_uint_t BytePerPixel,
		enum dml_rotation_angle SourceScan,
		dml_uint_t SwathWidth,
		dml_uint_t ViewportHeight,
		dml_uint_t ViewportXStart,
		dml_uint_t ViewportYStart,
		dml_bool_t GPUVMEnable,
		dml_uint_t GPUVMMaxPageTableLevels,
		dml_uint_t GPUVMMinPageSizeKBytes,
		dml_uint_t PTEBufferSizeInRequests,
		dml_uint_t Pitch,
		dml_uint_t DCCMetaPitch,
		dml_uint_t MacroTileWidth,
		dml_uint_t MacroTileHeight,

		// Output
		dml_uint_t *MetaRowByte,
		dml_uint_t *PixelPTEBytesPerRow, // for bandwidth calculation
		dml_uint_t *PixelPTEBytesPerRowStorage, // for PTE buffer size check
		dml_uint_t *dpte_row_width_ub,
		dml_uint_t *dpte_row_height,
		dml_uint_t *dpte_row_height_linear,
		dml_uint_t *PixelPTEBytesPerRow_one_row_per_frame,
		dml_uint_t *dpte_row_width_ub_one_row_per_frame,
		dml_uint_t *dpte_row_height_one_row_per_frame,
		dml_uint_t *MetaRequestWidth,
		dml_uint_t *MetaRequestHeight,
		dml_uint_t *meta_row_width,
		dml_uint_t *meta_row_height,
		dml_uint_t *PixelPTEReqWidth,
		dml_uint_t *PixelPTEReqHeight,
		dml_uint_t *PTERequestSize,
		dml_uint_t *DPDE0BytesFrame,
		dml_uint_t *MetaPTEBytesFrame)
{
	(void)SourcePixelFormat;
	dml_uint_t MPDEBytesFrame;
	dml_uint_t DCCMetaSurfaceBytes;
	dml_uint_t ExtraDPDEBytesFrame;
	dml_uint_t PDEAndMetaPTEBytesFrame;
	dml_uint_t MacroTileSizeBytes;
	dml_uint_t vp_height_meta_ub;
	dml_uint_t vp_height_dpte_ub;

	dml_uint_t PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this

	// --- DCC meta row geometry ---
	*MetaRequestHeight = 8 * BlockHeight256Bytes;
	*MetaRequestWidth = 8 * BlockWidth256Bytes;
	if (SurfaceTiling == dml_sw_linear) {
		*meta_row_height = 32;
		*meta_row_width = (dml_uint_t)(dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth));
		// Keep the output defined for linear surfaces as well: without this
		// store a caller with DCCEnable set would see whatever stale value
		// was in *MetaRowByte. Same formula as the non-rotated tiled case.
		*MetaRowByte = (dml_uint_t)(*meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0);
	} else if (!dml_is_vertical_rotation(SourceScan)) {
		*meta_row_height = *MetaRequestHeight;
		if (ViewportStationary && NumberOfDPPs == 1) {
			*meta_row_width = (dml_uint_t)(dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth));
		} else {
			*meta_row_width = (dml_uint_t)(dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth);
		}
		*MetaRowByte = (dml_uint_t)(*meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0);
	} else {
		// Vertical scan: request width and height swap roles.
		*meta_row_height = *MetaRequestWidth;
		if (ViewportStationary && NumberOfDPPs == 1) {
			*meta_row_width = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1, *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight));
		} else {
			*meta_row_width = (dml_uint_t)(dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight);
		}
		*MetaRowByte = (dml_uint_t)(*meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0);
	}

	// Upper bound of the viewport height in units of 64 * BlockHeight256Bytes.
	if (ViewportStationary && (NumberOfDPPs == 1 || !dml_is_vertical_rotation(SourceScan))) {
		vp_height_meta_ub = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1, 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes));
	} else if (!dml_is_vertical_rotation(SourceScan)) {
		vp_height_meta_ub = (dml_uint_t)(dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes);
	} else {
		vp_height_meta_ub = (dml_uint_t)(dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes);
	}

	DCCMetaSurfaceBytes = (dml_uint_t)(DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0);

	if (GPUVMEnable == true) {
		*MetaPTEBytesFrame = (dml_uint_t)((dml_ceil((dml_float_t) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64);
		MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
	} else {
		*MetaPTEBytesFrame = 0;
		MPDEBytesFrame = 0;
	}

	// Without DCC there is no meta data to fetch at all.
	if (DCCEnable != true) {
		*MetaPTEBytesFrame = 0;
		MPDEBytesFrame = 0;
		*MetaRowByte = 0;
	}

	// --- Page directory bytes per frame ---
	MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;

	if (ViewportStationary && (NumberOfDPPs == 1 || !dml_is_vertical_rotation(SourceScan))) {
		vp_height_dpte_ub = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + MacroTileHeight - 1, MacroTileHeight) - dml_floor(ViewportYStart, MacroTileHeight));
	} else if (!dml_is_vertical_rotation(SourceScan)) {
		vp_height_dpte_ub = (dml_uint_t)(dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight);
	} else {
		vp_height_dpte_ub = (dml_uint_t)(dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight);
	}

	if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
		*DPDE0BytesFrame = (dml_uint_t)(64 * (dml_ceil((dml_float_t) (Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) / (dml_float_t) (8 * 2097152), 1) + 1));
		ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
	} else {
		*DPDE0BytesFrame = 0;
		ExtraDPDEBytesFrame = 0;
	}

	PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DCCEnable = %u\n", __func__, DCCEnable);
	dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
	dml_print("DML::%s: SwModeLinear = %u\n", __func__, SurfaceTiling == dml_sw_linear);
	dml_print("DML::%s: BytePerPixel = %u\n", __func__, BytePerPixel);
	dml_print("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, GPUVMMaxPageTableLevels);
	dml_print("DML::%s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes);
	dml_print("DML::%s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes);
	dml_print("DML::%s: MacroTileHeight = %u\n", __func__, MacroTileHeight);
	dml_print("DML::%s: MacroTileWidth = %u\n", __func__, MacroTileWidth);
	dml_print("DML::%s: MetaPTEBytesFrame = %u\n", __func__, *MetaPTEBytesFrame);
	dml_print("DML::%s: MPDEBytesFrame = %u\n", __func__, MPDEBytesFrame);
	dml_print("DML::%s: DPDE0BytesFrame = %u\n", __func__, *DPDE0BytesFrame);
	dml_print("DML::%s: ExtraDPDEBytesFrame= %u\n", __func__, ExtraDPDEBytesFrame);
	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, PDEAndMetaPTEBytesFrame);
	dml_print("DML::%s: ViewportHeight = %u\n", __func__, ViewportHeight);
	dml_print("DML::%s: SwathWidth = %u\n", __func__, SwathWidth);
	dml_print("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub);
#endif

	// --- PTE request shape ---
	if (SurfaceTiling == dml_sw_linear) {
		*PixelPTEReqHeight = 1;
		*PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
		PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
		*PTERequestSize = 64;
	} else if (GPUVMMinPageSizeKBytes == 4) {
		*PixelPTEReqHeight = 16 * BlockHeight256Bytes;
		*PixelPTEReqWidth = 16 * BlockWidth256Bytes;
		*PTERequestSize = 128;
	} else {
		*PixelPTEReqHeight = MacroTileHeight;
		*PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
		*PTERequestSize = 64;
	}
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes);
	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
	dml_print("DML::%s: PixelPTEReqHeight = %u\n", __func__, *PixelPTEReqHeight);
	dml_print("DML::%s: PixelPTEReqWidth = %u\n", __func__, *PixelPTEReqWidth);
	dml_print("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear);
	dml_print("DML::%s: PTERequestSize = %u\n", __func__, *PTERequestSize);
	dml_print("DML::%s: Pitch = %u\n", __func__, Pitch);
#endif

	// --- "One row per frame" variant: a single PTE row spans vp_height_dpte_ub ---
	*dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
	*dpte_row_width_ub_one_row_per_frame = (dml_uint_t)((dml_ceil(((dml_float_t)Pitch * (dml_float_t) *dpte_row_height_one_row_per_frame / (dml_float_t) *PixelPTEReqHeight - 1) / (dml_float_t) *PixelPTEReqWidth, 1) + 1) * (dml_float_t) *PixelPTEReqWidth);
	*PixelPTEBytesPerRow_one_row_per_frame = (dml_uint_t)((dml_float_t) *dpte_row_width_ub_one_row_per_frame / (dml_float_t) *PixelPTEReqWidth * *PTERequestSize);

	// --- Regular PTE row geometry ---
	if (SurfaceTiling == dml_sw_linear) {
		// Largest power of two of rows that fits the PTE buffer, capped at 128.
		*dpte_row_height = (dml_uint_t)(dml_min(128, 1 << (dml_uint_t) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)));
		dml_print("DML::%s: dpte_row_height term 1 = %u\n", __func__, PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
		dml_print("DML::%s: dpte_row_height term 2 = %f\n", __func__, dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
		dml_print("DML::%s: dpte_row_height term 3 = %f\n", __func__, dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
		dml_print("DML::%s: dpte_row_height term 4 = %u\n", __func__, 1 << (dml_uint_t) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
		dml_print("DML::%s: dpte_row_height = %u\n", __func__, *dpte_row_height);

		*dpte_row_width_ub = (dml_uint_t)(dml_ceil(((dml_float_t) Pitch * (dml_float_t) *dpte_row_height - 1), (dml_float_t) *PixelPTEReqWidth) + *PixelPTEReqWidth);
		*PixelPTEBytesPerRow = (dml_uint_t)((dml_float_t) *dpte_row_width_ub / (dml_float_t) *PixelPTEReqWidth * *PTERequestSize);

		// VBA_DELTA, VBA doesn't have programming value for pte row height linear.
		*dpte_row_height_linear = 1 << (dml_uint_t) dml_floor(dml_log2(PTEBufferSizeInRequests * PixelPTEReqWidth_linear / Pitch), 1);
		if (*dpte_row_height_linear > 128)
			*dpte_row_height_linear = 128;

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *dpte_row_width_ub);
#endif

	} else if (!dml_is_vertical_rotation(SourceScan)) {
		*dpte_row_height = *PixelPTEReqHeight;

		if (GPUVMMinPageSizeKBytes > 64) {
			*dpte_row_width_ub = (dml_uint_t)((dml_ceil(((dml_float_t) Pitch * (dml_float_t) *dpte_row_height / (dml_float_t) *PixelPTEReqHeight - 1) / (dml_float_t) *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth);
		} else if (ViewportStationary && (NumberOfDPPs == 1)) {
			*dpte_row_width_ub = (dml_uint_t)(dml_floor(ViewportXStart + SwathWidth + *PixelPTEReqWidth - 1, *PixelPTEReqWidth) - dml_floor(ViewportXStart, *PixelPTEReqWidth));
		} else {
			*dpte_row_width_ub = (dml_uint_t)((dml_ceil((dml_float_t) (SwathWidth - 1) / (dml_float_t)*PixelPTEReqWidth, 1) + 1.0) * *PixelPTEReqWidth);
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *dpte_row_width_ub);
#endif

		ASSERT(*PixelPTEReqWidth);
		if (*PixelPTEReqWidth != 0)
			*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
		else
			*PixelPTEBytesPerRow = 0; // keep the output defined; it is copied to *PixelPTEBytesPerRowStorage below
	} else {
		*dpte_row_height = (dml_uint_t)(dml_min(*PixelPTEReqWidth, MacroTileWidth));

		if (ViewportStationary && (NumberOfDPPs == 1)) {
			*dpte_row_width_ub = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1, *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight));
		} else {
			*dpte_row_width_ub = (dml_uint_t)((dml_ceil((dml_float_t) (SwathWidth - 1) / (dml_float_t) *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight);
		}

		*PixelPTEBytesPerRow = (dml_uint_t)((dml_float_t) *dpte_row_width_ub / (dml_float_t) *PixelPTEReqHeight * *PTERequestSize);
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *dpte_row_width_ub);
#endif
	}

	// No PTE traffic at all when GPUVM is off.
	if (GPUVMEnable != true)
		*PixelPTEBytesPerRow = 0;

	*PixelPTEBytesPerRowStorage = *PixelPTEBytesPerRow;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes);
	dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
	dml_print("DML::%s: dpte_row_height = %u\n", __func__, *dpte_row_height);
	dml_print("DML::%s: dpte_row_height_linear = %u\n", __func__, *dpte_row_height_linear);
	dml_print("DML::%s: dpte_row_width_ub = %u\n", __func__, *dpte_row_width_ub);
	dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *PixelPTEBytesPerRow);
	dml_print("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *PixelPTEBytesPerRowStorage);
	dml_print("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests);
	dml_print("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *dpte_row_height_one_row_per_frame);
	dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *dpte_row_width_ub_one_row_per_frame);
	dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, *PixelPTEBytesPerRow_one_row_per_frame);
#endif

	dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);

	return PDEAndMetaPTEBytesFrame;
} // CalculateVMAndRowBytes
2703
/*
 * Progressive-to-interlace unit effect on pixel clocks.
 *
 * For every active plane the current timing pixel clock is first saved as the
 * back-end pixel clock. If the plane's timing is interlaced and the
 * progressive-to-interlace unit is supported, the timing pixel clock is then
 * doubled in place (display_cfg is modified).
 */
static void PixelClockAdjustmentForProgressiveToInterlaceUnit(struct dml_display_cfg_st *display_cfg, dml_bool_t ptoi_supported)
{
	const dml_uint_t plane_count = dml_get_num_active_planes(display_cfg);
	dml_uint_t plane;

	for (plane = 0; plane < plane_count; plane++) {
		// The back end always runs at the unadjusted clock.
		display_cfg->output.PixelClockBackEnd[plane] = display_cfg->timing.PixelClock[plane];

		if (display_cfg->timing.Interlace[plane] != 1 || ptoi_supported != true)
			continue;

		display_cfg->timing.PixelClock[plane] = 2 * display_cfg->timing.PixelClock[plane];
	}
}
2716
/*
 * Pick or validate the output bits-per-pixel for a link.
 *
 * The maximum bpp the link can carry (MaxLinkBPP) is derived from LinkBitRate,
 * Lanes and PixelClock with an encoder-specific efficiency factor, then
 * adjusted for the ODM mode that applies (ODMModeDSC when DSCEnable is set,
 * ODMModeNoDSC otherwise): 4:1 combine caps it at 16, 2:1 combine caps it at
 * 32, and 1:2 split doubles it.
 *
 * Output:
 *   *RequiredSlots - dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1).
 *
 * Return value:
 *   DesiredBPP == 0 (caller asks for the best fit):
 *     - DSC:    MaxDSCBPP if the link can carry it, otherwise MaxLinkBPP
 *               rounded down to 1/16 bpp; invalid if below MinDSCBPP.
 *     - no DSC: the largest of the three format-specific candidates
 *               (NonDSCBPP2 >= NonDSCBPP1 >= NonDSCBPP0) that fits.
 *   DesiredBPP != 0:
 *     - DesiredBPP itself when it is one of the non-DSC candidates (DSC off)
 *       or lies within [MinDSCBPP, MaxDSCBPP] (DSC on).
 *   __DML_DPP_INVALID__ in every other case.
 *
 * HTotal, HActive, DSCSlices, AudioRate and AudioLayout are part of the
 * interface but do not take part in the computation.
 */
static dml_float_t TruncToValidBPP(
		dml_float_t LinkBitRate,
		dml_uint_t Lanes,
		dml_uint_t HTotal,
		dml_uint_t HActive,
		dml_float_t PixelClock,
		dml_float_t DesiredBPP,
		dml_bool_t DSCEnable,
		enum dml_output_encoder_class Output,
		enum dml_output_format_class Format,
		dml_uint_t DSCInputBitPerComponent,
		dml_uint_t DSCSlices,
		dml_uint_t AudioRate,
		dml_uint_t AudioLayout,
		enum dml_odm_mode ODMModeNoDSC,
		enum dml_odm_mode ODMModeDSC,

		// Output
		dml_uint_t *RequiredSlots)
{
	// Explicitly mark unused parameters, as the other functions in this file do.
	(void)HTotal;
	(void)HActive;
	(void)DSCSlices;
	(void)AudioRate;
	(void)AudioLayout;
	dml_float_t MaxLinkBPP;
	dml_uint_t MinDSCBPP;
	dml_float_t MaxDSCBPP;
	dml_uint_t NonDSCBPP0;
	dml_uint_t NonDSCBPP1;
	dml_uint_t NonDSCBPP2;

	// Format-dependent bpp candidates (no DSC) and bpp range (DSC).
	if (Format == dml_420) {
		NonDSCBPP0 = 12;
		NonDSCBPP1 = 15;
		NonDSCBPP2 = 18;
		MinDSCBPP = 6;
		MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
	} else if (Format == dml_444) {
		NonDSCBPP0 = 24;
		NonDSCBPP1 = 30;
		NonDSCBPP2 = 36;
		MinDSCBPP = 8;
		MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
	} else {
		if (Output == dml_hdmi) {
			NonDSCBPP0 = 24;
			NonDSCBPP1 = 24;
			NonDSCBPP2 = 24;
		} else {
			NonDSCBPP0 = 16;
			NonDSCBPP1 = 20;
			NonDSCBPP2 = 24;
		}
		if (Format == dml_n422) {
			MinDSCBPP = 7;
			MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
		} else {
			MinDSCBPP = 8;
			MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
		}
	}

	// Link capacity in bits per pixel; the scaling factors are encoder specific.
	if (Output == dml_dp2p0) {
		MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128.0 / 132.0 * 383.0 / 384.0 * 65536.0 / 65540.0;
	} else if (DSCEnable && Output == dml_dp) {
		MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock * (1 - 2.4 / 100);
	} else {
		MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock;
	}

	// ODM adjustment uses the mode that matches the DSC setting.
	if (DSCEnable) {
		if (ODMModeDSC == dml_odm_mode_combine_4to1) {
			MaxLinkBPP = dml_min(MaxLinkBPP, 16);
		} else if (ODMModeDSC == dml_odm_mode_combine_2to1) {
			MaxLinkBPP = dml_min(MaxLinkBPP, 32);
		} else if (ODMModeDSC == dml_odm_mode_split_1to2) {
			MaxLinkBPP = 2 * MaxLinkBPP;
		}
	} else {
		if (ODMModeNoDSC == dml_odm_mode_combine_4to1) {
			MaxLinkBPP = dml_min(MaxLinkBPP, 16);
		} else if (ODMModeNoDSC == dml_odm_mode_combine_2to1) {
			MaxLinkBPP = dml_min(MaxLinkBPP, 32);
		} else if (ODMModeNoDSC == dml_odm_mode_split_1to2) {
			MaxLinkBPP = 2 * MaxLinkBPP;
		}
	}

	*RequiredSlots = (dml_uint_t)(dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1));

	if (DesiredBPP == 0) {
		// No specific request: return the best bpp the link supports.
		if (DSCEnable) {
			if (MaxLinkBPP < MinDSCBPP) {
				return __DML_DPP_INVALID__;
			} else if (MaxLinkBPP >= MaxDSCBPP) {
				return MaxDSCBPP;
			} else {
				// Round down to a multiple of 1/16 bpp.
				return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
			}
		} else {
			if (MaxLinkBPP >= NonDSCBPP2) {
				return NonDSCBPP2;
			} else if (MaxLinkBPP >= NonDSCBPP1) {
				return NonDSCBPP1;
			} else if (MaxLinkBPP >= NonDSCBPP0) {
				return NonDSCBPP0;
			} else {
				return __DML_DPP_INVALID__;
			}
		}
	} else {
		// Specific request: accept it only if it is a legal value for the DSC setting.
		if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0)) ||
				(DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
			return __DML_DPP_INVALID__;
		} else {
			return DesiredBPP;
		}
	}
} // TruncToValidBPP
2832
CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(struct display_mode_lib_scratch_st * scratch,struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st * p)2833 static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
2834 struct display_mode_lib_scratch_st *scratch,
2835 struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *p)
2836 {
2837 struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals_st *s = &scratch->CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals;
2838
2839 s->TotalActiveWriteback = 0;
2840 p->Watermark->UrgentWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency;
2841 p->Watermark->USRRetrainingWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency + p->mmSOCParameters.USRRetrainingLatency + p->mmSOCParameters.SMNLatency;
2842 p->Watermark->DRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->Watermark->UrgentWatermark;
2843 p->Watermark->FCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->Watermark->UrgentWatermark;
2844 p->Watermark->StutterExitWatermark = p->mmSOCParameters.SRExitTime + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep;
2845 p->Watermark->StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitTime + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep;
2846 p->Watermark->Z8StutterExitWatermark = p->mmSOCParameters.SRExitZ8Time + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep;
2847 p->Watermark->Z8StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitZ8Time + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep;
2848
2849 #ifdef __DML_VBA_DEBUG__
2850 dml_print("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency);
2851 dml_print("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency);
2852 dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency);
2853 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark);
2854 dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, p->Watermark->USRRetrainingWatermark);
2855 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark);
2856 dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark);
2857 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, p->Watermark->StutterExitWatermark);
2858 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark);
2859 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark);
2860 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark);
2861 #endif
2862
2863 s->TotalActiveWriteback = 0;
2864 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
2865 if (p->WritebackEnable[k] == true) {
2866 s->TotalActiveWriteback = s->TotalActiveWriteback + 1;
2867 }
2868 }
2869
2870 if (s->TotalActiveWriteback <= 1) {
2871 p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency;
2872 } else {
2873 p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
2874 }
2875 if (p->USRRetrainingRequiredFinal)
2876 p->Watermark->WritebackUrgentWatermark = p->Watermark->WritebackUrgentWatermark + p->mmSOCParameters.USRRetrainingLatency;
2877
2878 if (s->TotalActiveWriteback <= 1) {
2879 p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency;
2880 p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency;
2881 } else {
2882 p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
2883 p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024 / 32 / p->SOCCLK;
2884 }
2885
2886 if (p->USRRetrainingRequiredFinal)
2887 p->Watermark->WritebackDRAMClockChangeWatermark = p->Watermark->WritebackDRAMClockChangeWatermark + p->mmSOCParameters.USRRetrainingLatency;
2888
2889 if (p->USRRetrainingRequiredFinal)
2890 p->Watermark->WritebackFCLKChangeWatermark = p->Watermark->WritebackFCLKChangeWatermark + p->mmSOCParameters.USRRetrainingLatency;
2891
2892 #ifdef __DML_VBA_DEBUG__
2893 dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark);
2894 dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark);
2895 dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark);
2896 dml_print("DML::%s: USRRetrainingRequiredFinal = %u\n", __func__, p->USRRetrainingRequiredFinal);
2897 dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency);
2898 #endif
2899
2900 s->TotalPixelBW = 0.0;
2901 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
2902 s->TotalPixelBW = s->TotalPixelBW + p->DPPPerSurface[k]
2903 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * p->VRatio[k] + p->SwathWidthC[k] * p->BytePerPixelDETC[k] * p->VRatioChroma[k]) / (p->HTotal[k] / p->PixelClock[k]);
2904 }
2905
2906 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
2907
2908 s->LBLatencyHidingSourceLinesY[k] = (dml_uint_t)(dml_min((dml_float_t)p->MaxLineBufferLines, dml_floor((dml_float_t)p->LineBufferSize / (dml_float_t)p->LBBitPerPixel[k] / ((dml_float_t)p->SwathWidthY[k] / dml_max(p->HRatio[k], 1.0)), 1)) - (p->VTaps[k] - 1));
2909 s->LBLatencyHidingSourceLinesC[k] = (dml_uint_t)(dml_min((dml_float_t)p->MaxLineBufferLines, dml_floor((dml_float_t)p->LineBufferSize / (dml_float_t)p->LBBitPerPixel[k] / ((dml_float_t)p->SwathWidthC[k] / dml_max(p->HRatioChroma[k], 1.0)), 1)) - (p->VTapsChroma[k] - 1));
2910
2911
2912 #ifdef __DML_VBA_DEBUG__
2913 dml_print("DML::%s: k=%u, MaxLineBufferLines = %u\n", __func__, k, p->MaxLineBufferLines);
2914 dml_print("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize);
2915 dml_print("DML::%s: k=%u, LBBitPerPixel = %u\n", __func__, k, p->LBBitPerPixel[k]);
2916 dml_print("DML::%s: k=%u, HRatio = %f\n", __func__, k, p->HRatio[k]);
2917 dml_print("DML::%s: k=%u, VTaps = %u\n", __func__, k, p->VTaps[k]);
2918 #endif
2919
2920 s->EffectiveLBLatencyHidingY = s->LBLatencyHidingSourceLinesY[k] / p->VRatio[k] * (p->HTotal[k] / p->PixelClock[k]);
2921 s->EffectiveLBLatencyHidingC = s->LBLatencyHidingSourceLinesC[k] / p->VRatioChroma[k] * (p->HTotal[k] / p->PixelClock[k]);
2922
2923 s->EffectiveDETBufferSizeY = p->DETBufferSizeY[k];
2924 if (p->UnboundedRequestEnabled) {
2925 s->EffectiveDETBufferSizeY = s->EffectiveDETBufferSizeY + p->CompressedBufferSizeInkByte * 1024 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * p->VRatio[k]) / (p->HTotal[k] / p->PixelClock[k]) / s->TotalPixelBW;
2926 }
2927
2928 s->LinesInDETY[k] = (dml_float_t)s->EffectiveDETBufferSizeY / p->BytePerPixelDETY[k] / p->SwathWidthY[k];
2929 s->LinesInDETYRoundedDownToSwath[k] = (dml_uint_t)(dml_floor(s->LinesInDETY[k], p->SwathHeightY[k]));
2930 s->FullDETBufferingTimeY = s->LinesInDETYRoundedDownToSwath[k] * (p->HTotal[k] / p->PixelClock[k]) / p->VRatio[k];
2931
2932 s->ActiveClockChangeLatencyHidingY = s->EffectiveLBLatencyHidingY + s->FullDETBufferingTimeY - ((dml_float_t)p->DSTXAfterScaler[k] / (dml_float_t)p->HTotal[k] + (dml_float_t)p->DSTYAfterScaler[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k];
2933
2934 if (p->NumberOfActiveSurfaces > 1) {
2935 s->ActiveClockChangeLatencyHidingY = s->ActiveClockChangeLatencyHidingY - (1.0 - 1.0 / (dml_float_t)p->NumberOfActiveSurfaces) * (dml_float_t)p->SwathHeightY[k] * (dml_float_t)p->HTotal[k] / p->PixelClock[k] / p->VRatio[k];
2936 }
2937
2938 if (p->BytePerPixelDETC[k] > 0) {
2939 s->LinesInDETC[k] = p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k];
2940 s->LinesInDETCRoundedDownToSwath[k] = (dml_uint_t)(dml_floor(s->LinesInDETC[k], p->SwathHeightC[k]));
2941 s->FullDETBufferingTimeC = s->LinesInDETCRoundedDownToSwath[k] * (p->HTotal[k] / p->PixelClock[k]) / p->VRatioChroma[k];
2942 s->ActiveClockChangeLatencyHidingC = s->EffectiveLBLatencyHidingC + s->FullDETBufferingTimeC - ((dml_float_t)p->DSTXAfterScaler[k] / (dml_float_t)p->HTotal[k] + (dml_float_t)p->DSTYAfterScaler[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k];
2943 if (p->NumberOfActiveSurfaces > 1) {
2944 s->ActiveClockChangeLatencyHidingC = s->ActiveClockChangeLatencyHidingC - (1.0 - 1.0 / (dml_float_t)p->NumberOfActiveSurfaces) * (dml_float_t)p->SwathHeightC[k] * (dml_float_t)p->HTotal[k] / p->PixelClock[k] / p->VRatioChroma[k];
2945 }
2946 s->ActiveClockChangeLatencyHiding = dml_min(s->ActiveClockChangeLatencyHidingY, s->ActiveClockChangeLatencyHidingC);
2947 } else {
2948 s->ActiveClockChangeLatencyHiding = s->ActiveClockChangeLatencyHidingY;
2949 }
2950
2951 s->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->UrgentWatermark - p->Watermark->DRAMClockChangeWatermark;
2952 s->ActiveFCLKChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->UrgentWatermark - p->Watermark->FCLKChangeWatermark;
2953 s->USRRetrainingLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->USRRetrainingWatermark;
2954
2955 if (p->WritebackEnable[k]) {
2956 s->WritebackLatencyHiding = (dml_float_t)p->WritebackInterfaceBufferSize * 1024.0 / ((dml_float_t)p->WritebackDestinationWidth[k] * (dml_float_t)p->WritebackDestinationHeight[k] / ((dml_float_t)p->WritebackSourceHeight[k] * (dml_float_t)p->HTotal[k] / p->PixelClock[k]) * 4.0);
2957 if (p->WritebackPixelFormat[k] == dml_444_64) {
2958 s->WritebackLatencyHiding = s->WritebackLatencyHiding / 2;
2959 }
2960 s->WritebackDRAMClockChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackDRAMClockChangeWatermark;
2961
2962 s->WritebackFCLKChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackFCLKChangeWatermark;
2963
2964 s->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(s->ActiveDRAMClockChangeLatencyMargin[k], s->WritebackFCLKChangeLatencyMargin);
2965 s->ActiveFCLKChangeLatencyMargin[k] = dml_min(s->ActiveFCLKChangeLatencyMargin[k], s->WritebackDRAMClockChangeLatencyMargin);
2966 }
2967 p->MaxActiveDRAMClockChangeLatencySupported[k] = (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) ? 0 : (s->ActiveDRAMClockChangeLatencyMargin[k] + p->mmSOCParameters.DRAMClockChangeLatency);
2968 p->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveDRAMClockChangeLatencyMargin[k];
2969 }
2970
2971 *p->USRRetrainingSupport = true;
2972 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
2973 if ((p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) && (s->USRRetrainingLatencyMargin[k] < 0)) {
2974 *p->USRRetrainingSupport = false;
2975 }
2976 }
2977
2978 s->FoundCriticalSurface = false;
2979 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
2980 if ((p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) && ((!s->FoundCriticalSurface)
2981 || ((s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency) < *p->MaxActiveFCLKChangeLatencySupported))) {
2982 s->FoundCriticalSurface = true;
2983 *p->MaxActiveFCLKChangeLatencySupported = s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency;
2984 }
2985 }
2986
2987 for (dml_uint_t i = 0; i < p->NumberOfActiveSurfaces; ++i) {
2988 for (dml_uint_t j = 0; j < p->NumberOfActiveSurfaces; ++j) {
2989 if (i == j ||
2990 (p->BlendingAndTiming[i] == i && p->BlendingAndTiming[j] == i) ||
2991 (p->BlendingAndTiming[j] == j && p->BlendingAndTiming[i] == j) ||
2992 (p->BlendingAndTiming[i] == p->BlendingAndTiming[j] && p->BlendingAndTiming[i] != i) ||
2993 (p->SynchronizeTimingsFinal && p->PixelClock[i] == p->PixelClock[j] && p->HTotal[i] == p->HTotal[j] && p->VTotal[i] == p->VTotal[j] && p->VActive[i] == p->VActive[j]) ||
2994 (p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && (p->DRRDisplay[i] || p->DRRDisplay[j]))) {
2995 s->SynchronizedSurfaces[i][j] = true;
2996 } else {
2997 s->SynchronizedSurfaces[i][j] = false;
2998 }
2999 }
3000 }
3001
3002 s->FCLKChangeSupportNumber = 0;
3003 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3004 if ((p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) && (s->ActiveFCLKChangeLatencyMargin[k] < 0)) {
3005 if (!(p->PrefetchMode[k] <= 1)) {
3006 s->FCLKChangeSupportNumber = 3;
3007 } else if (s->FCLKChangeSupportNumber == 0) {
3008 s->FCLKChangeSupportNumber = ((p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && p->DRRDisplay[k]) ? 2 : 1);
3009 s->LastSurfaceWithoutMargin = k;
3010 } else if (((s->FCLKChangeSupportNumber == 1) && (p->DRRDisplay[k] || (!s->SynchronizedSurfaces[s->LastSurfaceWithoutMargin][k]))) || (s->FCLKChangeSupportNumber == 2))
3011 s->FCLKChangeSupportNumber = 3;
3012 }
3013 }
3014
3015 if (s->FCLKChangeSupportNumber == 0) {
3016 *p->FCLKChangeSupport = dml_fclock_change_vactive;
3017 } else if ((s->FCLKChangeSupportNumber == 1) || (s->FCLKChangeSupportNumber == 2)) {
3018 *p->FCLKChangeSupport = dml_fclock_change_vblank;
3019 } else {
3020 *p->FCLKChangeSupport = dml_fclock_change_unsupported;
3021 }
3022
3023 s->DRAMClockChangeMethod = 0;
3024 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3025 if (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame)
3026 s->DRAMClockChangeMethod = 1;
3027 else if (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport)
3028 s->DRAMClockChangeMethod = 2;
3029 }
3030
3031 s->DRAMClockChangeSupportNumber = 0;
3032 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3033 if (((s->DRAMClockChangeMethod == 0) && (s->ActiveDRAMClockChangeLatencyMargin[k] < 0)) ||
3034 ((s->DRAMClockChangeMethod == 1) && (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_full_frame)) ||
3035 ((s->DRAMClockChangeMethod == 2) && (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_sub_viewport) && (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe))) {
3036 if (p->PrefetchMode[k] != 0) { // Don't need to support DRAM clock change, PrefetchMode 0 means needs DRAM clock change support
3037 s->DRAMClockChangeSupportNumber = 3;
3038 } else if (s->DRAMClockChangeSupportNumber == 0) {
3039 s->DRAMClockChangeSupportNumber = (p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && p->DRRDisplay[k]) ? 2 : 1;
3040 s->LastSurfaceWithoutMargin = k;
3041 } else if (((s->DRAMClockChangeSupportNumber == 1) && (p->DRRDisplay[k] || !s->SynchronizedSurfaces[s->LastSurfaceWithoutMargin][k])) || (s->DRAMClockChangeSupportNumber == 2)) {
3042 s->DRAMClockChangeSupportNumber = 3;
3043 }
3044 }
3045 }
3046
3047 if (s->DRAMClockChangeMethod == 0) { // No MALL usage
3048 if (s->DRAMClockChangeSupportNumber == 0) {
3049 *p->DRAMClockChangeSupport = dml_dram_clock_change_vactive;
3050 } else if (s->DRAMClockChangeSupportNumber == 1) {
3051 *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank;
3052 } else if (s->DRAMClockChangeSupportNumber == 2) {
3053 *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_drr;
3054 } else {
3055 *p->DRAMClockChangeSupport = dml_dram_clock_change_unsupported;
3056 }
3057 } else if (s->DRAMClockChangeMethod == 1) { // Any pipe using MALL full frame
3058 if (s->DRAMClockChangeSupportNumber == 0) {
3059 *p->DRAMClockChangeSupport = dml_dram_clock_change_vactive_w_mall_full_frame;
3060 } else if (s->DRAMClockChangeSupportNumber == 1) {
3061 *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_w_mall_full_frame;
3062 } else if (s->DRAMClockChangeSupportNumber == 2) {
3063 *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_drr_w_mall_full_frame;
3064 } else {
3065 *p->DRAMClockChangeSupport = dml_dram_clock_change_unsupported;
3066 }
3067 } else { // Any pipe using MALL subviewport
3068 if (s->DRAMClockChangeSupportNumber == 0) {
3069 *p->DRAMClockChangeSupport = dml_dram_clock_change_vactive_w_mall_sub_vp;
3070 } else if (s->DRAMClockChangeSupportNumber == 1) {
3071 *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_w_mall_sub_vp;
3072 } else if (s->DRAMClockChangeSupportNumber == 2) {
3073 *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_drr_w_mall_sub_vp;
3074 } else {
3075 *p->DRAMClockChangeSupport = dml_dram_clock_change_unsupported;
3076 }
3077 }
3078
3079 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3080 s->dst_y_pstate = (dml_uint_t)(dml_ceil((p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.UrgentLatency) / (p->HTotal[k] / p->PixelClock[k]), 1));
3081 s->src_y_pstate_l = (dml_uint_t)(dml_ceil(s->dst_y_pstate * p->VRatio[k], p->SwathHeightY[k]));
3082 s->src_y_ahead_l = (dml_uint_t)(dml_floor(p->DETBufferSizeY[k] / p->BytePerPixelDETY[k] / p->SwathWidthY[k], p->SwathHeightY[k]) + s->LBLatencyHidingSourceLinesY[k]);
3083 s->sub_vp_lines_l = s->src_y_pstate_l + s->src_y_ahead_l + p->meta_row_height[k];
3084
3085 #ifdef __DML_VBA_DEBUG__
3086 dml_print("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
3087 dml_print("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
3088 dml_print("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
3089 dml_print("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
3090 dml_print("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]);
3091 dml_print("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate);
3092 dml_print("DML::%s: k=%u, src_y_pstate_l = %u\n", __func__, k, s->src_y_pstate_l);
3093 dml_print("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l);
3094 dml_print("DML::%s: k=%u, meta_row_height = %u\n", __func__, k, p->meta_row_height[k]);
3095 dml_print("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l);
3096 #endif
3097 p->SubViewportLinesNeededInMALL[k] = s->sub_vp_lines_l;
3098
3099 if (p->BytePerPixelDETC[k] > 0) {
3100 s->src_y_pstate_c = (dml_uint_t)(dml_ceil(s->dst_y_pstate * p->VRatioChroma[k], p->SwathHeightC[k]));
3101 s->src_y_ahead_c = (dml_uint_t)(dml_floor(p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k], p->SwathHeightC[k]) + s->LBLatencyHidingSourceLinesC[k]);
3102 s->sub_vp_lines_c = s->src_y_pstate_c + s->src_y_ahead_c + p->meta_row_height_chroma[k];
3103 p->SubViewportLinesNeededInMALL[k] = (dml_uint_t)(dml_max(s->sub_vp_lines_l, s->sub_vp_lines_c));
3104
3105 #ifdef __DML_VBA_DEBUG__
3106 dml_print("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c);
3107 dml_print("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c);
3108 dml_print("DML::%s: k=%u, meta_row_height_chroma = %u\n", __func__, k, p->meta_row_height_chroma[k]);
3109 dml_print("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c);
3110 #endif
3111 }
3112 }
3113
3114 #ifdef __DML_VBA_DEBUG__
3115 dml_print("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->DRAMClockChangeSupport);
3116 dml_print("DML::%s: FCLKChangeSupport = %u\n", __func__, *p->FCLKChangeSupport);
3117 dml_print("DML::%s: MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported);
3118 dml_print("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport);
3119 #endif
3120 } // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
3121
/*
 * Compute the deep-sleep DCFCLK value.
 *
 * For every active surface a per-surface clock is derived from the swath
 * width, bytes per pixel and the line delivery time, and is never allowed
 * below PixelClock / 16. The result written to *DCFClkDeepSleep is the
 * largest of:
 *   - 8.0
 *   - __DML_MIN_DCFCLK_FACTOR__ * (sum of luma + chroma read bandwidth) / ReturnBusWidth
 *   - every per-surface clock
 *
 * All array arguments are indexed by surface, 0 .. NumberOfActiveSurfaces - 1.
 */
static void CalculateDCFCLKDeepSleep(
		dml_uint_t NumberOfActiveSurfaces,
		dml_uint_t BytePerPixelY[],
		dml_uint_t BytePerPixelC[],
		dml_float_t VRatio[],
		dml_float_t VRatioChroma[],
		dml_uint_t SwathWidthY[],
		dml_uint_t SwathWidthC[],
		dml_uint_t DPPPerSurface[],
		dml_float_t HRatio[],
		dml_float_t HRatioChroma[],
		dml_float_t PixelClock[],
		dml_float_t PSCL_THROUGHPUT[],
		dml_float_t PSCL_THROUGHPUT_CHROMA[],
		dml_float_t Dppclk[],
		dml_float_t ReadBandwidthLuma[],
		dml_float_t ReadBandwidthChroma[],
		dml_uint_t ReturnBusWidth,

		// Output
		dml_float_t *DCFClkDeepSleep)
{
	dml_float_t SurfaceClk[__DML_NUM_PLANES__];
	dml_float_t TotalReadBandwidth = 0.0;
	dml_float_t LumaLineTime;
	dml_float_t ChromaLineTime;
	dml_uint_t k;

	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		// Line delivery time: pixel-clock bound when not downscaling
		// vertically, scaler-throughput bound otherwise.
		LumaLineTime = (VRatio[k] <= 1)
				? SwathWidthY[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k]
				: SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];

		if (BytePerPixelC[k] > 0) {
			// The chroma line time is only needed when a chroma plane exists.
			ChromaLineTime = (VRatioChroma[k] <= 1)
					? SwathWidthC[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]
					: SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];

			SurfaceClk[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / LumaLineTime,
					__DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / ChromaLineTime);
		} else {
			// Luma-only surfaces divide by 64.0 instead of 32.0.
			SurfaceClk[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / LumaLineTime;
		}

		// Floor at one sixteenth of the pixel clock.
		SurfaceClk[k] = dml_max(SurfaceClk[k], PixelClock[k] / 16);

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%u, PixelClock = %f\n", __func__, k, PixelClock[k]);
		dml_print("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, SurfaceClk[k]);
#endif
	}

	// Accumulate luma then chroma for each surface, in surface order.
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		TotalReadBandwidth += ReadBandwidthLuma[k];
		TotalReadBandwidth += ReadBandwidthChroma[k];
	}

	*DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * TotalReadBandwidth / (dml_float_t) ReturnBusWidth);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
	dml_print("DML::%s: ReadBandwidth = %f\n", __func__, TotalReadBandwidth);
	dml_print("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth);
	dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
#endif

	// Raise the result to the most demanding surface.
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		*DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, SurfaceClk[k]);
	}
	dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
} // CalculateDCFCLKDeepSleep
3198
CalculateUrgentBurstFactor(enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,dml_uint_t swath_width_luma_ub,dml_uint_t swath_width_chroma_ub,dml_uint_t SwathHeightY,dml_uint_t SwathHeightC,dml_float_t LineTime,dml_float_t UrgentLatency,dml_float_t CursorBufferSize,dml_uint_t CursorWidth,dml_uint_t CursorBPP,dml_float_t VRatio,dml_float_t VRatioC,dml_float_t BytePerPixelInDETY,dml_float_t BytePerPixelInDETC,dml_uint_t DETBufferSizeY,dml_uint_t DETBufferSizeC,dml_float_t * UrgentBurstFactorCursor,dml_float_t * UrgentBurstFactorLuma,dml_float_t * UrgentBurstFactorChroma,dml_bool_t * NotEnoughUrgentLatencyHiding)3199 static void CalculateUrgentBurstFactor(
3200 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,
3201 dml_uint_t swath_width_luma_ub,
3202 dml_uint_t swath_width_chroma_ub,
3203 dml_uint_t SwathHeightY,
3204 dml_uint_t SwathHeightC,
3205 dml_float_t LineTime,
3206 dml_float_t UrgentLatency,
3207 dml_float_t CursorBufferSize,
3208 dml_uint_t CursorWidth,
3209 dml_uint_t CursorBPP,
3210 dml_float_t VRatio,
3211 dml_float_t VRatioC,
3212 dml_float_t BytePerPixelInDETY,
3213 dml_float_t BytePerPixelInDETC,
3214 dml_uint_t DETBufferSizeY,
3215 dml_uint_t DETBufferSizeC,
3216 // Output
3217 dml_float_t *UrgentBurstFactorCursor,
3218 dml_float_t *UrgentBurstFactorLuma,
3219 dml_float_t *UrgentBurstFactorChroma,
3220 dml_bool_t *NotEnoughUrgentLatencyHiding)
3221 {
3222 dml_float_t LinesInDETLuma;
3223 dml_float_t LinesInDETChroma;
3224 dml_uint_t LinesInCursorBuffer;
3225 dml_float_t CursorBufferSizeInTime;
3226 dml_float_t DETBufferSizeInTimeLuma;
3227 dml_float_t DETBufferSizeInTimeChroma;
3228
3229 *NotEnoughUrgentLatencyHiding = 0;
3230
3231 if (CursorWidth > 0) {
3232 LinesInCursorBuffer = 1 << (dml_uint_t) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
3233 if (VRatio > 0) {
3234 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
3235 if (CursorBufferSizeInTime - UrgentLatency <= 0) {
3236 *NotEnoughUrgentLatencyHiding = 1;
3237 *UrgentBurstFactorCursor = 0;
3238 } else {
3239 *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
3240 }
3241 } else {
3242 *UrgentBurstFactorCursor = 1;
3243 }
3244 }
3245
3246 LinesInDETLuma = (UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe ? 1024*1024 : DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
3247
3248 if (VRatio > 0) {
3249 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
3250 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
3251 *NotEnoughUrgentLatencyHiding = 1;
3252 *UrgentBurstFactorLuma = 0;
3253 } else {
3254 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
3255 }
3256 } else {
3257 *UrgentBurstFactorLuma = 1;
3258 }
3259
3260 if (BytePerPixelInDETC > 0) {
3261 LinesInDETChroma = (UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe ? 1024*1024 : DETBufferSizeC) / BytePerPixelInDETC / swath_width_chroma_ub;
3262
3263 if (VRatioC > 0) {
3264 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC;
3265 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
3266 *NotEnoughUrgentLatencyHiding = 1;
3267 *UrgentBurstFactorChroma = 0;
3268 } else {
3269 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
3270 }
3271 } else {
3272 *UrgentBurstFactorChroma = 1;
3273 }
3274 }
3275 } // CalculateUrgentBurstFactor
3276
CalculatePixelDeliveryTimes(dml_uint_t NumberOfActiveSurfaces,dml_float_t VRatio[],dml_float_t VRatioChroma[],dml_float_t VRatioPrefetchY[],dml_float_t VRatioPrefetchC[],dml_uint_t swath_width_luma_ub[],dml_uint_t swath_width_chroma_ub[],dml_uint_t DPPPerSurface[],dml_float_t HRatio[],dml_float_t HRatioChroma[],dml_float_t PixelClock[],dml_float_t PSCL_THROUGHPUT[],dml_float_t PSCL_THROUGHPUT_CHROMA[],dml_float_t Dppclk[],dml_uint_t BytePerPixelC[],enum dml_rotation_angle SourceScan[],dml_uint_t NumberOfCursors[],dml_uint_t CursorWidth[],dml_uint_t CursorBPP[],dml_uint_t BlockWidth256BytesY[],dml_uint_t BlockHeight256BytesY[],dml_uint_t BlockWidth256BytesC[],dml_uint_t BlockHeight256BytesC[],dml_float_t DisplayPipeLineDeliveryTimeLuma[],dml_float_t DisplayPipeLineDeliveryTimeChroma[],dml_float_t DisplayPipeLineDeliveryTimeLumaPrefetch[],dml_float_t DisplayPipeLineDeliveryTimeChromaPrefetch[],dml_float_t DisplayPipeRequestDeliveryTimeLuma[],dml_float_t DisplayPipeRequestDeliveryTimeChroma[],dml_float_t DisplayPipeRequestDeliveryTimeLumaPrefetch[],dml_float_t DisplayPipeRequestDeliveryTimeChromaPrefetch[],dml_float_t CursorRequestDeliveryTime[],dml_float_t CursorRequestDeliveryTimePrefetch[])3277 static void CalculatePixelDeliveryTimes(
3278 dml_uint_t NumberOfActiveSurfaces,
3279 dml_float_t VRatio[],
3280 dml_float_t VRatioChroma[],
3281 dml_float_t VRatioPrefetchY[],
3282 dml_float_t VRatioPrefetchC[],
3283 dml_uint_t swath_width_luma_ub[],
3284 dml_uint_t swath_width_chroma_ub[],
3285 dml_uint_t DPPPerSurface[],
3286 dml_float_t HRatio[],
3287 dml_float_t HRatioChroma[],
3288 dml_float_t PixelClock[],
3289 dml_float_t PSCL_THROUGHPUT[],
3290 dml_float_t PSCL_THROUGHPUT_CHROMA[],
3291 dml_float_t Dppclk[],
3292 dml_uint_t BytePerPixelC[],
3293 enum dml_rotation_angle SourceScan[],
3294 dml_uint_t NumberOfCursors[],
3295 dml_uint_t CursorWidth[],
3296 dml_uint_t CursorBPP[],
3297 dml_uint_t BlockWidth256BytesY[],
3298 dml_uint_t BlockHeight256BytesY[],
3299 dml_uint_t BlockWidth256BytesC[],
3300 dml_uint_t BlockHeight256BytesC[],
3301
3302 // Output
3303 dml_float_t DisplayPipeLineDeliveryTimeLuma[],
3304 dml_float_t DisplayPipeLineDeliveryTimeChroma[],
3305 dml_float_t DisplayPipeLineDeliveryTimeLumaPrefetch[],
3306 dml_float_t DisplayPipeLineDeliveryTimeChromaPrefetch[],
3307 dml_float_t DisplayPipeRequestDeliveryTimeLuma[],
3308 dml_float_t DisplayPipeRequestDeliveryTimeChroma[],
3309 dml_float_t DisplayPipeRequestDeliveryTimeLumaPrefetch[],
3310 dml_float_t DisplayPipeRequestDeliveryTimeChromaPrefetch[],
3311 dml_float_t CursorRequestDeliveryTime[],
3312 dml_float_t CursorRequestDeliveryTimePrefetch[])
3313 {
3314 dml_float_t req_per_swath_ub;
3315
3316 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
3317
3318 #ifdef __DML_VBA_DEBUG__
3319 dml_print("DML::%s: k=%u : HRatio = %f\n", __func__, k, HRatio[k]);
3320 dml_print("DML::%s: k=%u : VRatio = %f\n", __func__, k, VRatio[k]);
3321 dml_print("DML::%s: k=%u : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
3322 dml_print("DML::%s: k=%u : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
3323 dml_print("DML::%s: k=%u : swath_width_luma_ub = %u\n", __func__, k, swath_width_luma_ub[k]);
3324 dml_print("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, k, swath_width_chroma_ub[k]);
3325 dml_print("DML::%s: k=%u : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
3326 dml_print("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
3327 dml_print("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]);
3328 dml_print("DML::%s: k=%u : PixelClock = %f\n", __func__, k, PixelClock[k]);
3329 dml_print("DML::%s: k=%u : Dppclk = %f\n", __func__, k, Dppclk[k]);
3330 #endif
3331
3332 if (VRatio[k] <= 1) {
3333 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
3334 } else {
3335 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
3336 }
3337
3338 if (BytePerPixelC[k] == 0) {
3339 DisplayPipeLineDeliveryTimeChroma[k] = 0;
3340 } else {
3341 if (VRatioChroma[k] <= 1) {
3342 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
3343 } else {
3344 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
3345 }
3346 }
3347
3348 if (VRatioPrefetchY[k] <= 1) {
3349 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
3350 } else {
3351 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
3352 }
3353
3354 if (BytePerPixelC[k] == 0) {
3355 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
3356 } else {
3357 if (VRatioPrefetchC[k] <= 1) {
3358 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
3359 } else {
3360 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
3361 }
3362 }
3363 #ifdef __DML_VBA_DEBUG__
3364 dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
3365 dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
3366 dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
3367 dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
3368 #endif
3369 }
3370
3371 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
3372 if (!dml_is_vertical_rotation(SourceScan[k])) {
3373 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
3374 } else {
3375 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
3376 }
3377 #ifdef __DML_VBA_DEBUG__
3378 dml_print("DML::%s: k=%u : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub);
3379 #endif
3380
3381 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
3382 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
3383 if (BytePerPixelC[k] == 0) {
3384 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
3385 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
3386 } else {
3387 if (!dml_is_vertical_rotation(SourceScan[k])) {
3388 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
3389 } else {
3390 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
3391 }
3392 #ifdef __DML_VBA_DEBUG__
3393 dml_print("DML::%s: k=%u : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub);
3394 #endif
3395 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
3396 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
3397 }
3398 #ifdef __DML_VBA_DEBUG__
3399 dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
3400 dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
3401 dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
3402 dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
3403 #endif
3404 }
3405
3406 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
3407 dml_uint_t cursor_req_per_width;
3408 cursor_req_per_width = (dml_uint_t)(dml_ceil((dml_float_t) CursorWidth[k] * (dml_float_t) CursorBPP[k] / 256.0 / 8.0, 1.0));
3409 if (NumberOfCursors[k] > 0) {
3410 if (VRatio[k] <= 1) {
3411 CursorRequestDeliveryTime[k] = (dml_float_t) CursorWidth[k] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
3412 } else {
3413 CursorRequestDeliveryTime[k] = (dml_float_t) CursorWidth[k] / PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
3414 }
3415 if (VRatioPrefetchY[k] <= 1) {
3416 CursorRequestDeliveryTimePrefetch[k] = (dml_float_t) CursorWidth[k] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
3417 } else {
3418 CursorRequestDeliveryTimePrefetch[k] = (dml_float_t) CursorWidth[k] / PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
3419 }
3420 } else {
3421 CursorRequestDeliveryTime[k] = 0;
3422 CursorRequestDeliveryTimePrefetch[k] = 0;
3423 }
3424 #ifdef __DML_VBA_DEBUG__
3425 dml_print("DML::%s: k=%u : NumberOfCursors = %u\n", __func__, k, NumberOfCursors[k]);
3426 dml_print("DML::%s: k=%u : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
3427 dml_print("DML::%s: k=%u : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
3428 #endif
3429 }
3430 } // CalculatePixelDeliveryTimes
3431
CalculateMetaAndPTETimes(dml_bool_t use_one_row_for_frame[],dml_uint_t NumberOfActiveSurfaces,dml_bool_t GPUVMEnable,dml_uint_t MetaChunkSize,dml_uint_t MinMetaChunkSizeBytes,dml_uint_t HTotal[],dml_float_t VRatio[],dml_float_t VRatioChroma[],dml_float_t DestinationLinesToRequestRowInVBlank[],dml_float_t DestinationLinesToRequestRowInImmediateFlip[],dml_bool_t DCCEnable[],dml_float_t PixelClock[],dml_uint_t BytePerPixelY[],dml_uint_t BytePerPixelC[],enum dml_rotation_angle SourceScan[],dml_uint_t dpte_row_height[],dml_uint_t dpte_row_height_chroma[],dml_uint_t meta_row_width[],dml_uint_t meta_row_width_chroma[],dml_uint_t meta_row_height[],dml_uint_t meta_row_height_chroma[],dml_uint_t meta_req_width[],dml_uint_t meta_req_width_chroma[],dml_uint_t meta_req_height[],dml_uint_t meta_req_height_chroma[],dml_uint_t dpte_group_bytes[],dml_uint_t PTERequestSizeY[],dml_uint_t PTERequestSizeC[],dml_uint_t PixelPTEReqWidthY[],dml_uint_t PixelPTEReqHeightY[],dml_uint_t PixelPTEReqWidthC[],dml_uint_t PixelPTEReqHeightC[],dml_uint_t dpte_row_width_luma_ub[],dml_uint_t dpte_row_width_chroma_ub[],dml_float_t DST_Y_PER_PTE_ROW_NOM_L[],dml_float_t DST_Y_PER_PTE_ROW_NOM_C[],dml_float_t DST_Y_PER_META_ROW_NOM_L[],dml_float_t DST_Y_PER_META_ROW_NOM_C[],dml_float_t TimePerMetaChunkNominal[],dml_float_t TimePerChromaMetaChunkNominal[],dml_float_t TimePerMetaChunkVBlank[],dml_float_t TimePerChromaMetaChunkVBlank[],dml_float_t TimePerMetaChunkFlip[],dml_float_t TimePerChromaMetaChunkFlip[],dml_float_t time_per_pte_group_nom_luma[],dml_float_t time_per_pte_group_vblank_luma[],dml_float_t time_per_pte_group_flip_luma[],dml_float_t time_per_pte_group_nom_chroma[],dml_float_t time_per_pte_group_vblank_chroma[],dml_float_t time_per_pte_group_flip_chroma[])3432 static void CalculateMetaAndPTETimes(
3433 dml_bool_t use_one_row_for_frame[],
3434 dml_uint_t NumberOfActiveSurfaces,
3435 dml_bool_t GPUVMEnable,
3436 dml_uint_t MetaChunkSize,
3437 dml_uint_t MinMetaChunkSizeBytes,
3438 dml_uint_t HTotal[],
3439 dml_float_t VRatio[],
3440 dml_float_t VRatioChroma[],
3441 dml_float_t DestinationLinesToRequestRowInVBlank[],
3442 dml_float_t DestinationLinesToRequestRowInImmediateFlip[],
3443 dml_bool_t DCCEnable[],
3444 dml_float_t PixelClock[],
3445 dml_uint_t BytePerPixelY[],
3446 dml_uint_t BytePerPixelC[],
3447 enum dml_rotation_angle SourceScan[],
3448 dml_uint_t dpte_row_height[],
3449 dml_uint_t dpte_row_height_chroma[],
3450 dml_uint_t meta_row_width[],
3451 dml_uint_t meta_row_width_chroma[],
3452 dml_uint_t meta_row_height[],
3453 dml_uint_t meta_row_height_chroma[],
3454 dml_uint_t meta_req_width[],
3455 dml_uint_t meta_req_width_chroma[],
3456 dml_uint_t meta_req_height[],
3457 dml_uint_t meta_req_height_chroma[],
3458 dml_uint_t dpte_group_bytes[],
3459 dml_uint_t PTERequestSizeY[],
3460 dml_uint_t PTERequestSizeC[],
3461 dml_uint_t PixelPTEReqWidthY[],
3462 dml_uint_t PixelPTEReqHeightY[],
3463 dml_uint_t PixelPTEReqWidthC[],
3464 dml_uint_t PixelPTEReqHeightC[],
3465 dml_uint_t dpte_row_width_luma_ub[],
3466 dml_uint_t dpte_row_width_chroma_ub[],
3467
3468 // Output
3469 dml_float_t DST_Y_PER_PTE_ROW_NOM_L[],
3470 dml_float_t DST_Y_PER_PTE_ROW_NOM_C[],
3471 dml_float_t DST_Y_PER_META_ROW_NOM_L[],
3472 dml_float_t DST_Y_PER_META_ROW_NOM_C[],
3473 dml_float_t TimePerMetaChunkNominal[],
3474 dml_float_t TimePerChromaMetaChunkNominal[],
3475 dml_float_t TimePerMetaChunkVBlank[],
3476 dml_float_t TimePerChromaMetaChunkVBlank[],
3477 dml_float_t TimePerMetaChunkFlip[],
3478 dml_float_t TimePerChromaMetaChunkFlip[],
3479 dml_float_t time_per_pte_group_nom_luma[],
3480 dml_float_t time_per_pte_group_vblank_luma[],
3481 dml_float_t time_per_pte_group_flip_luma[],
3482 dml_float_t time_per_pte_group_nom_chroma[],
3483 dml_float_t time_per_pte_group_vblank_chroma[],
3484 dml_float_t time_per_pte_group_flip_chroma[])
3485 {
3486 dml_uint_t meta_chunk_width;
3487 dml_uint_t min_meta_chunk_width;
3488 dml_uint_t meta_chunk_per_row_int;
3489 dml_uint_t meta_row_remainder;
3490 dml_uint_t meta_chunk_threshold;
3491 dml_uint_t meta_chunks_per_row_ub;
3492 dml_uint_t meta_chunk_width_chroma;
3493 dml_uint_t min_meta_chunk_width_chroma;
3494 dml_uint_t meta_chunk_per_row_int_chroma;
3495 dml_uint_t meta_row_remainder_chroma;
3496 dml_uint_t meta_chunk_threshold_chroma;
3497 dml_uint_t meta_chunks_per_row_ub_chroma;
3498 dml_uint_t dpte_group_width_luma;
3499 dml_uint_t dpte_groups_per_row_luma_ub;
3500 dml_uint_t dpte_group_width_chroma;
3501 dml_uint_t dpte_groups_per_row_chroma_ub;
3502
3503 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
3504 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
3505 if (BytePerPixelC[k] == 0) {
3506 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
3507 } else {
3508 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
3509 }
3510 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
3511 if (BytePerPixelC[k] == 0) {
3512 DST_Y_PER_META_ROW_NOM_C[k] = 0;
3513 } else {
3514 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
3515 }
3516 }
3517
3518 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
3519 if (DCCEnable[k] == true) {
3520 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
3521 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
3522 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
3523 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
3524 if (!dml_is_vertical_rotation(SourceScan[k])) {
3525 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
3526 } else {
3527 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
3528 }
3529 if (meta_row_remainder <= meta_chunk_threshold) {
3530 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
3531 } else {
3532 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
3533 }
3534 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
3535 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
3536 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
3537 if (BytePerPixelC[k] == 0) {
3538 TimePerChromaMetaChunkNominal[k] = 0;
3539 TimePerChromaMetaChunkVBlank[k] = 0;
3540 TimePerChromaMetaChunkFlip[k] = 0;
3541 } else {
3542 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
3543 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
3544 meta_chunk_per_row_int_chroma = (dml_uint_t)((dml_float_t) meta_row_width_chroma[k] / meta_chunk_width_chroma);
3545 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
3546 if (!dml_is_vertical_rotation(SourceScan[k])) {
3547 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
3548 } else {
3549 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
3550 }
3551 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
3552 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
3553 } else {
3554 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
3555 }
3556 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
3557 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
3558 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
3559 }
3560 } else {
3561 TimePerMetaChunkNominal[k] = 0;
3562 TimePerMetaChunkVBlank[k] = 0;
3563 TimePerMetaChunkFlip[k] = 0;
3564 TimePerChromaMetaChunkNominal[k] = 0;
3565 TimePerChromaMetaChunkVBlank[k] = 0;
3566 TimePerChromaMetaChunkFlip[k] = 0;
3567 }
3568 }
3569
3570 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
3571 if (GPUVMEnable == true) {
3572 if (!dml_is_vertical_rotation(SourceScan[k])) {
3573 dpte_group_width_luma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeY[k] * PixelPTEReqWidthY[k]);
3574 } else {
3575 dpte_group_width_luma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeY[k] * PixelPTEReqHeightY[k]);
3576 }
3577
3578 if (use_one_row_for_frame[k]) {
3579 dpte_groups_per_row_luma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_luma_ub[k] / (dml_float_t) dpte_group_width_luma / 2.0, 1.0));
3580 } else {
3581 dpte_groups_per_row_luma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_luma_ub[k] / (dml_float_t) dpte_group_width_luma, 1.0));
3582 }
3583
3584 dml_print("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, use_one_row_for_frame[k]);
3585 dml_print("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, dpte_group_bytes[k]);
3586 dml_print("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, PTERequestSizeY[k]);
3587 dml_print("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, PixelPTEReqWidthY[k]);
3588 dml_print("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", __func__, k, PixelPTEReqHeightY[k]);
3589 dml_print("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, dpte_row_width_luma_ub[k]);
3590 dml_print("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma);
3591 dml_print("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub);
3592
3593 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
3594 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
3595 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
3596 if (BytePerPixelC[k] == 0) {
3597 time_per_pte_group_nom_chroma[k] = 0;
3598 time_per_pte_group_vblank_chroma[k] = 0;
3599 time_per_pte_group_flip_chroma[k] = 0;
3600 } else {
3601 if (!dml_is_vertical_rotation(SourceScan[k])) {
3602 dpte_group_width_chroma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeC[k] * PixelPTEReqWidthC[k]);
3603 } else {
3604 dpte_group_width_chroma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeC[k] * PixelPTEReqHeightC[k]);
3605 }
3606
3607 if (use_one_row_for_frame[k]) {
3608 dpte_groups_per_row_chroma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_chroma_ub[k] / (dml_float_t) dpte_group_width_chroma / 2.0, 1.0));
3609 } else {
3610 dpte_groups_per_row_chroma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_chroma_ub[k] / (dml_float_t) dpte_group_width_chroma, 1.0));
3611 }
3612 dml_print("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, dpte_row_width_chroma_ub[k]);
3613 dml_print("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma);
3614 dml_print("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub);
3615
3616 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
3617 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
3618 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
3619 }
3620 } else {
3621 time_per_pte_group_nom_luma[k] = 0;
3622 time_per_pte_group_vblank_luma[k] = 0;
3623 time_per_pte_group_flip_luma[k] = 0;
3624 time_per_pte_group_nom_chroma[k] = 0;
3625 time_per_pte_group_vblank_chroma[k] = 0;
3626 time_per_pte_group_flip_chroma[k] = 0;
3627 }
3628 #ifdef __DML_VBA_DEBUG__
3629 dml_print("DML::%s: k=%u, DestinationLinesToRequestRowInVBlank = %f\n", __func__, k, DestinationLinesToRequestRowInVBlank[k]);
3630 dml_print("DML::%s: k=%u, DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
3631
3632 dml_print("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
3633 dml_print("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
3634 dml_print("DML::%s: k=%u, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
3635 dml_print("DML::%s: k=%u, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
3636 dml_print("DML::%s: k=%u, TimePerMetaChunkNominal = %f\n", __func__, k, TimePerMetaChunkNominal[k]);
3637 dml_print("DML::%s: k=%u, TimePerMetaChunkVBlank = %f\n", __func__, k, TimePerMetaChunkVBlank[k]);
3638 dml_print("DML::%s: k=%u, TimePerMetaChunkFlip = %f\n", __func__, k, TimePerMetaChunkFlip[k]);
3639 dml_print("DML::%s: k=%u, TimePerChromaMetaChunkNominal = %f\n", __func__, k, TimePerChromaMetaChunkNominal[k]);
3640 dml_print("DML::%s: k=%u, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, TimePerChromaMetaChunkVBlank[k]);
3641 dml_print("DML::%s: k=%u, TimePerChromaMetaChunkFlip = %f\n", __func__, k, TimePerChromaMetaChunkFlip[k]);
3642 dml_print("DML::%s: k=%u, time_per_pte_group_nom_luma = %f\n", __func__, k, time_per_pte_group_nom_luma[k]);
3643 dml_print("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, time_per_pte_group_vblank_luma[k]);
3644 dml_print("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, time_per_pte_group_flip_luma[k]);
3645 dml_print("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, time_per_pte_group_nom_chroma[k]);
3646 dml_print("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, time_per_pte_group_vblank_chroma[k]);
3647 dml_print("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, time_per_pte_group_flip_chroma[k]);
3648 #endif
3649 }
3650 } // CalculateMetaAndPTETimes
3651
/*
 * CalculateVMGroupAndRequestTimes - per-surface time budget per VM group and per VM request.
 *
 * For every surface k in [0, NumberOfActiveSurfaces):
 *
 *  - When GPUVMEnable is true and (DCCEnable[k] is true or
 *    GPUVMMaxPageTableLevels > 1), a group count and a request count are
 *    derived from the per-frame byte totals:
 *      DCC off                 : dpde0 bytes only
 *      DCC on, 1 page level    : meta PTE bytes only
 *      DCC on, other levels    : dpde0 + meta PTE bytes; the group count also
 *                                adds a constant (2 with chroma, 1 without)
 *    Chroma byte totals are included only when BytePerPixelC[k] > 0.
 *    The group count is the sum of ceil(bytes / vm_group_bytes[k]); the
 *    request count is the sum of bytes / 64 using truncating integer division.
 *    Each output is then
 *      <destination lines> * HTotal[k] / PixelClock[k] / <count>
 *    and is halved when GPUVMMaxPageTableLevels > 2.
 *
 *  - Otherwise all four outputs for k are set to 0.
 *
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are accepted for
 * interface compatibility but not used.
 *
 * No input validation is performed: a zero vm_group_bytes[k], PixelClock[k],
 * or a zero resulting count is used as a divisor as-is.
 */
static void CalculateVMGroupAndRequestTimes(
		dml_uint_t NumberOfActiveSurfaces,
		dml_bool_t GPUVMEnable,
		dml_uint_t GPUVMMaxPageTableLevels,
		dml_uint_t HTotal[],
		dml_uint_t BytePerPixelC[],
		dml_float_t DestinationLinesToRequestVMInVBlank[],
		dml_float_t DestinationLinesToRequestVMInImmediateFlip[],
		dml_bool_t DCCEnable[],
		dml_float_t PixelClock[],
		dml_uint_t dpte_row_width_luma_ub[],
		dml_uint_t dpte_row_width_chroma_ub[],
		dml_uint_t vm_group_bytes[],
		dml_uint_t dpde0_bytes_per_frame_ub_l[],
		dml_uint_t dpde0_bytes_per_frame_ub_c[],
		dml_uint_t meta_pte_bytes_per_frame_ub_l[],
		dml_uint_t meta_pte_bytes_per_frame_ub_c[],

		// Output
		dml_float_t TimePerVMGroupVBlank[],
		dml_float_t TimePerVMGroupFlip[],
		dml_float_t TimePerVMRequestVBlank[],
		dml_float_t TimePerVMRequestFlip[])
{
	// Both counts are divisors below. Every path of the enabled branch assigns
	// them before use; the initial 1 is only a defensive default.
	dml_uint_t num_group_per_lower_vm_stage = 1;
	dml_uint_t num_req_per_lower_vm_stage = 1;

	// Unused parameters, kept in the signature for the callers.
	(void)dpte_row_width_luma_ub;
	(void)dpte_row_width_chroma_ub;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
	dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
#endif
	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%u, DCCEnable = %u\n", __func__, k, DCCEnable[k]);
		dml_print("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]);
		dml_print("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", __func__, k, dpde0_bytes_per_frame_ub_c[k]);
		dml_print("DML::%s: k=%u, meta_pte_bytes_per_frame_ub_l = %u\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%u, meta_pte_bytes_per_frame_ub_c = %u\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]);
#endif

		if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
			// Group count: number of vm_group_bytes-sized groups, rounded up per plane.
			if (DCCEnable[k] == false) {
				if (BytePerPixelC[k] > 0) {
					num_group_per_lower_vm_stage = (dml_uint_t) (dml_ceil((dml_float_t) dpde0_bytes_per_frame_ub_l[k] / (dml_float_t) vm_group_bytes[k], 1.0) +
													dml_ceil((dml_float_t) dpde0_bytes_per_frame_ub_c[k] / (dml_float_t) vm_group_bytes[k], 1.0));
				} else {
					num_group_per_lower_vm_stage = (dml_uint_t) (dml_ceil((dml_float_t) dpde0_bytes_per_frame_ub_l[k] / (dml_float_t) vm_group_bytes[k], 1.0));
				}
			} else {
				if (GPUVMMaxPageTableLevels == 1) {
					if (BytePerPixelC[k] > 0) {
						num_group_per_lower_vm_stage = (dml_uint_t)(dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1.0) +
																	dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_c[k]) / (dml_float_t) (vm_group_bytes[k]), 1.0));
					} else {
						num_group_per_lower_vm_stage = (dml_uint_t)(dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1.0));
					}
				} else {
					if (BytePerPixelC[k] > 0) {
						num_group_per_lower_vm_stage = (dml_uint_t)(2.0 + dml_ceil((dml_float_t) (dpde0_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1) +
																dml_ceil((dml_float_t) (dpde0_bytes_per_frame_ub_c[k]) / (dml_float_t) (vm_group_bytes[k]), 1) +
																dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1) +
																dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_c[k]) / (dml_float_t) (vm_group_bytes[k]), 1));
					} else {
						num_group_per_lower_vm_stage = (dml_uint_t)(1.0 + dml_ceil((dml_float_t) (dpde0_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1) +
																	dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1));
					}
				}
			}

			// Request count: per-frame bytes / 64 (truncating), same plane selection as above.
			if (DCCEnable[k] == false) {
				if (BytePerPixelC[k] > 0) {
					num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
				} else {
					num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
				}
			} else {
				if (GPUVMMaxPageTableLevels == 1) {
					if (BytePerPixelC[k] > 0) {
						num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
					} else {
						num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
					}
				} else {
					if (BytePerPixelC[k] > 0) {
						num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
					} else {
						num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
					}
				}
			}

			TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
			TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
			TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
			TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;

			// More than two page-table levels: every budget is halved.
			if (GPUVMMaxPageTableLevels > 2) {
				TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
				TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
				TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
				TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
			}

		} else {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
		dml_print("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
		dml_print("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
		dml_print("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
#endif
	}
} // CalculateVMGroupAndRequestTimes
3774
CalculateStutterEfficiency(struct display_mode_lib_scratch_st * scratch,struct CalculateStutterEfficiency_params_st * p)3775 static void CalculateStutterEfficiency(struct display_mode_lib_scratch_st *scratch,
3776 struct CalculateStutterEfficiency_params_st *p)
3777 {
3778 (void)scratch;
3779 dml_float_t DETBufferingTimeY = 0;
3780 dml_float_t SwathWidthYCriticalSurface = 0;
3781 dml_float_t SwathHeightYCriticalSurface = 0;
3782 dml_float_t VActiveTimeCriticalSurface = 0;
3783 dml_float_t FrameTimeCriticalSurface = 0;
3784 dml_uint_t BytePerPixelYCriticalSurface = 0;
3785 dml_float_t LinesToFinishSwathTransferStutterCriticalSurface = 0;
3786 dml_uint_t DETBufferSizeYCriticalSurface = 0;
3787 dml_float_t MinTTUVBlankCriticalSurface = 0;
3788 dml_uint_t BlockWidth256BytesYCriticalSurface = 0;
3789 dml_bool_t SinglePlaneCriticalSurface = 0;
3790 dml_bool_t SinglePipeCriticalSurface = 0;
3791 dml_float_t TotalCompressedReadBandwidth = 0;
3792 dml_float_t TotalRowReadBandwidth = 0;
3793 dml_float_t AverageDCCCompressionRate = 0;
3794 dml_float_t EffectiveCompressedBufferSize = 0;
3795 dml_float_t PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = 0;
3796 dml_float_t StutterBurstTime = 0;
3797 dml_uint_t TotalActiveWriteback = 0;
3798 dml_float_t LinesInDETY = 0;
3799 dml_float_t LinesInDETYRoundedDownToSwath = 0;
3800 dml_float_t MaximumEffectiveCompressionLuma = 0;
3801 dml_float_t MaximumEffectiveCompressionChroma = 0;
3802 dml_float_t TotalZeroSizeRequestReadBandwidth = 0;
3803 dml_float_t TotalZeroSizeCompressedReadBandwidth = 0;
3804 dml_float_t AverageDCCZeroSizeFraction = 0;
3805 dml_float_t AverageZeroSizeCompressionRate = 0;
3806
3807 dml_bool_t FoundCriticalSurface = false;
3808
3809 dml_uint_t TotalNumberOfActiveOTG = 0;
3810 dml_float_t SinglePixelClock = 0;
3811 dml_uint_t SingleHTotal = 0;
3812 dml_uint_t SingleVTotal = 0;
3813 dml_bool_t SameTiming = true;
3814
3815 dml_float_t LastStutterPeriod = 0.0;
3816 dml_float_t LastZ8StutterPeriod = 0.0;
3817
3818 dml_uint_t SwathSizeCriticalSurface;
3819 dml_uint_t LastChunkOfSwathSize;
3820 dml_uint_t MissingPartOfLastSwathOfDETSize;
3821
3822 TotalZeroSizeRequestReadBandwidth = 0;
3823 TotalZeroSizeCompressedReadBandwidth = 0;
3824 TotalRowReadBandwidth = 0;
3825 TotalCompressedReadBandwidth = 0;
3826
3827 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3828 if (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) {
3829 if (p->DCCEnable[k] == true) {
3830 if ((dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockWidth256BytesY[k] > p->SwathHeightY[k]) || (!dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockHeight256BytesY[k] > p->SwathHeightY[k]) || p->DCCYMaxUncompressedBlock[k] < 256) {
3831 MaximumEffectiveCompressionLuma = 2;
3832 } else {
3833 MaximumEffectiveCompressionLuma = 4;
3834 }
3835 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] / dml_min(p->NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
3836 #ifdef __DML_VBA_DEBUG__
3837 dml_print("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
3838 dml_print("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->NetDCCRateLuma[k]);
3839 dml_print("DML::%s: k=%u, MaximumEffectiveCompressionLuma = %f\n", __func__, k, MaximumEffectiveCompressionLuma);
3840 #endif
3841 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->DCCFractionOfZeroSizeRequestsLuma[k];
3842 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
3843
3844 if (p->ReadBandwidthSurfaceChroma[k] > 0) {
3845 if ((dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockWidth256BytesC[k] > p->SwathHeightC[k]) || (!dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockHeight256BytesC[k] > p->SwathHeightC[k]) || p->DCCCMaxUncompressedBlock[k] < 256) {
3846 MaximumEffectiveCompressionChroma = 2;
3847 } else {
3848 MaximumEffectiveCompressionChroma = 4;
3849 }
3850 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] / dml_min(p->NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
3851 #ifdef __DML_VBA_DEBUG__
3852 dml_print("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]);
3853 dml_print("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->NetDCCRateChroma[k]);
3854 dml_print("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, MaximumEffectiveCompressionChroma);
3855 #endif
3856 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->DCCFractionOfZeroSizeRequestsChroma[k];
3857 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
3858 }
3859 } else {
3860 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] + p->ReadBandwidthSurfaceChroma[k];
3861 }
3862 TotalRowReadBandwidth = TotalRowReadBandwidth + p->DPPPerSurface[k] * (p->meta_row_bw[k] + p->dpte_row_bw[k]);
3863 }
3864 }
3865
3866 AverageDCCCompressionRate = p->TotalDataReadBandwidth / TotalCompressedReadBandwidth;
3867 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / p->TotalDataReadBandwidth;
3868
3869 #ifdef __DML_VBA_DEBUG__
3870 dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled);
3871 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
3872 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
3873 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
3874 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
3875 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
3876 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
3877 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
3878 dml_print("DML::%s: CompbufReservedSpace64B = %u\n", __func__, p->CompbufReservedSpace64B);
3879 dml_print("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs);
3880 dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, p->CompressedBufferSizeInkByte);
3881 #endif
3882 if (AverageDCCZeroSizeFraction == 1) {
3883 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
3884 EffectiveCompressedBufferSize = (dml_float_t)p->MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + ((dml_float_t)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 * AverageZeroSizeCompressionRate;
3885 } else if (AverageDCCZeroSizeFraction > 0) {
3886 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
3887 EffectiveCompressedBufferSize = dml_min((dml_float_t)p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
3888 (dml_float_t)p->MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)) +
3889 dml_min(((dml_float_t)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * AverageDCCCompressionRate,
3890 ((dml_float_t)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
3891
3892 #ifdef __DML_VBA_DEBUG__
3893 dml_print("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
3894 dml_print("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
3895 dml_print("DML::%s: min 3 = %f\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
3896 dml_print("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
3897 #endif
3898 } else {
3899 EffectiveCompressedBufferSize = dml_min((dml_float_t)p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
3900 (dml_float_t)p->MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) +
3901 ((dml_float_t)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * AverageDCCCompressionRate;
3902
3903 #ifdef __DML_VBA_DEBUG__
3904 dml_print("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
3905 dml_print("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
3906 #endif
3907 }
3908
3909 #ifdef __DML_VBA_DEBUG__
3910 dml_print("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries);
3911 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
3912 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
3913 #endif
3914
3915 *p->StutterPeriod = 0;
3916
3917 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3918 if (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) {
3919 LinesInDETY = ((dml_float_t)p->DETBufferSizeY[k] + (p->UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * p->ReadBandwidthSurfaceLuma[k] / p->TotalDataReadBandwidth) / p->BytePerPixelDETY[k] / p->SwathWidthY[k];
3920 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, p->SwathHeightY[k]);
3921 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((dml_float_t)p->HTotal[k] / p->PixelClock[k]) / p->VRatio[k];
3922 #ifdef __DML_VBA_DEBUG__
3923 dml_print("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
3924 dml_print("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
3925 dml_print("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
3926 dml_print("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
3927 dml_print("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth);
3928 dml_print("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, LinesInDETY);
3929 dml_print("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
3930 dml_print("DML::%s: k=%u, HTotal = %u\n", __func__, k, p->HTotal[k]);
3931 dml_print("DML::%s: k=%u, PixelClock = %f\n", __func__, k, p->PixelClock[k]);
3932 dml_print("DML::%s: k=%u, VRatio = %f\n", __func__, k, p->VRatio[k]);
3933 dml_print("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
3934 dml_print("DML::%s: k=%u,PixelClock = %f\n", __func__, k, p->PixelClock[k]);
3935 #endif
3936
3937 if (!FoundCriticalSurface || DETBufferingTimeY < *p->StutterPeriod) {
3938 dml_bool_t isInterlaceTiming = p->Interlace[k] && !p->ProgressiveToInterlaceUnitInOPP;
3939
3940 FoundCriticalSurface = true;
3941 *p->StutterPeriod = DETBufferingTimeY;
3942 FrameTimeCriticalSurface = (isInterlaceTiming ? dml_floor((dml_float_t)p->VTotal[k]/2.0, 1.0) : p->VTotal[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k];
3943 VActiveTimeCriticalSurface = (isInterlaceTiming ? dml_floor((dml_float_t)p->VActive[k]/2.0, 1.0) : p->VActive[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k];
3944 BytePerPixelYCriticalSurface = p->BytePerPixelY[k];
3945 SwathWidthYCriticalSurface = p->SwathWidthY[k];
3946 SwathHeightYCriticalSurface = p->SwathHeightY[k];
3947 BlockWidth256BytesYCriticalSurface = p->BlockWidth256BytesY[k];
3948 LinesToFinishSwathTransferStutterCriticalSurface = p->SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
3949 DETBufferSizeYCriticalSurface = p->DETBufferSizeY[k];
3950 MinTTUVBlankCriticalSurface = p->MinTTUVBlank[k];
3951 SinglePlaneCriticalSurface = (p->ReadBandwidthSurfaceChroma[k] == 0);
3952 SinglePipeCriticalSurface = (p->DPPPerSurface[k] == 1);
3953
3954 #ifdef __DML_VBA_DEBUG__
3955 dml_print("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface);
3956 dml_print("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod);
3957 dml_print("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, MinTTUVBlankCriticalSurface);
3958 dml_print("DML::%s: k=%u, FrameTimeCriticalSurface = %f\n", __func__, k, FrameTimeCriticalSurface);
3959 dml_print("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, VActiveTimeCriticalSurface);
3960 dml_print("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, BytePerPixelYCriticalSurface);
3961 dml_print("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, SwathWidthYCriticalSurface);
3962 dml_print("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, SwathHeightYCriticalSurface);
3963 dml_print("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, BlockWidth256BytesYCriticalSurface);
3964 dml_print("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, SinglePlaneCriticalSurface);
3965 dml_print("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, SinglePipeCriticalSurface);
3966 dml_print("DML::%s: k=%u, LinesToFinishSwathTransferStutterCriticalSurface = %f\n", __func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
3967 #endif
3968 }
3969 }
3970 }
3971
3972 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*p->StutterPeriod * p->TotalDataReadBandwidth, EffectiveCompressedBufferSize);
3973 #ifdef __DML_VBA_DEBUG__
3974 dml_print("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, p->ROBBufferSizeInKByte);
3975 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
3976 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth);
3977 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, p->ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
3978 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
3979 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
3980 dml_print("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW);
3981 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth);
3982 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
3983 dml_print("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK);
3984 #endif
3985
3986 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / p->ReturnBW + (*p->StutterPeriod * p->TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64) + *p->StutterPeriod * TotalRowReadBandwidth / p->ReturnBW;
3987 #ifdef __DML_VBA_DEBUG__
3988 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / p->ReturnBW);
3989 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth));
3990 dml_print("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64));
3991 dml_print("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * TotalRowReadBandwidth / p->ReturnBW);
3992 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
3993 #endif
3994 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / p->ReturnBW);
3995
3996 dml_print("DML::%s: Time to finish residue swath=%f\n", __func__, LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / p->ReturnBW);
3997
3998 TotalActiveWriteback = 0;
3999 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
4000 if (p->WritebackEnable[k]) {
4001 TotalActiveWriteback = TotalActiveWriteback + 1;
4002 }
4003 }
4004
4005 if (TotalActiveWriteback == 0) {
4006 #ifdef __DML_VBA_DEBUG__
4007 dml_print("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime);
4008 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time);
4009 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
4010 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
4011 #endif
4012 *p->StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (p->SRExitTime + StutterBurstTime) / *p->StutterPeriod) * 100;
4013 *p->Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (p->SRExitZ8Time + StutterBurstTime) / *p->StutterPeriod) * 100;
4014 *p->NumberOfStutterBurstsPerFrame = (*p->StutterEfficiencyNotIncludingVBlank > 0 ? (dml_uint_t)(dml_ceil(VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0);
4015 *p->Z8NumberOfStutterBurstsPerFrame = (*p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? (dml_uint_t)(dml_ceil(VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0);
4016 } else {
4017 *p->StutterEfficiencyNotIncludingVBlank = 0.;
4018 *p->Z8StutterEfficiencyNotIncludingVBlank = 0.;
4019 *p->NumberOfStutterBurstsPerFrame = 0;
4020 *p->Z8NumberOfStutterBurstsPerFrame = 0;
4021 }
4022 #ifdef __DML_VBA_DEBUG__
4023 dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
4024 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
4025 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank);
4026 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame);
4027 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
4028 #endif
4029
4030 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
4031 if (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) {
4032 if (p->BlendingAndTiming[k] == k) {
4033 if (TotalNumberOfActiveOTG == 0) {
4034 SinglePixelClock = p->PixelClock[k];
4035 SingleHTotal = p->HTotal[k];
4036 SingleVTotal = p->VTotal[k];
4037 } else if (SinglePixelClock != p->PixelClock[k] || SingleHTotal != p->HTotal[k] || SingleVTotal != p->VTotal[k]) {
4038 SameTiming = false;
4039 }
4040 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
4041 }
4042 }
4043 }
4044
4045 if (*p->StutterEfficiencyNotIncludingVBlank > 0) {
4046 LastStutterPeriod = VActiveTimeCriticalSurface - (*p->NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod;
4047
4048 if ((p->SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming &&
4049 LastStutterPeriod + MinTTUVBlankCriticalSurface > p->StutterEnterPlusExitWatermark) {
4050 *p->StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitTime + StutterBurstTime * VActiveTimeCriticalSurface / *p->StutterPeriod) / FrameTimeCriticalSurface) * 100;
4051 } else {
4052 *p->StutterEfficiency = *p->StutterEfficiencyNotIncludingVBlank;
4053 }
4054 } else {
4055 *p->StutterEfficiency = 0;
4056 }
4057
4058 if (*p->Z8StutterEfficiencyNotIncludingVBlank > 0) {
4059 LastZ8StutterPeriod = VActiveTimeCriticalSurface - (*p->NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod;
4060 if ((p->SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod + MinTTUVBlankCriticalSurface > p->Z8StutterEnterPlusExitWatermark) {
4061 *p->Z8StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalSurface / *p->StutterPeriod) / FrameTimeCriticalSurface) * 100;
4062 } else {
4063 *p->Z8StutterEfficiency = *p->Z8StutterEfficiencyNotIncludingVBlank;
4064 }
4065 } else {
4066 *p->Z8StutterEfficiency = 0.;
4067 }
4068
4069 #ifdef __DML_VBA_DEBUG__
4070 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
4071 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Z8StutterEnterPlusExitWatermark);
4072 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
4073 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
4074 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *p->StutterEfficiency);
4075 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency);
4076 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
4077 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
4078 #endif
4079
4080 SwathSizeCriticalSurface = (dml_uint_t)(BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface));
4081 LastChunkOfSwathSize = SwathSizeCriticalSurface % (p->PixelChunkSizeInKByte * 1024);
4082 MissingPartOfLastSwathOfDETSize = (dml_uint_t)(dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface) - DETBufferSizeYCriticalSurface);
4083
4084 *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && SinglePlaneCriticalSurface && SinglePipeCriticalSurface && (LastChunkOfSwathSize > 0) &&
4085 (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0) && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
4086
4087 #ifdef __DML_VBA_DEBUG__
4088 dml_print("DML::%s: SwathSizeCriticalSurface = %u\n", __func__, SwathSizeCriticalSurface);
4089 dml_print("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, DETBufferSizeYCriticalSurface);
4090 dml_print("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte);
4091 dml_print("DML::%s: LastChunkOfSwathSize = %u\n", __func__, LastChunkOfSwathSize);
4092 dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %u\n", __func__, MissingPartOfLastSwathOfDETSize);
4093 dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
4094 #endif
4095 } // CalculateStutterEfficiency
4096
4097 /// \CalculateSwathAndDETConfiguration
4098 /// @brief Calculates swath width and different return buffers sizing (DET, CDB, etc.)
CalculateSwathAndDETConfiguration(struct display_mode_lib_scratch_st * scratch,struct CalculateSwathAndDETConfiguration_params_st * p)4099 static void CalculateSwathAndDETConfiguration(struct display_mode_lib_scratch_st *scratch,
4100 struct CalculateSwathAndDETConfiguration_params_st *p)
4101 {
4102 (void)scratch;
4103 dml_uint_t MaximumSwathHeightY[__DML_NUM_PLANES__];
4104 dml_uint_t MaximumSwathHeightC[__DML_NUM_PLANES__];
4105 dml_uint_t RoundedUpMaxSwathSizeBytesY[__DML_NUM_PLANES__];
4106 dml_uint_t RoundedUpMaxSwathSizeBytesC[__DML_NUM_PLANES__];
4107 dml_uint_t RoundedUpSwathSizeBytesY[__DML_NUM_PLANES__] = { 0 };
4108 dml_uint_t RoundedUpSwathSizeBytesC[__DML_NUM_PLANES__] = { 0 };
4109 dml_uint_t SwathWidthSingleDPP[__DML_NUM_PLANES__];
4110 dml_uint_t SwathWidthSingleDPPChroma[__DML_NUM_PLANES__];
4111
4112 dml_uint_t TotalActiveDPP = 0;
4113 dml_bool_t NoChromaOrLinearSurfaces = true;
4114 dml_uint_t SurfaceDoingUnboundedRequest = 0;
4115
4116 dml_uint_t DETBufferSizeInKByteForSwathCalculation;
4117
4118 const long TTUFIFODEPTH = 8;
4119 const long MAXIMUMCOMPRESSION = 4;
4120
4121 #ifdef __DML_VBA_DEBUG__
4122 dml_print("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP);
4123 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
4124 dml_print("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]);
4125 }
4126 #endif
4127 CalculateSwathWidth(p->ForceSingleDPP,
4128 p->NumberOfActiveSurfaces,
4129 p->SourcePixelFormat,
4130 p->SourceScan,
4131 p->ViewportStationary,
4132 p->ViewportWidth,
4133 p->ViewportHeight,
4134 p->ViewportXStart,
4135 p->ViewportYStart,
4136 p->ViewportXStartC,
4137 p->ViewportYStartC,
4138 p->SurfaceWidthY,
4139 p->SurfaceWidthC,
4140 p->SurfaceHeightY,
4141 p->SurfaceHeightC,
4142 p->ODMMode,
4143 p->BytePerPixY,
4144 p->BytePerPixC,
4145 p->Read256BytesBlockHeightY,
4146 p->Read256BytesBlockHeightC,
4147 p->Read256BytesBlockWidthY,
4148 p->Read256BytesBlockWidthC,
4149 p->BlendingAndTiming,
4150 p->HActive,
4151 p->HRatio,
4152 p->DPPPerSurface,
4153
4154 // Output
4155 SwathWidthSingleDPP,
4156 SwathWidthSingleDPPChroma,
4157 p->SwathWidth,
4158 p->SwathWidthChroma,
4159 MaximumSwathHeightY,
4160 MaximumSwathHeightC,
4161 p->swath_width_luma_ub,
4162 p->swath_width_chroma_ub);
4163
4164 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
4165 RoundedUpMaxSwathSizeBytesY[k] = (dml_uint_t)(p->swath_width_luma_ub[k] * p->BytePerPixDETY[k] * MaximumSwathHeightY[k]);
4166 RoundedUpMaxSwathSizeBytesC[k] = (dml_uint_t)(p->swath_width_chroma_ub[k] * p->BytePerPixDETC[k] * MaximumSwathHeightC[k]);
4167 #ifdef __DML_VBA_DEBUG__
4168 dml_print("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]);
4169 dml_print("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]);
4170 dml_print("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]);
4171 dml_print("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, MaximumSwathHeightY[k]);
4172 dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesY = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesY[k]);
4173 dml_print("DML::%s: k=%u swath_width_chroma_ub = %u\n", __func__, k, p->swath_width_chroma_ub[k]);
4174 dml_print("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, p->BytePerPixDETC[k]);
4175 dml_print("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, MaximumSwathHeightC[k]);
4176 dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesC = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesC[k]);
4177 #endif
4178 if (p->SourcePixelFormat[k] == dml_420_10) {
4179 RoundedUpMaxSwathSizeBytesY[k] = (dml_uint_t)(dml_ceil((dml_float_t) RoundedUpMaxSwathSizeBytesY[k], 256));
4180 RoundedUpMaxSwathSizeBytesC[k] = (dml_uint_t)(dml_ceil((dml_float_t) RoundedUpMaxSwathSizeBytesC[k], 256));
4181 }
4182 }
4183
4184 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
4185 TotalActiveDPP = TotalActiveDPP + (p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]);
4186 if (p->DPPPerSurface[k] > 0)
4187 SurfaceDoingUnboundedRequest = k;
4188 if (p->SourcePixelFormat[k] == dml_420_8 || p->SourcePixelFormat[k] == dml_420_10 ||
4189 p->SourcePixelFormat[k] == dml_420_12 || p->SourcePixelFormat[k] == dml_rgbe_alpha
4190 || p->SurfaceTiling[k] == dml_sw_linear) {
4191 NoChromaOrLinearSurfaces = false;
4192 }
4193 }
4194
4195 *p->UnboundedRequestEnabled = UnboundedRequest(p->UseUnboundedRequestingFinal, TotalActiveDPP,
4196 NoChromaOrLinearSurfaces, p->Output[0]);
4197
4198 CalculateDETBufferSize(p->DETSizeOverride,
4199 p->UseMALLForPStateChange,
4200 p->ForceSingleDPP,
4201 p->NumberOfActiveSurfaces,
4202 *p->UnboundedRequestEnabled,
4203 p->nomDETInKByte,
4204 p->MaxTotalDETInKByte,
4205 p->ConfigReturnBufferSizeInKByte,
4206 p->MinCompressedBufferSizeInKByte,
4207 p->ConfigReturnBufferSegmentSizeInkByte,
4208 p->CompressedBufferSegmentSizeInkByteFinal,
4209 p->SourcePixelFormat,
4210 p->ReadBandwidthLuma,
4211 p->ReadBandwidthChroma,
4212 RoundedUpMaxSwathSizeBytesY,
4213 RoundedUpMaxSwathSizeBytesC,
4214 p->DPPPerSurface,
4215
4216 // Output
4217 p->DETBufferSizeInKByte, // per hubp pipe
4218 p->CompressedBufferSizeInkByte);
4219
4220 #ifdef __DML_VBA_DEBUG__
4221 dml_print("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP);
4222 dml_print("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte);
4223 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte);
4224 dml_print("DML::%s: UseUnboundedRequestingFinal = %u\n", __func__, p->UseUnboundedRequestingFinal);
4225 dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled);
4226 dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte);
4227 #endif
4228
4229 *p->ViewportSizeSupport = true;
4230 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
4231
4232 DETBufferSizeInKByteForSwathCalculation = (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe ? 1024 : p->DETBufferSizeInKByte[k]);
4233 #ifdef __DML_VBA_DEBUG__
4234 dml_print("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation);
4235 #endif
4236
4237 if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
4238 p->SwathHeightY[k] = MaximumSwathHeightY[k];
4239 p->SwathHeightC[k] = MaximumSwathHeightC[k];
4240 RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k];
4241 RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k];
4242 } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] && RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
4243 p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
4244 p->SwathHeightC[k] = MaximumSwathHeightC[k];
4245 RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k] / 2;
4246 RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k];
4247 } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] && RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
4248 p->SwathHeightY[k] = MaximumSwathHeightY[k];
4249 p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
4250 RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k];
4251 RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k] / 2;
4252 } else {
4253 p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
4254 p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
4255 RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k] / 2;
4256 RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k] / 2;
4257 }
4258
4259 if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 > DETBufferSizeInKByteForSwathCalculation * 1024 / 2) ||
4260 p->SwathWidth[k] > p->MaximumSwathWidthLuma[k] || (p->SwathHeightC[k] > 0 && p->SwathWidthChroma[k] > p->MaximumSwathWidthChroma[k])) {
4261 *p->ViewportSizeSupport = false;
4262 p->ViewportSizeSupportPerSurface[k] = false;
4263 } else {
4264 p->ViewportSizeSupportPerSurface[k] = true;
4265 }
4266
4267 if (p->SwathHeightC[k] == 0) {
4268 #ifdef __DML_VBA_DEBUG__
4269 dml_print("DML::%s: k=%u All DET for plane0\n", __func__, k);
4270 #endif
4271 p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024;
4272 p->DETBufferSizeC[k] = 0;
4273 } else if (RoundedUpSwathSizeBytesY[k] <= 1.5 * RoundedUpSwathSizeBytesC[k]) {
4274 #ifdef __DML_VBA_DEBUG__
4275 dml_print("DML::%s: k=%u Half DET for plane0, half for plane1\n", __func__, k);
4276 #endif
4277 p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
4278 p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
4279 } else {
4280 #ifdef __DML_VBA_DEBUG__
4281 dml_print("DML::%s: k=%u 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
4282 #endif
4283 p->DETBufferSizeY[k] = (dml_uint_t)(dml_floor(p->DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024));
4284 p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 - p->DETBufferSizeY[k];
4285 }
4286
4287 #ifdef __DML_VBA_DEBUG__
4288 dml_print("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
4289 dml_print("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]);
4290 dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesY = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesY[k]);
4291 dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesC = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesC[k]);
4292 dml_print("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, RoundedUpSwathSizeBytesY[k]);
4293 dml_print("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]);
4294 dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]);
4295 dml_print("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
4296 dml_print("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]);
4297 dml_print("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]);
4298 #endif
4299
4300 }
4301
4302 *p->compbuf_reserved_space_64b = 2 * p->PixelChunkSizeInKByte * 1024 / 64;
4303 if (*p->UnboundedRequestEnabled) {
4304 *p->compbuf_reserved_space_64b = dml_max(*p->compbuf_reserved_space_64b,
4305 (dml_float_t)(p->ROBBufferSizeInKByte * 1024/64)
4306 - (dml_float_t)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / MAXIMUMCOMPRESSION/64));
4307 }
4308 *p->compbuf_reserved_space_zs = 2 * p->PixelChunkSizeInKByte * 1024 / 256;
4309 } // CalculateSwathAndDETConfiguration
4310
CalculateSwathWidth(dml_bool_t ForceSingleDPP,dml_uint_t NumberOfActiveSurfaces,enum dml_source_format_class SourcePixelFormat[],enum dml_rotation_angle SourceScan[],dml_bool_t ViewportStationary[],dml_uint_t ViewportWidth[],dml_uint_t ViewportHeight[],dml_uint_t ViewportXStart[],dml_uint_t ViewportYStart[],dml_uint_t ViewportXStartC[],dml_uint_t ViewportYStartC[],dml_uint_t SurfaceWidthY[],dml_uint_t SurfaceWidthC[],dml_uint_t SurfaceHeightY[],dml_uint_t SurfaceHeightC[],enum dml_odm_mode ODMMode[],dml_uint_t BytePerPixY[],dml_uint_t BytePerPixC[],dml_uint_t Read256BytesBlockHeightY[],dml_uint_t Read256BytesBlockHeightC[],dml_uint_t Read256BytesBlockWidthY[],dml_uint_t Read256BytesBlockWidthC[],dml_uint_t BlendingAndTiming[],dml_uint_t HActive[],dml_float_t HRatio[],dml_uint_t DPPPerSurface[],dml_uint_t SwathWidthSingleDPPY[],dml_uint_t SwathWidthSingleDPPC[],dml_uint_t SwathWidthY[],dml_uint_t SwathWidthC[],dml_uint_t MaximumSwathHeightY[],dml_uint_t MaximumSwathHeightC[],dml_uint_t swath_width_luma_ub[],dml_uint_t swath_width_chroma_ub[])4311 static void CalculateSwathWidth(
4312 dml_bool_t ForceSingleDPP,
4313 dml_uint_t NumberOfActiveSurfaces,
4314 enum dml_source_format_class SourcePixelFormat[],
4315 enum dml_rotation_angle SourceScan[],
4316 dml_bool_t ViewportStationary[],
4317 dml_uint_t ViewportWidth[],
4318 dml_uint_t ViewportHeight[],
4319 dml_uint_t ViewportXStart[],
4320 dml_uint_t ViewportYStart[],
4321 dml_uint_t ViewportXStartC[],
4322 dml_uint_t ViewportYStartC[],
4323 dml_uint_t SurfaceWidthY[],
4324 dml_uint_t SurfaceWidthC[],
4325 dml_uint_t SurfaceHeightY[],
4326 dml_uint_t SurfaceHeightC[],
4327 enum dml_odm_mode ODMMode[],
4328 dml_uint_t BytePerPixY[],
4329 dml_uint_t BytePerPixC[],
4330 dml_uint_t Read256BytesBlockHeightY[],
4331 dml_uint_t Read256BytesBlockHeightC[],
4332 dml_uint_t Read256BytesBlockWidthY[],
4333 dml_uint_t Read256BytesBlockWidthC[],
4334 dml_uint_t BlendingAndTiming[],
4335 dml_uint_t HActive[],
4336 dml_float_t HRatio[],
4337 dml_uint_t DPPPerSurface[],
4338
4339 // Output
4340 dml_uint_t SwathWidthSingleDPPY[],
4341 dml_uint_t SwathWidthSingleDPPC[],
4342 dml_uint_t SwathWidthY[], // per-pipe
4343 dml_uint_t SwathWidthC[], // per-pipe
4344 dml_uint_t MaximumSwathHeightY[],
4345 dml_uint_t MaximumSwathHeightC[],
4346 dml_uint_t swath_width_luma_ub[], // per-pipe
4347 dml_uint_t swath_width_chroma_ub[]) // per-pipe
4348 {
4349 (void)BytePerPixY;
4350 enum dml_odm_mode MainSurfaceODMMode;
4351 dml_uint_t surface_width_ub_l;
4352 dml_uint_t surface_height_ub_l;
4353 dml_uint_t surface_width_ub_c = 0;
4354 dml_uint_t surface_height_ub_c = 0;
4355
4356 #ifdef __DML_VBA_DEBUG__
4357 dml_print("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
4358 dml_print("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
4359 #endif
4360
4361 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
4362 if (!dml_is_vertical_rotation(SourceScan[k])) {
4363 SwathWidthSingleDPPY[k] = ViewportWidth[k];
4364 } else {
4365 SwathWidthSingleDPPY[k] = ViewportHeight[k];
4366 }
4367
4368 #ifdef __DML_VBA_DEBUG__
4369 dml_print("DML::%s: k=%u ViewportWidth=%u\n", __func__, k, ViewportWidth[k]);
4370 dml_print("DML::%s: k=%u ViewportHeight=%u\n", __func__, k, ViewportHeight[k]);
4371 dml_print("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
4372 #endif
4373
4374 MainSurfaceODMMode = ODMMode[k];
4375 for (dml_uint_t j = 0; j < NumberOfActiveSurfaces; ++j) {
4376 if (BlendingAndTiming[k] == j) {
4377 MainSurfaceODMMode = ODMMode[j];
4378 }
4379 }
4380
4381 if (ForceSingleDPP) {
4382 SwathWidthY[k] = SwathWidthSingleDPPY[k];
4383 } else {
4384 if (MainSurfaceODMMode == dml_odm_mode_combine_4to1) {
4385 SwathWidthY[k] = (dml_uint_t)(dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k], true)));
4386 } else if (MainSurfaceODMMode == dml_odm_mode_combine_2to1) {
4387 SwathWidthY[k] = (dml_uint_t)(dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k], true)));
4388 } else if (DPPPerSurface[k] == 2) {
4389 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
4390 } else {
4391 SwathWidthY[k] = SwathWidthSingleDPPY[k];
4392 }
4393 }
4394
4395 #ifdef __DML_VBA_DEBUG__
4396 dml_print("DML::%s: k=%u HActive=%u\n", __func__, k, HActive[k]);
4397 dml_print("DML::%s: k=%u HRatio=%f\n", __func__, k, HRatio[k]);
4398 dml_print("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode);
4399 dml_print("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", __func__, k, SwathWidthSingleDPPY[k]);
4400 dml_print("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]);
4401 #endif
4402
4403 if (SourcePixelFormat[k] == dml_420_8 || SourcePixelFormat[k] == dml_420_10 || SourcePixelFormat[k] == dml_420_12) {
4404 SwathWidthC[k] = SwathWidthY[k] / 2;
4405 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
4406 } else {
4407 SwathWidthC[k] = SwathWidthY[k];
4408 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
4409 }
4410
4411 if (ForceSingleDPP == true) {
4412 SwathWidthY[k] = SwathWidthSingleDPPY[k];
4413 SwathWidthC[k] = SwathWidthSingleDPPC[k];
4414 }
4415
4416 surface_width_ub_l = (dml_uint_t)dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
4417 surface_height_ub_l = (dml_uint_t)dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
4418
4419 if (!dml_is_vertical_rotation(SourceScan[k])) {
4420 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
4421 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
4422 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
4423 swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_l, dml_floor(ViewportXStart[k] + SwathWidthY[k] + Read256BytesBlockWidthY[k] - 1, Read256BytesBlockWidthY[k]) - dml_floor(ViewportXStart[k], Read256BytesBlockWidthY[k])));
4424 } else {
4425 swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_l, dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]));
4426 }
4427 if (BytePerPixC[k] > 0) {
4428 surface_width_ub_c = (dml_uint_t)dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
4429 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
4430 swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_c, dml_floor(ViewportXStartC[k] + SwathWidthC[k] + Read256BytesBlockWidthC[k] - 1, Read256BytesBlockWidthC[k]) - dml_floor(ViewportXStartC[k], Read256BytesBlockWidthC[k])));
4431 } else {
4432 swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_c, dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]));
4433 }
4434 } else {
4435 swath_width_chroma_ub[k] = 0;
4436 }
4437 } else {
4438 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
4439 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
4440
4441 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
4442 swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] + SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k])));
4443 } else {
4444 swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]));
4445 }
4446 if (BytePerPixC[k] > 0) {
4447 surface_height_ub_c = (dml_uint_t)dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
4448 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
4449 swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_c, dml_floor(ViewportYStartC[k] + SwathWidthC[k] + Read256BytesBlockHeightC[k] - 1, Read256BytesBlockHeightC[k]) - dml_floor(ViewportYStartC[k], Read256BytesBlockHeightC[k])));
4450 } else {
4451 swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_c, dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]));
4452 }
4453 } else {
4454 swath_width_chroma_ub[k] = 0;
4455 }
4456 }
4457
4458 #ifdef __DML_VBA_DEBUG__
4459 dml_print("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l);
4460 dml_print("DML::%s: k=%u surface_height_ub_l=%u\n", __func__, k, surface_height_ub_l);
4461 dml_print("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c);
4462 dml_print("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c);
4463 dml_print("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]);
4464 dml_print("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]);
4465 dml_print("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]);
4466 dml_print("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]);
4467 dml_print("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, ViewportStationary[k]);
4468 dml_print("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
4469 dml_print("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]);
4470 dml_print("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]);
4471 dml_print("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, MaximumSwathHeightY[k]);
4472 dml_print("DML::%s: k=%u MaximumSwathHeightC=%u\n", __func__, k, MaximumSwathHeightC[k]);
4473 #endif
4474
4475 }
4476 } // CalculateSwathWidth
4477
CalculateExtraLatency(dml_uint_t RoundTripPingLatencyCycles,dml_uint_t ReorderingBytes,dml_float_t DCFCLK,dml_uint_t TotalNumberOfActiveDPP,dml_uint_t PixelChunkSizeInKByte,dml_uint_t TotalNumberOfDCCActiveDPP,dml_uint_t MetaChunkSize,dml_float_t ReturnBW,dml_bool_t GPUVMEnable,dml_bool_t HostVMEnable,dml_uint_t NumberOfActiveSurfaces,dml_uint_t NumberOfDPP[],dml_uint_t dpte_group_bytes[],dml_float_t HostVMInefficiencyFactor,dml_uint_t HostVMMinPageSize,dml_uint_t HostVMMaxNonCachedPageTableLevels)4478 static noinline_for_stack dml_float_t CalculateExtraLatency(
4479 dml_uint_t RoundTripPingLatencyCycles,
4480 dml_uint_t ReorderingBytes,
4481 dml_float_t DCFCLK,
4482 dml_uint_t TotalNumberOfActiveDPP,
4483 dml_uint_t PixelChunkSizeInKByte,
4484 dml_uint_t TotalNumberOfDCCActiveDPP,
4485 dml_uint_t MetaChunkSize,
4486 dml_float_t ReturnBW,
4487 dml_bool_t GPUVMEnable,
4488 dml_bool_t HostVMEnable,
4489 dml_uint_t NumberOfActiveSurfaces,
4490 dml_uint_t NumberOfDPP[],
4491 dml_uint_t dpte_group_bytes[],
4492 dml_float_t HostVMInefficiencyFactor,
4493 dml_uint_t HostVMMinPageSize,
4494 dml_uint_t HostVMMaxNonCachedPageTableLevels)
4495 {
4496 dml_float_t ExtraLatencyBytes;
4497 dml_float_t ExtraLatency;
4498
4499 ExtraLatencyBytes = CalculateExtraLatencyBytes(
4500 ReorderingBytes,
4501 TotalNumberOfActiveDPP,
4502 PixelChunkSizeInKByte,
4503 TotalNumberOfDCCActiveDPP,
4504 MetaChunkSize,
4505 GPUVMEnable,
4506 HostVMEnable,
4507 NumberOfActiveSurfaces,
4508 NumberOfDPP,
4509 dpte_group_bytes,
4510 HostVMInefficiencyFactor,
4511 HostVMMinPageSize,
4512 HostVMMaxNonCachedPageTableLevels);
4513
4514 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
4515
4516 #ifdef __DML_VBA_DEBUG__
4517 dml_print("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles);
4518 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
4519 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
4520 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
4521 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
4522 #endif
4523
4524 return ExtraLatency;
4525 } // CalculateExtraLatency
4526
/*
 * CalculateHostVMDynamicLevels - number of host VM page table levels to account for.
 *
 * Returns 0 unless both GPUVMEnable and HostVMEnable are set. Otherwise the
 * result is HostVMMaxNonCachedPageTableLevels reduced according to
 * HostVMMinPageSize and clamped at 0:
 *   - HostVMMinPageSize <  2048     : no reduction
 *   - HostVMMinPageSize <  1048576  : reduced by 1
 *   - otherwise                     : reduced by 2
 *
 * NOTE(review): the unit of HostVMMinPageSize is not visible here (the
 * thresholds look like 2 MB / 1 GB if the value is in KB) - confirm.
 */
static dml_uint_t CalculateHostVMDynamicLevels(
		dml_bool_t GPUVMEnable,
		dml_bool_t HostVMEnable,
		dml_uint_t HostVMMinPageSize,
		dml_uint_t HostVMMaxNonCachedPageTableLevels)
{
	dml_uint_t levels_dropped;

	if (!(GPUVMEnable && HostVMEnable))
		return 0;

	if (HostVMMinPageSize < 2048)
		return HostVMMaxNonCachedPageTableLevels;

	levels_dropped = (HostVMMinPageSize < 1048576) ? 1 : 2;

	// Subtract in floating point so the difference can go negative before the clamp.
	return (dml_uint_t) dml_max(0, (dml_float_t) HostVMMaxNonCachedPageTableLevels - levels_dropped);
}
4547
/*
 * CalculateExtraLatencyBytes - byte count used for the extra latency estimate.
 *
 * Base term:
 *   ReorderingBytes
 *   + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *      + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024
 *
 * When GPUVMEnable is true, each of the NumberOfActiveSurfaces surfaces adds
 *   NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels)
 *   * HostVMInefficiencyFactor
 * where HostVMDynamicLevels comes from CalculateHostVMDynamicLevels().
 *
 * The sum is accumulated in floating point and truncated to dml_uint_t on return.
 */
static dml_uint_t CalculateExtraLatencyBytes(dml_uint_t ReorderingBytes,
		dml_uint_t TotalNumberOfActiveDPP,
		dml_uint_t PixelChunkSizeInKByte,
		dml_uint_t TotalNumberOfDCCActiveDPP,
		dml_uint_t MetaChunkSize,
		dml_bool_t GPUVMEnable,
		dml_bool_t HostVMEnable,
		dml_uint_t NumberOfActiveSurfaces,
		dml_uint_t NumberOfDPP[],
		dml_uint_t dpte_group_bytes[],
		dml_float_t HostVMInefficiencyFactor,
		dml_uint_t HostVMMinPageSize,
		dml_uint_t HostVMMaxNonCachedPageTableLevels)
{
	const dml_uint_t dynamic_levels = CalculateHostVMDynamicLevels(GPUVMEnable, HostVMEnable, HostVMMinPageSize, HostVMMaxNonCachedPageTableLevels);
	// Per-group multiplier; identical for every surface, so computed once.
	const dml_uint_t group_multiplier = 1 + 8 * dynamic_levels;
	dml_float_t total_bytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	// Without GPUVM there is no per-surface dpte contribution.
	if (GPUVMEnable != true)
		return (dml_uint_t)(total_bytes);

	for (dml_uint_t surf = 0; surf < NumberOfActiveSurfaces; ++surf)
		total_bytes += NumberOfDPP[surf] * dpte_group_bytes[surf] * group_multiplier * HostVMInefficiencyFactor;

	return (dml_uint_t)(total_bytes);
}
4572
/*
 * CalculateUrgentLatency - select the urgent latency and optionally adjust it.
 *
 * Starts from the largest of the three latency inputs. When
 * DoUrgentLatencyAdjustment is true, adds
 *   UrgentLatencyAdjustmentFabricClockComponent
 *   * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1)
 * so the adjustment is zero when FabricClock equals the reference clock.
 */
static dml_float_t CalculateUrgentLatency(
		dml_float_t UrgentLatencyPixelDataOnly,
		dml_float_t UrgentLatencyPixelMixedWithVMData,
		dml_float_t UrgentLatencyVMDataOnly,
		dml_bool_t DoUrgentLatencyAdjustment,
		dml_float_t UrgentLatencyAdjustmentFabricClockComponent,
		dml_float_t UrgentLatencyAdjustmentFabricClockReference,
		dml_float_t FabricClock)
{
	const dml_float_t worst_latency = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);

	if (DoUrgentLatencyAdjustment != true)
		return worst_latency;

	return worst_latency + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
4590
/*
 * RequiredDTBCLK - DTBCLK required for the given output configuration.
 *
 * Without DSC the result is max(PixelClock / 4 * OutputBpp / 24, 25).
 *
 * With DSC the result is 1.002 times the maximum of 25 and one quarter of:
 *   - the pixel word rate (PixelClock, halved unless OutputFormat is dml_444),
 *   - the average tribyte rate over the whole line (active + blank),
 *   - the tribyte rate during the active portion of the line.
 *
 * HCActive is derived from the per-slice width ceil(HActive / DSCSlices);
 * HCBlank from the audio parameters.
 *
 * NOTE(review): DSCSlices must be non-zero when DSCEnable is true; this is
 * not checked here.
 */
static dml_float_t RequiredDTBCLK(
		dml_bool_t DSCEnable,
		dml_float_t PixelClock,
		enum dml_output_format_class OutputFormat,
		dml_float_t OutputBpp,
		dml_uint_t DSCSlices,
		dml_uint_t HTotal,
		dml_uint_t HActive,
		dml_uint_t AudioRate,
		dml_uint_t AudioLayout)
{
	if (DSCEnable != true) {
		return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
	} else {
		dml_float_t PixelWordRate = PixelClock / (OutputFormat == dml_444 ? 1 : 2);
		// Divide in floating point: an integer HActive / DSCSlices would truncate
		// before dml_ceil() runs and the round-up would never take effect.
		dml_float_t SliceWidth = dml_ceil((dml_float_t) HActive / DSCSlices, 1);
		dml_float_t HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp * SliceWidth / 8.0, 1) / 3.0, 1);
		dml_float_t HCBlank = 64 + 32 * dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
		dml_float_t AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
		dml_float_t HActiveTribyteRate = PixelWordRate * HCActive / HActive;
		return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
	}
}
4613
UseMinimumDCFCLK(struct display_mode_lib_scratch_st * scratch,struct UseMinimumDCFCLK_params_st * p)4614 static void UseMinimumDCFCLK(struct display_mode_lib_scratch_st *scratch, struct UseMinimumDCFCLK_params_st *p)
4615 {
4616 struct UseMinimumDCFCLK_locals_st *s = &scratch->UseMinimumDCFCLK_locals;
4617
4618 s->NormalEfficiency = p->PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
4619 for (dml_uint_t j = 0; j < 2; ++j) {
4620
4621
4622 s->TotalMaxPrefetchFlipDPTERowBandwidth[j] = 0;
4623 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
4624 s->TotalMaxPrefetchFlipDPTERowBandwidth[j] = s->TotalMaxPrefetchFlipDPTERowBandwidth[j] + p->NoOfDPP[j][k] * p->DPTEBytesPerRow[j][k] / (15.75 * p->HTotal[k] / p->PixelClock[k]);
4625 }
4626
4627 for (dml_uint_t k = 0; k <= p->NumberOfActiveSurfaces - 1; ++k) {
4628 s->NoOfDPPState[k] = p->NoOfDPP[j][k];
4629 }
4630
4631 s->DPTEBandwidth = s->TotalMaxPrefetchFlipDPTERowBandwidth[j];
4632
4633 s->DCFCLKRequiredForAverageBandwidth = dml_max(p->ProjectedDCFCLKDeepSleep[j], s->DPTEBandwidth / s->NormalEfficiency / p->ReturnBusWidth);
4634
4635 s->ExtraLatencyBytes = CalculateExtraLatencyBytes(p->ReorderingBytes, p->TotalNumberOfActiveDPP[j], p->PixelChunkSizeInKByte, p->TotalNumberOfDCCActiveDPP[j],
4636 p->MetaChunkSize, p->GPUVMEnable, p->HostVMEnable, p->NumberOfActiveSurfaces, s->NoOfDPPState, p->dpte_group_bytes,
4637 1, p->HostVMMinPageSize, p->HostVMMaxNonCachedPageTableLevels);
4638 s->ExtraLatencyCycles = p->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + s->ExtraLatencyBytes / s->NormalEfficiency / p->ReturnBusWidth;
4639 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
4640 dml_float_t DCFCLKCyclesRequiredInPrefetch;
4641 dml_float_t PrefetchTime;
4642
4643 s->PixelDCFCLKCyclesRequiredInPrefetch[k] = (p->PrefetchLinesY[j][k] * p->swath_width_luma_ub_all_states[j][k] * p->BytePerPixelY[k] + p->PrefetchLinesC[j][k] * p->swath_width_chroma_ub_all_states[j][k] * p->BytePerPixelC[k]) / s->NormalEfficiency / p->ReturnBusWidth;
4644 DCFCLKCyclesRequiredInPrefetch = 2 * s->ExtraLatencyCycles / s->NoOfDPPState[k] + p->PDEAndMetaPTEBytesPerFrame[j][k] / s->NormalEfficiency / s->NormalEfficiency / p->ReturnBusWidth * (p->GPUVMMaxPageTableLevels > 2 ? 1 : 0) + 2 * p->DPTEBytesPerRow[j][k] / s->NormalEfficiency / s->NormalEfficiency / p->ReturnBusWidth + 2 * p->MetaRowBytes[j][k] / s->NormalEfficiency / p->ReturnBusWidth + s->PixelDCFCLKCyclesRequiredInPrefetch[k];
4645 s->PrefetchPixelLinesTime[k] = dml_max(p->PrefetchLinesY[j][k], p->PrefetchLinesC[j][k]) * p->HTotal[k] / p->PixelClock[k];
4646 s->DynamicMetadataVMExtraLatency[k] = (p->GPUVMEnable == true && p->DynamicMetadataEnable[k] == true && p->DynamicMetadataVMEnabled == true) ? p->UrgLatency * p->GPUVMMaxPageTableLevels * (p->HostVMEnable == true ? p->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
4647
4648 s->MinimumTWait = CalculateTWait(p->MaxPrefetchMode,
4649 p->UseMALLForPStateChange[k],
4650 p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
4651 p->DRRDisplay[k],
4652 p->DRAMClockChangeLatencyFinal,
4653 p->FCLKChangeLatency,
4654 p->UrgLatency,
4655 p->SREnterPlusExitTime);
4656
4657 PrefetchTime = (p->MaximumVStartup[j][k] - 1) * p->HTotal[k] / p->PixelClock[k] - s->MinimumTWait - p->UrgLatency * ((p->GPUVMMaxPageTableLevels <= 2 ? p->GPUVMMaxPageTableLevels : p->GPUVMMaxPageTableLevels - 2) * (p->HostVMEnable == true ? p->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - s->DynamicMetadataVMExtraLatency[k];
4658
4659 if (PrefetchTime > 0) {
4660 dml_float_t ExpectedVRatioPrefetch;
4661 ExpectedVRatioPrefetch = s->PrefetchPixelLinesTime[k] / (PrefetchTime * s->PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
4662 s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = s->NoOfDPPState[k] * s->PixelDCFCLKCyclesRequiredInPrefetch[k] / s->PrefetchPixelLinesTime[k] * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4);
4663 if (p->HostVMEnable == true || p->ImmediateFlipRequirement == true) {
4664 s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = s->DCFCLKRequiredForPeakBandwidthPerSurface[k] + s->NoOfDPPState[k] * s->DPTEBandwidth / s->NormalEfficiency / s->NormalEfficiency / p->ReturnBusWidth;
4665 }
4666 } else {
4667 s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = p->DCFCLKPerState;
4668 }
4669 if (p->DynamicMetadataEnable[k] == true) {
4670 dml_float_t TSetupPipe;
4671 dml_float_t TdmbfPipe;
4672 dml_float_t TdmsksPipe;
4673 dml_float_t TdmecPipe;
4674 dml_float_t AllowedTimeForUrgentExtraLatency;
4675
4676 CalculateVUpdateAndDynamicMetadataParameters(
4677 p->MaxInterDCNTileRepeaters,
4678 p->RequiredDPPCLKPerSurface[j][k],
4679 p->RequiredDISPCLK[j],
4680 p->ProjectedDCFCLKDeepSleep[j],
4681 p->PixelClock[k],
4682 p->HTotal[k],
4683 p->VTotal[k] - p->VActive[k],
4684 p->DynamicMetadataTransmittedBytes[k],
4685 p->DynamicMetadataLinesBeforeActiveRequired[k],
4686 p->Interlace[k],
4687 p->ProgressiveToInterlaceUnitInOPP,
4688
4689 // Output
4690 &TSetupPipe,
4691 &TdmbfPipe,
4692 &TdmecPipe,
4693 &TdmsksPipe,
4694 &s->dummy1,
4695 &s->dummy2,
4696 &s->dummy3);
4697
4698 AllowedTimeForUrgentExtraLatency = p->MaximumVStartup[j][k] * p->HTotal[k] / p->PixelClock[k] - s->MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe - TdmsksPipe - s->DynamicMetadataVMExtraLatency[k];
4699 if (AllowedTimeForUrgentExtraLatency > 0) {
4700 s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = dml_max(s->DCFCLKRequiredForPeakBandwidthPerSurface[k], s->ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
4701 } else {
4702 s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = p->DCFCLKPerState;
4703 }
4704 }
4705 }
4706 s->DCFCLKRequiredForPeakBandwidth = 0;
4707 for (dml_uint_t k = 0; k <= p->NumberOfActiveSurfaces - 1; ++k) {
4708 s->DCFCLKRequiredForPeakBandwidth = s->DCFCLKRequiredForPeakBandwidth + s->DCFCLKRequiredForPeakBandwidthPerSurface[k];
4709 }
4710 s->MinimumTvmPlus2Tr0 = p->UrgLatency * (p->GPUVMEnable == true ? (p->HostVMEnable == true ? (p->GPUVMMaxPageTableLevels + 2) * (p->HostVMMaxNonCachedPageTableLevels + 1) - 1 : p->GPUVMMaxPageTableLevels + 1) : 0);
4711 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
4712 dml_float_t MaximumTvmPlus2Tr0PlusTsw;
4713 MaximumTvmPlus2Tr0PlusTsw = (p->MaximumVStartup[j][k] - 2) * p->HTotal[k] / p->PixelClock[k] - s->MinimumTWait - s->DynamicMetadataVMExtraLatency[k];
4714 if (MaximumTvmPlus2Tr0PlusTsw <= s->MinimumTvmPlus2Tr0 + s->PrefetchPixelLinesTime[k] / 4) {
4715 s->DCFCLKRequiredForPeakBandwidth = p->DCFCLKPerState;
4716 } else {
4717 s->DCFCLKRequiredForPeakBandwidth = dml_max3(s->DCFCLKRequiredForPeakBandwidth,
4718 2 * s->ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - s->MinimumTvmPlus2Tr0 - s->PrefetchPixelLinesTime[k] / 4),
4719 (2 * s->ExtraLatencyCycles + s->PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - s->MinimumTvmPlus2Tr0));
4720 }
4721 }
4722 p->DCFCLKState[j] = dml_min(p->DCFCLKPerState, 1.05 * dml_max(s->DCFCLKRequiredForAverageBandwidth, s->DCFCLKRequiredForPeakBandwidth));
4723 }
4724 }
4725
4726
/*
 * UnboundedRequest - decide whether unbounded requesting is used.
 *
 * Returns true only when all of the following hold:
 *   - the policy is not dml_unbounded_requesting_disable,
 *   - if the policy is dml_unbounded_requesting_edp_only, Output is dml_edp,
 *   - exactly one DPP is active,
 *   - NoChromaOrLinear is set.
 */
static dml_bool_t UnboundedRequest(enum dml_unbounded_requesting_policy UseUnboundedRequestingFinal,
		dml_uint_t TotalNumberOfActiveDPP,
		dml_bool_t NoChromaOrLinear,
		enum dml_output_encoder_class Output)
{
	if (UseUnboundedRequestingFinal == dml_unbounded_requesting_disable)
		return false;

	// The eDP-only policy rules out every other encoder type.
	if (UseUnboundedRequestingFinal == dml_unbounded_requesting_edp_only && Output != dml_edp)
		return false;

	return (TotalNumberOfActiveDPP == 1 && NoChromaOrLinear);
}
4741
CalculateSurfaceSizeInMall(dml_uint_t NumberOfActiveSurfaces,dml_uint_t MALLAllocatedForDCN,enum dml_use_mall_for_static_screen_mode UseMALLForStaticScreen[],dml_bool_t DCCEnable[],dml_bool_t ViewportStationary[],dml_uint_t ViewportXStartY[],dml_uint_t ViewportYStartY[],dml_uint_t ViewportXStartC[],dml_uint_t ViewportYStartC[],dml_uint_t ViewportWidthY[],dml_uint_t ViewportHeightY[],dml_uint_t BytesPerPixelY[],dml_uint_t ViewportWidthC[],dml_uint_t ViewportHeightC[],dml_uint_t BytesPerPixelC[],dml_uint_t SurfaceWidthY[],dml_uint_t SurfaceWidthC[],dml_uint_t SurfaceHeightY[],dml_uint_t SurfaceHeightC[],dml_uint_t Read256BytesBlockWidthY[],dml_uint_t Read256BytesBlockWidthC[],dml_uint_t Read256BytesBlockHeightY[],dml_uint_t Read256BytesBlockHeightC[],dml_uint_t ReadBlockWidthY[],dml_uint_t ReadBlockWidthC[],dml_uint_t ReadBlockHeightY[],dml_uint_t ReadBlockHeightC[],dml_uint_t SurfaceSizeInMALL[],dml_bool_t * ExceededMALLSize)4742 static void CalculateSurfaceSizeInMall(
4743 dml_uint_t NumberOfActiveSurfaces,
4744 dml_uint_t MALLAllocatedForDCN,
4745 enum dml_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
4746 dml_bool_t DCCEnable[],
4747 dml_bool_t ViewportStationary[],
4748 dml_uint_t ViewportXStartY[],
4749 dml_uint_t ViewportYStartY[],
4750 dml_uint_t ViewportXStartC[],
4751 dml_uint_t ViewportYStartC[],
4752 dml_uint_t ViewportWidthY[],
4753 dml_uint_t ViewportHeightY[],
4754 dml_uint_t BytesPerPixelY[],
4755 dml_uint_t ViewportWidthC[],
4756 dml_uint_t ViewportHeightC[],
4757 dml_uint_t BytesPerPixelC[],
4758 dml_uint_t SurfaceWidthY[],
4759 dml_uint_t SurfaceWidthC[],
4760 dml_uint_t SurfaceHeightY[],
4761 dml_uint_t SurfaceHeightC[],
4762 dml_uint_t Read256BytesBlockWidthY[],
4763 dml_uint_t Read256BytesBlockWidthC[],
4764 dml_uint_t Read256BytesBlockHeightY[],
4765 dml_uint_t Read256BytesBlockHeightC[],
4766 dml_uint_t ReadBlockWidthY[],
4767 dml_uint_t ReadBlockWidthC[],
4768 dml_uint_t ReadBlockHeightY[],
4769 dml_uint_t ReadBlockHeightC[],
4770
4771 // Output
4772 dml_uint_t SurfaceSizeInMALL[],
4773 dml_bool_t *ExceededMALLSize)
4774 {
4775 dml_uint_t TotalSurfaceSizeInMALL = 0;
4776
4777 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
4778 if (ViewportStationary[k]) {
4779 SurfaceSizeInMALL[k] = (dml_uint_t)(dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]), dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1, ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k], ReadBlockWidthY[k])) *
4780 dml_min(dml_ceil(SurfaceHeightY[k], ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] + ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) - dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) *
4781 BytesPerPixelY[k]);
4782
4783 if (ReadBlockWidthC[k] > 0) {
4784 SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
4785 dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]), dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) - dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
4786 dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]), dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) - dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) * BytesPerPixelC[k]);
4787 }
4788 if (DCCEnable[k] == true) {
4789 SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
4790 dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]), dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 * Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k]) - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k])) *
4791 dml_min(dml_ceil(SurfaceHeightY[k], 8 * Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] + ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 * Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256);
4792 if (Read256BytesBlockWidthC[k] > 0) {
4793 SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
4794 dml_min(dml_ceil(SurfaceWidthC[k], 8 * Read256BytesBlockWidthC[k]), dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8 * Read256BytesBlockWidthC[k] - 1, 8 * Read256BytesBlockWidthC[k]) - dml_floor(ViewportXStartC[k], 8 * Read256BytesBlockWidthC[k])) *
4795 dml_min(dml_ceil(SurfaceHeightC[k], 8 * Read256BytesBlockHeightC[k]), dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + 8 * Read256BytesBlockHeightC[k] - 1, 8 * Read256BytesBlockHeightC[k]) - dml_floor(ViewportYStartC[k], 8 * Read256BytesBlockHeightC[k])) * BytesPerPixelC[k] / 256);
4796 }
4797 }
4798 } else {
4799 SurfaceSizeInMALL[k] = (dml_uint_t)(dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) * dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * BytesPerPixelY[k]);
4800 if (ReadBlockWidthC[k] > 0) {
4801 SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
4802 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
4803 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * BytesPerPixelC[k]);
4804 }
4805 if (DCCEnable[k] == true) {
4806 SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
4807 dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 * Read256BytesBlockWidthY[k] - 1), 8 * Read256BytesBlockWidthY[k]) *
4808 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1), 8 * Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256);
4809
4810 if (Read256BytesBlockWidthC[k] > 0) {
4811 SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
4812 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 * Read256BytesBlockWidthC[k] - 1), 8 * Read256BytesBlockWidthC[k]) *
4813 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 * Read256BytesBlockHeightC[k] - 1), 8 * Read256BytesBlockHeightC[k]) * BytesPerPixelC[k] / 256);
4814 }
4815 }
4816 }
4817 }
4818
4819 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
4820 if (UseMALLForStaticScreen[k] == dml_use_mall_static_screen_enable)
4821 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
4822 }
4823 *ExceededMALLSize = (TotalSurfaceSizeInMALL > MALLAllocatedForDCN * 1024 * 1024);
4824 } // CalculateSurfaceSizeInMall
4825
CalculateDETBufferSize(dml_uint_t DETSizeOverride[],enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],dml_bool_t ForceSingleDPP,dml_uint_t NumberOfActiveSurfaces,dml_bool_t UnboundedRequestEnabled,dml_uint_t nomDETInKByte,dml_uint_t MaxTotalDETInKByte,dml_uint_t ConfigReturnBufferSizeInKByte,dml_uint_t MinCompressedBufferSizeInKByte,dml_uint_t ConfigReturnBufferSegmentSizeInkByte,dml_uint_t CompressedBufferSegmentSizeInkByteFinal,enum dml_source_format_class SourcePixelFormat[],dml_float_t ReadBandwidthLuma[],dml_float_t ReadBandwidthChroma[],dml_uint_t RoundedUpMaxSwathSizeBytesY[],dml_uint_t RoundedUpMaxSwathSizeBytesC[],dml_uint_t DPPPerSurface[],dml_uint_t DETBufferSizeInKByte[],dml_uint_t * CompressedBufferSizeInkByte)4826 static void CalculateDETBufferSize(
4827 dml_uint_t DETSizeOverride[],
4828 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
4829 dml_bool_t ForceSingleDPP,
4830 dml_uint_t NumberOfActiveSurfaces,
4831 dml_bool_t UnboundedRequestEnabled,
4832 dml_uint_t nomDETInKByte,
4833 dml_uint_t MaxTotalDETInKByte,
4834 dml_uint_t ConfigReturnBufferSizeInKByte,
4835 dml_uint_t MinCompressedBufferSizeInKByte,
4836 dml_uint_t ConfigReturnBufferSegmentSizeInkByte,
4837 dml_uint_t CompressedBufferSegmentSizeInkByteFinal,
4838 enum dml_source_format_class SourcePixelFormat[],
4839 dml_float_t ReadBandwidthLuma[],
4840 dml_float_t ReadBandwidthChroma[],
4841 dml_uint_t RoundedUpMaxSwathSizeBytesY[],
4842 dml_uint_t RoundedUpMaxSwathSizeBytesC[],
4843 dml_uint_t DPPPerSurface[],
4844 // Output
4845 dml_uint_t DETBufferSizeInKByte[],
4846 dml_uint_t *CompressedBufferSizeInkByte)
4847 {
4848 dml_uint_t DETBufferSizePoolInKByte;
4849 dml_uint_t NextDETBufferPieceInKByte;
4850 dml_bool_t DETPieceAssignedToThisSurfaceAlready[__DML_NUM_PLANES__];
4851 dml_bool_t NextPotentialSurfaceToAssignDETPieceFound;
4852 dml_uint_t NextSurfaceToAssignDETPiece;
4853 dml_float_t TotalBandwidth;
4854 dml_float_t BandwidthOfSurfacesNotAssignedDETPiece;
4855 dml_uint_t max_minDET;
4856 dml_uint_t minDET;
4857 dml_uint_t minDET_pipe;
4858
4859 #ifdef __DML_VBA_DEBUG__
4860 dml_print("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
4861 dml_print("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
4862 dml_print("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
4863 dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, UnboundedRequestEnabled);
4864 dml_print("DML::%s: MaxTotalDETInKByte = %u\n", __func__, MaxTotalDETInKByte);
4865 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
4866 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, MinCompressedBufferSizeInKByte);
4867 dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %u\n", __func__, CompressedBufferSegmentSizeInkByteFinal);
4868 #endif
4869
4870 // Note: Will use default det size if that fits 2 swaths
4871 if (UnboundedRequestEnabled) {
4872 if (DETSizeOverride[0] > 0) {
4873 DETBufferSizeInKByte[0] = DETSizeOverride[0];
4874 } else {
4875 DETBufferSizeInKByte[0] = (dml_uint_t) dml_max(128.0, dml_ceil(2.0 * ((dml_float_t) RoundedUpMaxSwathSizeBytesY[0] + (dml_float_t) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte));
4876 }
4877 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
4878 } else {
4879 DETBufferSizePoolInKByte = MaxTotalDETInKByte;
4880 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
4881 DETBufferSizeInKByte[k] = 0;
4882 if (SourcePixelFormat[k] == dml_420_8 || SourcePixelFormat[k] == dml_420_10 || SourcePixelFormat[k] == dml_420_12) {
4883 max_minDET = nomDETInKByte - ConfigReturnBufferSegmentSizeInkByte;
4884 } else {
4885 max_minDET = nomDETInKByte;
4886 }
4887 minDET = 128;
4888 minDET_pipe = 0;
4889
4890 // add DET resource until can hold 2 full swaths
4891 while (minDET <= max_minDET && minDET_pipe == 0) {
4892 if (2.0 * ((dml_float_t) RoundedUpMaxSwathSizeBytesY[k] + (dml_float_t) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
4893 minDET_pipe = minDET;
4894 minDET = minDET + ConfigReturnBufferSegmentSizeInkByte;
4895 }
4896
4897 #ifdef __DML_VBA_DEBUG__
4898 dml_print("DML::%s: k=%u minDET = %u\n", __func__, k, minDET);
4899 dml_print("DML::%s: k=%u max_minDET = %u\n", __func__, k, max_minDET);
4900 dml_print("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, minDET_pipe);
4901 dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesY = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesY[k]);
4902 dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesC = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesC[k]);
4903 #endif
4904
4905 if (minDET_pipe == 0) {
4906 minDET_pipe = (dml_uint_t)(dml_max(128, dml_ceil(((dml_float_t)RoundedUpMaxSwathSizeBytesY[k] + (dml_float_t)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte)));
4907 #ifdef __DML_VBA_DEBUG__
4908 dml_print("DML::%s: k=%u minDET_pipe = %u (assume each plane take half DET)\n", __func__, k, minDET_pipe);
4909 #endif
4910 }
4911
4912 if (UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) {
4913 DETBufferSizeInKByte[k] = 0;
4914 } else if (DETSizeOverride[k] > 0) {
4915 DETBufferSizeInKByte[k] = DETSizeOverride[k];
4916 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
4917 } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
4918 DETBufferSizeInKByte[k] = minDET_pipe;
4919 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
4920 }
4921
4922 #ifdef __DML_VBA_DEBUG__
4923 dml_print("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]);
4924 dml_print("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, DETSizeOverride[k]);
4925 dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
4926 dml_print("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, DETBufferSizePoolInKByte);
4927 #endif
4928 }
4929
4930 TotalBandwidth = 0;
4931 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
4932 if (UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe)
4933 TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
4934 }
4935 #ifdef __DML_VBA_DEBUG__
4936 dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
4937 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
4938 dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
4939 }
4940 dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
4941 #endif
4942 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
4943 BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
4944 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
4945
4946 if (UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) {
4947 DETPieceAssignedToThisSurfaceAlready[k] = true;
4948 } else if (DETSizeOverride[k] > 0 || (((dml_float_t) (ForceSingleDPP ? 1 : DPPPerSurface[k]) * (dml_float_t) DETBufferSizeInKByte[k] / (dml_float_t) MaxTotalDETInKByte) >= ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
4949 DETPieceAssignedToThisSurfaceAlready[k] = true;
4950 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
4951 } else {
4952 DETPieceAssignedToThisSurfaceAlready[k] = false;
4953 }
4954 #ifdef __DML_VBA_DEBUG__
4955 dml_print("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, DETPieceAssignedToThisSurfaceAlready[k]);
4956 dml_print("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, BandwidthOfSurfacesNotAssignedDETPiece);
4957 #endif
4958 }
4959
4960 for (dml_uint_t j = 0; j < NumberOfActiveSurfaces; ++j) {
4961 NextPotentialSurfaceToAssignDETPieceFound = false;
4962 NextSurfaceToAssignDETPiece = 0;
4963
4964 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
4965 #ifdef __DML_VBA_DEBUG__
4966 dml_print("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, ReadBandwidthLuma[k]);
4967 dml_print("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, ReadBandwidthChroma[k]);
4968 dml_print("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
4969 dml_print("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
4970 dml_print("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, NextSurfaceToAssignDETPiece);
4971 #endif
4972 if (!DETPieceAssignedToThisSurfaceAlready[k] && (!NextPotentialSurfaceToAssignDETPieceFound ||
4973 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
4974 NextSurfaceToAssignDETPiece = k;
4975 NextPotentialSurfaceToAssignDETPieceFound = true;
4976 }
4977 #ifdef __DML_VBA_DEBUG__
4978 dml_print("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
4979 dml_print("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
4980 #endif
4981 }
4982
4983 if (NextPotentialSurfaceToAssignDETPieceFound) {
4984 // Note: To show the banker's rounding behavior in VBA and also the fact that the DET buffer size varies due to precision issue
4985 //
4986 //dml_float_t tmp1 = ((dml_float_t) DETBufferSizePoolInKByte * (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / BandwidthOfSurfacesNotAssignedDETPiece /
4987 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
4988 //dml_float_t tmp2 = dml_round((dml_float_t) DETBufferSizePoolInKByte * (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / BandwidthOfSurfacesNotAssignedDETPiece /
4989 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
4990 //
4991 //dml_print("DML::%s: j=%u, tmp1 = %f\n", __func__, j, tmp1);
4992 //dml_print("DML::%s: j=%u, tmp2 = %f\n", __func__, j, tmp2);
4993
4994 NextDETBufferPieceInKByte = (dml_uint_t)(dml_min(
4995 dml_round((dml_float_t) DETBufferSizePoolInKByte * (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / BandwidthOfSurfacesNotAssignedDETPiece /
4996 ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte), true)
4997 * (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte,
4998 dml_floor((dml_float_t) DETBufferSizePoolInKByte, (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte)));
4999
5000 #ifdef __DML_VBA_DEBUG__
5001 dml_print("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, DETBufferSizePoolInKByte);
5002 dml_print("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, NextSurfaceToAssignDETPiece);
5003 dml_print("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
5004 dml_print("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
5005 dml_print("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
5006 dml_print("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, NextDETBufferPieceInKByte);
5007 dml_print("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, NextSurfaceToAssignDETPiece, DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
5008 #endif
5009
5010 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] = DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] + NextDETBufferPieceInKByte / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
5011 #ifdef __DML_VBA_DEBUG__
5012 dml_print("to %u\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
5013 #endif
5014
5015 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
5016 DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
5017 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
5018 }
5019 }
5020 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
5021 }
5022 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / ConfigReturnBufferSegmentSizeInkByte;
5023
5024 #ifdef __DML_VBA_DEBUG__
5025 dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
5026 dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *CompressedBufferSizeInkByte);
5027 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
5028 dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
5029 }
5030 #endif
5031 } // CalculateDETBufferSize
5032
5033
5034 /// @brief Calculate the bound for return buffer sizing
CalculateMaxDETAndMinCompressedBufferSize(dml_uint_t ConfigReturnBufferSizeInKByte,dml_uint_t ConfigReturnBufferSegmentSizeInKByte,dml_uint_t ROBBufferSizeInKByte,dml_uint_t MaxNumDPP,dml_bool_t nomDETInKByteOverrideEnable,dml_uint_t nomDETInKByteOverrideValue,dml_uint_t * MaxTotalDETInKByte,dml_uint_t * nomDETInKByte,dml_uint_t * MinCompressedBufferSizeInKByte)5035 static void CalculateMaxDETAndMinCompressedBufferSize(
5036 dml_uint_t ConfigReturnBufferSizeInKByte,
5037 dml_uint_t ConfigReturnBufferSegmentSizeInKByte,
5038 dml_uint_t ROBBufferSizeInKByte,
5039 dml_uint_t MaxNumDPP,
5040 dml_bool_t nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
5041 dml_uint_t nomDETInKByteOverrideValue, // VBA_DELTA
5042
5043 // Output
5044 dml_uint_t *MaxTotalDETInKByte,
5045 dml_uint_t *nomDETInKByte,
5046 dml_uint_t *MinCompressedBufferSizeInKByte)
5047 {
5048 (void)ROBBufferSizeInKByte;
5049 *MaxTotalDETInKByte = ConfigReturnBufferSizeInKByte - ConfigReturnBufferSegmentSizeInKByte;
5050 *nomDETInKByte = (dml_uint_t)(dml_floor((dml_float_t) *MaxTotalDETInKByte / (dml_float_t) MaxNumDPP, ConfigReturnBufferSegmentSizeInKByte));
5051 *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;
5052
5053 #ifdef __DML_VBA_DEBUG__
5054 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
5055 dml_print("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte);
5056 dml_print("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP);
5057 dml_print("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte);
5058 dml_print("DML::%s: nomDETInKByte = %u\n", __func__, *nomDETInKByte);
5059 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte);
5060 #endif
5061
5062 if (nomDETInKByteOverrideEnable) {
5063 *nomDETInKByte = nomDETInKByteOverrideValue;
5064 dml_print("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte);
5065 }
5066 } // CalculateMaxDETAndMinCompressedBufferSize
5067
5068 /// @brief Calculate all the RQ request attributes, like row height and # swath
CalculateVMRowAndSwath(struct display_mode_lib_scratch_st * scratch,struct CalculateVMRowAndSwath_params_st * p)5069 static void CalculateVMRowAndSwath(struct display_mode_lib_scratch_st *scratch,
5070 struct CalculateVMRowAndSwath_params_st *p)
5071 {
5072 struct CalculateVMRowAndSwath_locals_st *s = &scratch->CalculateVMRowAndSwath_locals;
5073
5074 s->HostVMDynamicLevels = CalculateHostVMDynamicLevels(p->GPUVMEnable, p->HostVMEnable, p->HostVMMinPageSize, p->HostVMMaxNonCachedPageTableLevels);
5075
5076 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
5077 if (p->HostVMEnable == true) {
5078 p->vm_group_bytes[k] = 512;
5079 p->dpte_group_bytes[k] = 512;
5080 } else if (p->GPUVMEnable == true) {
5081 p->vm_group_bytes[k] = 2048;
5082 if (p->GPUVMMinPageSizeKBytes[k] >= 64 && dml_is_vertical_rotation(p->myPipe[k].SourceScan)) {
5083 p->dpte_group_bytes[k] = 512;
5084 } else {
5085 p->dpte_group_bytes[k] = 2048;
5086 }
5087 } else {
5088 p->vm_group_bytes[k] = 0;
5089 p->dpte_group_bytes[k] = 0;
5090 }
5091
5092 if (p->myPipe[k].SourcePixelFormat == dml_420_8 || p->myPipe[k].SourcePixelFormat == dml_420_10 ||
5093 p->myPipe[k].SourcePixelFormat == dml_420_12 || p->myPipe[k].SourcePixelFormat == dml_rgbe_alpha) {
5094 if ((p->myPipe[k].SourcePixelFormat == dml_420_10 || p->myPipe[k].SourcePixelFormat == dml_420_12) && !dml_is_vertical_rotation(p->myPipe[k].SourceScan)) {
5095 s->PTEBufferSizeInRequestsForLuma[k] = (p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma) / 2;
5096 s->PTEBufferSizeInRequestsForChroma[k] = s->PTEBufferSizeInRequestsForLuma[k];
5097 } else {
5098 s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma;
5099 s->PTEBufferSizeInRequestsForChroma[k] = p->PTEBufferSizeInRequestsChroma;
5100 }
5101
5102 s->PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
5103 p->myPipe[k].ViewportStationary,
5104 p->myPipe[k].DCCEnable,
5105 p->myPipe[k].DPPPerSurface,
5106 p->myPipe[k].BlockHeight256BytesC,
5107 p->myPipe[k].BlockWidth256BytesC,
5108 p->myPipe[k].SourcePixelFormat,
5109 p->myPipe[k].SurfaceTiling,
5110 p->myPipe[k].BytePerPixelC,
5111 p->myPipe[k].SourceScan,
5112 p->SwathWidthC[k],
5113 p->myPipe[k].ViewportHeightChroma,
5114 p->myPipe[k].ViewportXStartC,
5115 p->myPipe[k].ViewportYStartC,
5116 p->GPUVMEnable,
5117 p->GPUVMMaxPageTableLevels,
5118 p->GPUVMMinPageSizeKBytes[k],
5119 s->PTEBufferSizeInRequestsForChroma[k],
5120 p->myPipe[k].PitchC,
5121 p->myPipe[k].DCCMetaPitchC,
5122 p->myPipe[k].BlockWidthC,
5123 p->myPipe[k].BlockHeightC,
5124
5125 // Output
5126 &s->MetaRowByteC[k],
5127 &s->PixelPTEBytesPerRowC[k],
5128 &s->PixelPTEBytesPerRowStorageC[k],
5129 &p->dpte_row_width_chroma_ub[k],
5130 &p->dpte_row_height_chroma[k],
5131 &p->dpte_row_height_linear_chroma[k],
5132 &s->PixelPTEBytesPerRowC_one_row_per_frame[k],
5133 &s->dpte_row_width_chroma_ub_one_row_per_frame[k],
5134 &s->dpte_row_height_chroma_one_row_per_frame[k],
5135 &p->meta_req_width_chroma[k],
5136 &p->meta_req_height_chroma[k],
5137 &p->meta_row_width_chroma[k],
5138 &p->meta_row_height_chroma[k],
5139 &p->PixelPTEReqWidthC[k],
5140 &p->PixelPTEReqHeightC[k],
5141 &p->PTERequestSizeC[k],
5142 &p->dpde0_bytes_per_frame_ub_c[k],
5143 &p->meta_pte_bytes_per_frame_ub_c[k]);
5144
5145 p->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines (
5146 p->myPipe[k].VRatioChroma,
5147 p->myPipe[k].VTapsChroma,
5148 p->myPipe[k].InterlaceEnable,
5149 p->myPipe[k].ProgressiveToInterlaceUnitInOPP,
5150 p->myPipe[k].SwathHeightC,
5151 p->myPipe[k].SourceScan,
5152 p->myPipe[k].ViewportStationary,
5153 p->SwathWidthC[k],
5154 p->myPipe[k].ViewportHeightChroma,
5155 p->myPipe[k].ViewportXStartC,
5156 p->myPipe[k].ViewportYStartC,
5157
5158 // Output
5159 &p->VInitPreFillC[k],
5160 &p->MaxNumSwathC[k]);
5161 } else {
5162 s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma;
5163 s->PTEBufferSizeInRequestsForChroma[k] = 0;
5164 s->PixelPTEBytesPerRowC[k] = 0;
5165 s->PixelPTEBytesPerRowStorageC[k] = 0;
5166 s->PDEAndMetaPTEBytesFrameC = 0;
5167 s->MetaRowByteC[k] = 0;
5168 p->MaxNumSwathC[k] = 0;
5169 p->PrefetchSourceLinesC[k] = 0;
5170 s->dpte_row_height_chroma_one_row_per_frame[k] = 0;
5171 s->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
5172 s->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
5173 }
5174
5175 s->PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
5176 p->myPipe[k].ViewportStationary,
5177 p->myPipe[k].DCCEnable,
5178 p->myPipe[k].DPPPerSurface,
5179 p->myPipe[k].BlockHeight256BytesY,
5180 p->myPipe[k].BlockWidth256BytesY,
5181 p->myPipe[k].SourcePixelFormat,
5182 p->myPipe[k].SurfaceTiling,
5183 p->myPipe[k].BytePerPixelY,
5184 p->myPipe[k].SourceScan,
5185 p->SwathWidthY[k],
5186 p->myPipe[k].ViewportHeight,
5187 p->myPipe[k].ViewportXStart,
5188 p->myPipe[k].ViewportYStart,
5189 p->GPUVMEnable,
5190 p->GPUVMMaxPageTableLevels,
5191 p->GPUVMMinPageSizeKBytes[k],
5192 s->PTEBufferSizeInRequestsForLuma[k],
5193 p->myPipe[k].PitchY,
5194 p->myPipe[k].DCCMetaPitchY,
5195 p->myPipe[k].BlockWidthY,
5196 p->myPipe[k].BlockHeightY,
5197
5198 // Output
5199 &s->MetaRowByteY[k],
5200 &s->PixelPTEBytesPerRowY[k],
5201 &s->PixelPTEBytesPerRowStorageY[k],
5202 &p->dpte_row_width_luma_ub[k],
5203 &p->dpte_row_height_luma[k],
5204 &p->dpte_row_height_linear_luma[k],
5205 &s->PixelPTEBytesPerRowY_one_row_per_frame[k],
5206 &s->dpte_row_width_luma_ub_one_row_per_frame[k],
5207 &s->dpte_row_height_luma_one_row_per_frame[k],
5208 &p->meta_req_width[k],
5209 &p->meta_req_height[k],
5210 &p->meta_row_width[k],
5211 &p->meta_row_height[k],
5212 &p->PixelPTEReqWidthY[k],
5213 &p->PixelPTEReqHeightY[k],
5214 &p->PTERequestSizeY[k],
5215 &p->dpde0_bytes_per_frame_ub_l[k],
5216 &p->meta_pte_bytes_per_frame_ub_l[k]);
5217
5218 p->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
5219 p->myPipe[k].VRatio,
5220 p->myPipe[k].VTaps,
5221 p->myPipe[k].InterlaceEnable,
5222 p->myPipe[k].ProgressiveToInterlaceUnitInOPP,
5223 p->myPipe[k].SwathHeightY,
5224 p->myPipe[k].SourceScan,
5225 p->myPipe[k].ViewportStationary,
5226 p->SwathWidthY[k],
5227 p->myPipe[k].ViewportHeight,
5228 p->myPipe[k].ViewportXStart,
5229 p->myPipe[k].ViewportYStart,
5230
5231 // Output
5232 &p->VInitPreFillY[k],
5233 &p->MaxNumSwathY[k]);
5234
5235 p->PDEAndMetaPTEBytesFrame[k] = (s->PDEAndMetaPTEBytesFrameY + s->PDEAndMetaPTEBytesFrameC) * (1 + 8 * s->HostVMDynamicLevels);
5236 p->MetaRowByte[k] = s->MetaRowByteY[k] + s->MetaRowByteC[k];
5237
5238 if (s->PixelPTEBytesPerRowStorageY[k] <= 64 * s->PTEBufferSizeInRequestsForLuma[k] && s->PixelPTEBytesPerRowStorageC[k] <= 64 * s->PTEBufferSizeInRequestsForChroma[k]) {
5239 p->PTEBufferSizeNotExceeded[k] = true;
5240 } else {
5241 p->PTEBufferSizeNotExceeded[k] = false;
5242 #ifdef __DML_VBA_DEBUG__
5243 dml_print("DML::%s: k=%u, PixelPTEBytesPerRowY = %u\n", __func__, k, s->PixelPTEBytesPerRowY[k]);
5244 dml_print("DML::%s: k=%u, PixelPTEBytesPerRowC = %u\n", __func__, k, s->PixelPTEBytesPerRowC[k]);
5245 dml_print("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]);
5246 dml_print("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]);
5247 dml_print("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]);
5248 dml_print("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]);
5249 dml_print("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]);
5250 #endif
5251 }
5252 s->one_row_per_frame_fits_in_buffer[k] = (s->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForLuma[k] &&
5253 s->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForChroma[k]);
5254
5255 #ifdef __DML_VBA_DEBUG__
5256 dml_print("DML::%s: k=%u, PDEAndMetaPTEBytesFrame = %u\n", __func__, k, p->PDEAndMetaPTEBytesFrame[k]);
5257 dml_print("DML::%s: k=%u, PDEAndMetaPTEBytesFrameY = %u\n", __func__, k, s->PDEAndMetaPTEBytesFrameY);
5258 dml_print("DML::%s: k=%u, PDEAndMetaPTEBytesFrameC = %u\n", __func__, k, s->PDEAndMetaPTEBytesFrameC);
5259 dml_print("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels);
5260 dml_print("DML::%s: k=%u, one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]);
5261 dml_print("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]);
5262 dml_print("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]);
5263 #endif
5264 }
5265
5266 CalculateMALLUseForStaticScreen(
5267 p->NumberOfActiveSurfaces,
5268 p->MALLAllocatedForDCN,
5269 p->UseMALLForStaticScreen, // mode
5270 p->SurfaceSizeInMALL,
5271 s->one_row_per_frame_fits_in_buffer,
5272 // Output
5273 p->UsesMALLForStaticScreen); // boolen
5274
5275 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
5276 if (p->PTEBufferModeOverrideEn[k] == 1) {
5277 p->PTE_BUFFER_MODE[k] = p->PTEBufferModeOverrideVal[k];
5278 }
5279 p->PTE_BUFFER_MODE[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->UsesMALLForStaticScreen[k] || (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport) ||
5280 (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) || (p->GPUVMMinPageSizeKBytes[k] > 64);
5281 p->BIGK_FRAGMENT_SIZE[k] = (dml_uint_t)(dml_log2(p->GPUVMMinPageSizeKBytes[k] * 1024) - 12);
5282 }
5283
5284 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
5285 #ifdef __DML_VBA_DEBUG__
5286 dml_print("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]);
5287 dml_print("DML::%s: k=%u, UsesMALLForStaticScreen = %u\n", __func__, k, p->UsesMALLForStaticScreen[k]);
5288 #endif
5289 p->use_one_row_for_frame[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->UsesMALLForStaticScreen[k] || (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport) ||
5290 (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) || (p->GPUVMMinPageSizeKBytes[k] > 64 && dml_is_vertical_rotation(p->myPipe[k].SourceScan));
5291
5292 p->use_one_row_for_frame_flip[k] = p->use_one_row_for_frame[k] && !(p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame);
5293
5294 if (p->use_one_row_for_frame[k]) {
5295 p->dpte_row_height_luma[k] = s->dpte_row_height_luma_one_row_per_frame[k];
5296 p->dpte_row_width_luma_ub[k] = s->dpte_row_width_luma_ub_one_row_per_frame[k];
5297 s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY_one_row_per_frame[k];
5298 p->dpte_row_height_chroma[k] = s->dpte_row_height_chroma_one_row_per_frame[k];
5299 p->dpte_row_width_chroma_ub[k] = s->dpte_row_width_chroma_ub_one_row_per_frame[k];
5300 s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC_one_row_per_frame[k];
5301 p->PTEBufferSizeNotExceeded[k] = s->one_row_per_frame_fits_in_buffer[k];
5302 }
5303
5304 if (p->MetaRowByte[k] <= p->DCCMetaBufferSizeBytes) {
5305 p->DCCMetaBufferSizeNotExceeded[k] = true;
5306 } else {
5307 p->DCCMetaBufferSizeNotExceeded[k] = false;
5308
5309 #ifdef __DML_VBA_DEBUG__
5310 dml_print("DML::%s: k=%u, MetaRowByte = %u\n", __func__, k, p->MetaRowByte[k]);
5311 dml_print("DML::%s: k=%u, DCCMetaBufferSizeBytes = %u\n", __func__, k, p->DCCMetaBufferSizeBytes);
5312 dml_print("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]);
5313 #endif
5314 }
5315 s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY[k] * (1 + 8 * s->HostVMDynamicLevels);
5316 s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC[k] * (1 + 8 * s->HostVMDynamicLevels);
5317 p->PixelPTEBytesPerRow[k] = s->PixelPTEBytesPerRowY[k] + s->PixelPTEBytesPerRowC[k];
5318 if (p->use_one_row_for_frame[k])
5319 p->PixelPTEBytesPerRow[k] = p->PixelPTEBytesPerRow[k] / 2;
5320
5321 CalculateRowBandwidth(
5322 p->GPUVMEnable,
5323 p->myPipe[k].SourcePixelFormat,
5324 p->myPipe[k].VRatio,
5325 p->myPipe[k].VRatioChroma,
5326 p->myPipe[k].DCCEnable,
5327 p->myPipe[k].HTotal / p->myPipe[k].PixelClock,
5328 s->MetaRowByteY[k],
5329 s->MetaRowByteC[k],
5330 p->meta_row_height[k],
5331 p->meta_row_height_chroma[k],
5332 s->PixelPTEBytesPerRowY[k],
5333 s->PixelPTEBytesPerRowC[k],
5334 p->dpte_row_height_luma[k],
5335 p->dpte_row_height_chroma[k],
5336
5337 // Output
5338 &p->meta_row_bw[k],
5339 &p->dpte_row_bw[k]);
5340 #ifdef __DML_VBA_DEBUG__
5341 dml_print("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]);
5342 dml_print("DML::%s: k=%u, use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]);
5343 dml_print("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->UseMALLForPStateChange[k]);
5344 dml_print("DML::%s: k=%u, dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]);
5345 dml_print("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]);
5346 dml_print("DML::%s: k=%u, PixelPTEBytesPerRowY = %u\n", __func__, k, s->PixelPTEBytesPerRowY[k]);
5347 dml_print("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]);
5348 dml_print("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]);
5349 dml_print("DML::%s: k=%u, PixelPTEBytesPerRowC = %u\n", __func__, k, s->PixelPTEBytesPerRowC[k]);
5350 dml_print("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]);
5351 dml_print("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]);
5352 dml_print("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]);
5353 dml_print("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]);
5354 #endif
5355 }
5356 }
5357
CalculateOutputLink(dml_float_t PHYCLKPerState,dml_float_t PHYCLKD18PerState,dml_float_t PHYCLKD32PerState,dml_float_t Downspreading,dml_bool_t IsMainSurfaceUsingTheIndicatedTiming,enum dml_output_encoder_class Output,enum dml_output_format_class OutputFormat,dml_uint_t HTotal,dml_uint_t HActive,dml_float_t PixelClockBackEnd,dml_float_t ForcedOutputLinkBPP,dml_uint_t DSCInputBitPerComponent,dml_uint_t NumberOfDSCSlices,dml_float_t AudioSampleRate,dml_uint_t AudioSampleLayout,enum dml_odm_mode ODMModeNoDSC,enum dml_odm_mode ODMModeDSC,enum dml_dsc_enable DSCEnable,dml_uint_t OutputLinkDPLanes,enum dml_output_link_dp_rate OutputLinkDPRate,dml_bool_t * RequiresDSC,dml_bool_t * RequiresFEC,dml_float_t * OutBpp,enum dml_output_type_and_rate__type * OutputType,enum dml_output_type_and_rate__rate * OutputRate,dml_uint_t * RequiredSlots)5358 static void CalculateOutputLink(
5359 dml_float_t PHYCLKPerState,
5360 dml_float_t PHYCLKD18PerState,
5361 dml_float_t PHYCLKD32PerState,
5362 dml_float_t Downspreading,
5363 dml_bool_t IsMainSurfaceUsingTheIndicatedTiming,
5364 enum dml_output_encoder_class Output,
5365 enum dml_output_format_class OutputFormat,
5366 dml_uint_t HTotal,
5367 dml_uint_t HActive,
5368 dml_float_t PixelClockBackEnd,
5369 dml_float_t ForcedOutputLinkBPP,
5370 dml_uint_t DSCInputBitPerComponent,
5371 dml_uint_t NumberOfDSCSlices,
5372 dml_float_t AudioSampleRate,
5373 dml_uint_t AudioSampleLayout,
5374 enum dml_odm_mode ODMModeNoDSC,
5375 enum dml_odm_mode ODMModeDSC,
5376 enum dml_dsc_enable DSCEnable,
5377 dml_uint_t OutputLinkDPLanes,
5378 enum dml_output_link_dp_rate OutputLinkDPRate,
5379
5380 // Output
5381 dml_bool_t *RequiresDSC,
5382 dml_bool_t *RequiresFEC,
5383 dml_float_t *OutBpp,
5384 enum dml_output_type_and_rate__type *OutputType,
5385 enum dml_output_type_and_rate__rate *OutputRate,
5386 dml_uint_t *RequiredSlots)
5387 {
5388 dml_bool_t LinkDSCEnable;
5389 dml_uint_t dummy;
5390 *RequiresDSC = false;
5391 *RequiresFEC = false;
5392 *OutBpp = 0;
5393
5394 *OutputType = dml_output_type_unknown;
5395 *OutputRate = dml_output_rate_unknown;
5396
5397 if (IsMainSurfaceUsingTheIndicatedTiming) {
5398 if (Output == dml_hdmi) {
5399 *RequiresDSC = false;
5400 *RequiresFEC = false;
5401 *OutBpp = TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, false, Output,
5402 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
5403 //OutputTypeAndRate = "HDMI";
5404 *OutputType = dml_output_type_hdmi;
5405
5406 } else if (Output == dml_dp || Output == dml_dp2p0 || Output == dml_edp) {
5407 if (DSCEnable == dml_dsc_enable) {
5408 *RequiresDSC = true;
5409 LinkDSCEnable = true;
5410 if (Output == dml_dp || Output == dml_dp2p0) {
5411 *RequiresFEC = true;
5412 } else {
5413 *RequiresFEC = false;
5414 }
5415 } else {
5416 *RequiresDSC = false;
5417 LinkDSCEnable = false;
5418 if (Output == dml_dp2p0) {
5419 *RequiresFEC = true;
5420 } else {
5421 *RequiresFEC = false;
5422 }
5423 }
5424 if (Output == dml_dp2p0) {
5425 *OutBpp = 0;
5426 if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_uhbr10) && PHYCLKD32PerState >= 10000 / 32.0) {
5427 *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
5428 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
5429 if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32.0 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
5430 *RequiresDSC = true;
5431 LinkDSCEnable = true;
5432 *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
5433 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
5434 }
5435 //OutputTypeAndRate = Output & " UHBR10";
5436 *OutputType = dml_output_type_dp2p0;
5437 *OutputRate = dml_output_rate_dp_rate_uhbr10;
5438 }
5439 if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_uhbr13p5) && *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32.0) {
5440 *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
5441 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
5442
5443 if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
5444 *RequiresDSC = true;
5445 LinkDSCEnable = true;
5446 *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
5447 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
5448 }
5449 //OutputTypeAndRate = Output & " UHBR13p5";
5450 *OutputType = dml_output_type_dp2p0;
5451 *OutputRate = dml_output_rate_dp_rate_uhbr13p5;
5452 }
5453 if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_uhbr20) && *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
5454 *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
5455 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
5456 if (*OutBpp == 0 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
5457 *RequiresDSC = true;
5458 LinkDSCEnable = true;
5459 *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
5460 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
5461 }
5462 //OutputTypeAndRate = Output & " UHBR20";
5463 *OutputType = dml_output_type_dp2p0;
5464 *OutputRate = dml_output_rate_dp_rate_uhbr20;
5465 }
5466 } else { // output is dp or edp
5467 *OutBpp = 0;
5468 if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_hbr) && PHYCLKPerState >= 270) {
5469 *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
5470 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
5471 if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
5472 *RequiresDSC = true;
5473 LinkDSCEnable = true;
5474 if (Output == dml_dp) {
5475 *RequiresFEC = true;
5476 }
5477 *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
5478 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
5479 }
5480 //OutputTypeAndRate = Output & " HBR";
5481 *OutputType = (Output == dml_dp) ? dml_output_type_dp : dml_output_type_edp;
5482 *OutputRate = dml_output_rate_dp_rate_hbr;
5483 }
5484 if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_hbr2) && *OutBpp == 0 && PHYCLKPerState >= 540) {
5485 *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
5486 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
5487
5488 if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
5489 *RequiresDSC = true;
5490 LinkDSCEnable = true;
5491 if (Output == dml_dp) {
5492 *RequiresFEC = true;
5493 }
5494 *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
5495 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
5496 }
5497 //OutputTypeAndRate = Output & " HBR2";
5498 *OutputType = (Output == dml_dp) ? dml_output_type_dp : dml_output_type_edp;
5499 *OutputRate = dml_output_rate_dp_rate_hbr2;
5500 }
5501 if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) { // VBA_ERROR, vba code doesn't have hbr3 check
5502 *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
5503 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
5504
5505 if (*OutBpp == 0 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
5506 *RequiresDSC = true;
5507 LinkDSCEnable = true;
5508 if (Output == dml_dp) {
5509 *RequiresFEC = true;
5510 }
5511 *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
5512 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
5513 }
5514 //OutputTypeAndRate = Output & " HBR3";
5515 *OutputType = (Output == dml_dp) ? dml_output_type_dp : dml_output_type_edp;
5516 *OutputRate = dml_output_rate_dp_rate_hbr3;
5517 }
5518 }
5519 }
5520 }
5521 }
5522
5523 /// @brief Determine the ODM mode and number of DPP used per plane based on dispclk, dsc usage, odm usage policy
CalculateODMMode(dml_uint_t MaximumPixelsPerLinePerDSCUnit,dml_uint_t HActive,enum dml_output_encoder_class Output,enum dml_output_format_class OutputFormat,enum dml_odm_use_policy ODMUse,dml_float_t StateDispclk,dml_float_t MaxDispclk,dml_bool_t DSCEnable,dml_uint_t TotalNumberOfActiveDPP,dml_uint_t MaxNumDPP,dml_float_t PixelClock,dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,dml_float_t DISPCLKRampingMargin,dml_float_t DISPCLKDPPCLKVCOSpeed,dml_uint_t NumberOfDSCSlices,dml_bool_t * TotalAvailablePipesSupport,dml_uint_t * NumberOfDPP,enum dml_odm_mode * ODMMode,dml_float_t * RequiredDISPCLKPerSurface)5524 static void CalculateODMMode(
5525 dml_uint_t MaximumPixelsPerLinePerDSCUnit,
5526 dml_uint_t HActive,
5527 enum dml_output_encoder_class Output,
5528 enum dml_output_format_class OutputFormat,
5529 enum dml_odm_use_policy ODMUse,
5530 dml_float_t StateDispclk,
5531 dml_float_t MaxDispclk,
5532 dml_bool_t DSCEnable,
5533 dml_uint_t TotalNumberOfActiveDPP,
5534 dml_uint_t MaxNumDPP,
5535 dml_float_t PixelClock,
5536 dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
5537 dml_float_t DISPCLKRampingMargin,
5538 dml_float_t DISPCLKDPPCLKVCOSpeed,
5539 dml_uint_t NumberOfDSCSlices,
5540
5541 // Output
5542 dml_bool_t *TotalAvailablePipesSupport,
5543 dml_uint_t *NumberOfDPP,
5544 enum dml_odm_mode *ODMMode,
5545 dml_float_t *RequiredDISPCLKPerSurface)
5546 {
5547
5548 dml_float_t SurfaceRequiredDISPCLKWithoutODMCombine;
5549 dml_float_t SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
5550 dml_float_t SurfaceRequiredDISPCLKWithODMCombineFourToOne;
5551
5552 SurfaceRequiredDISPCLKWithoutODMCombine = CalculateRequiredDispclk(dml_odm_mode_bypass, PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, MaxDispclk);
5553 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = CalculateRequiredDispclk(dml_odm_mode_combine_2to1, PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, MaxDispclk);
5554 SurfaceRequiredDISPCLKWithODMCombineFourToOne = CalculateRequiredDispclk(dml_odm_mode_combine_4to1, PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, MaxDispclk);
5555 *TotalAvailablePipesSupport = true;
5556
5557 if (OutputFormat == dml_420) {
5558 if (HActive > 4 * DML2_MAX_FMT_420_BUFFER_WIDTH)
5559 *TotalAvailablePipesSupport = false;
5560 else if (HActive > 2 * DML2_MAX_FMT_420_BUFFER_WIDTH)
5561 ODMUse = dml_odm_use_policy_combine_4to1;
5562 else if (HActive > DML2_MAX_FMT_420_BUFFER_WIDTH && ODMUse != dml_odm_use_policy_combine_4to1)
5563 ODMUse = dml_odm_use_policy_combine_2to1;
5564 if (Output == dml_hdmi && ODMUse == dml_odm_use_policy_combine_2to1)
5565 *TotalAvailablePipesSupport = false;
5566 if ((Output == dml_hdmi || Output == dml_dp || Output == dml_edp) && ODMUse == dml_odm_use_policy_combine_4to1)
5567 *TotalAvailablePipesSupport = false;
5568 }
5569
5570 if (ODMUse == dml_odm_use_policy_bypass || ODMUse == dml_odm_use_policy_combine_as_needed)
5571 *ODMMode = dml_odm_mode_bypass;
5572 else if (ODMUse == dml_odm_use_policy_combine_2to1)
5573 *ODMMode = dml_odm_mode_combine_2to1;
5574 else if (ODMUse == dml_odm_use_policy_combine_4to1)
5575 *ODMMode = dml_odm_mode_combine_4to1;
5576 else if (ODMUse == dml_odm_use_policy_split_1to2)
5577 *ODMMode = dml_odm_mode_split_1to2;
5578 else if (ODMUse == dml_odm_use_policy_mso_1to2)
5579 *ODMMode = dml_odm_mode_mso_1to2;
5580 else if (ODMUse == dml_odm_use_policy_mso_1to4)
5581 *ODMMode = dml_odm_mode_mso_1to4;
5582
5583 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
5584 *NumberOfDPP = 0;
5585
5586 if (!(Output == dml_hdmi || Output == dml_dp || Output == dml_edp) && (ODMUse == dml_odm_use_policy_combine_4to1 || (ODMUse == dml_odm_use_policy_combine_as_needed &&
5587 (SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk || (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)) || NumberOfDSCSlices > 8)))) {
5588 if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
5589 *ODMMode = dml_odm_mode_combine_4to1;
5590 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
5591 *NumberOfDPP = 4;
5592 } else {
5593 *TotalAvailablePipesSupport = false;
5594 }
5595 } else if (Output != dml_hdmi && (ODMUse == dml_odm_use_policy_combine_2to1 || (ODMUse == dml_odm_use_policy_combine_as_needed &&
5596 ((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk && SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
5597 (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit)) || (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) {
5598 if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
5599 *ODMMode = dml_odm_mode_combine_2to1;
5600 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
5601 *NumberOfDPP = 2;
5602 } else {
5603 *TotalAvailablePipesSupport = false;
5604 }
5605 } else {
5606 if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP) {
5607 *NumberOfDPP = 1;
5608 } else {
5609 *TotalAvailablePipesSupport = false;
5610 }
5611 }
5612 }
5613
5614 /// @brief Calculate the required DISPCLK given the odm mode and pixclk
CalculateRequiredDispclk(enum dml_odm_mode ODMMode,dml_float_t PixelClock,dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,dml_float_t DISPCLKRampingMargin,dml_float_t DISPCLKDPPCLKVCOSpeed,dml_float_t MaxDispclk)5615 static dml_float_t CalculateRequiredDispclk(
5616 enum dml_odm_mode ODMMode,
5617 dml_float_t PixelClock,
5618 dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
5619 dml_float_t DISPCLKRampingMargin,
5620 dml_float_t DISPCLKDPPCLKVCOSpeed,
5621 dml_float_t MaxDispclk)
5622 {
5623 dml_float_t RequiredDispclk = 0.;
5624 dml_float_t PixelClockAfterODM;
5625
5626 dml_float_t DISPCLKWithRampingRoundedToDFSGranularity;
5627 dml_float_t DISPCLKWithoutRampingRoundedToDFSGranularity;
5628 dml_float_t MaxDispclkRoundedDownToDFSGranularity;
5629
5630 if (ODMMode == dml_odm_mode_combine_4to1) {
5631 PixelClockAfterODM = PixelClock / 4;
5632 } else if (ODMMode == dml_odm_mode_combine_2to1) {
5633 PixelClockAfterODM = PixelClock / 2;
5634 } else {
5635 PixelClockAfterODM = PixelClock;
5636 }
5637
5638 DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularity(PixelClockAfterODM * (1.0 + DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * (1 + DISPCLKRampingMargin / 100.0), 1, DISPCLKDPPCLKVCOSpeed);
5639 DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularity(PixelClockAfterODM * (1.0 + DISPCLKDPPCLKDSCCLKDownSpreading / 100.0), 1, DISPCLKDPPCLKVCOSpeed);
5640 MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
5641
5642 if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) {
5643 RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
5644 } else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) {
5645 RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
5646 } else {
5647 RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
5648 }
5649
5650 return RequiredDispclk;
5651 }
5652
5653 /// @brief Determine DPPCLK if there only one DPP per plane, main factor is the pixel rate and DPP scaling parameter
CalculateSinglePipeDPPCLKAndSCLThroughput(dml_float_t HRatio,dml_float_t HRatioChroma,dml_float_t VRatio,dml_float_t VRatioChroma,dml_float_t MaxDCHUBToPSCLThroughput,dml_float_t MaxPSCLToLBThroughput,dml_float_t PixelClock,enum dml_source_format_class SourcePixelFormat,dml_uint_t HTaps,dml_uint_t HTapsChroma,dml_uint_t VTaps,dml_uint_t VTapsChroma,dml_float_t * PSCL_THROUGHPUT,dml_float_t * PSCL_THROUGHPUT_CHROMA,dml_float_t * DPPCLKUsingSingleDPP)5654 static void CalculateSinglePipeDPPCLKAndSCLThroughput(
5655 dml_float_t HRatio,
5656 dml_float_t HRatioChroma,
5657 dml_float_t VRatio,
5658 dml_float_t VRatioChroma,
5659 dml_float_t MaxDCHUBToPSCLThroughput,
5660 dml_float_t MaxPSCLToLBThroughput,
5661 dml_float_t PixelClock,
5662 enum dml_source_format_class SourcePixelFormat,
5663 dml_uint_t HTaps,
5664 dml_uint_t HTapsChroma,
5665 dml_uint_t VTaps,
5666 dml_uint_t VTapsChroma,
5667
5668 // Output
5669 dml_float_t *PSCL_THROUGHPUT,
5670 dml_float_t *PSCL_THROUGHPUT_CHROMA,
5671 dml_float_t *DPPCLKUsingSingleDPP)
5672 {
5673 dml_float_t DPPCLKUsingSingleDPPLuma;
5674 dml_float_t DPPCLKUsingSingleDPPChroma;
5675
5676 if (HRatio > 1) {
5677 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / dml_ceil((dml_float_t) HTaps / 6.0, 1.0));
5678 } else {
5679 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
5680 }
5681
5682 DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio / *PSCL_THROUGHPUT, 1);
5683
5684 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
5685 DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
5686
5687 if ((SourcePixelFormat != dml_420_8 && SourcePixelFormat != dml_420_10 && SourcePixelFormat != dml_420_12 && SourcePixelFormat != dml_rgbe_alpha)) {
5688 *PSCL_THROUGHPUT_CHROMA = 0;
5689 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
5690 } else {
5691 if (HRatioChroma > 1) {
5692 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatioChroma / dml_ceil((dml_float_t) HTapsChroma / 6.0, 1.0));
5693 } else {
5694 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
5695 }
5696 DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
5697 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
5698 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
5699 DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
5700 *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
5701 }
5702 }
5703
5704 /// @brief Calculate the actual dppclk freq
5705 /// @param DPPCLKUsingSingleDPP DppClk freq required if there is only 1 DPP per plane
5706 /// @param DPPPerSurface Number of DPP for each plane
CalculateDPPCLK(dml_uint_t NumberOfActiveSurfaces,dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,dml_float_t DISPCLKDPPCLKVCOSpeed,dml_float_t DPPCLKUsingSingleDPP[],dml_uint_t DPPPerSurface[],dml_float_t * GlobalDPPCLK,dml_float_t Dppclk[])5707 static void CalculateDPPCLK(
5708 dml_uint_t NumberOfActiveSurfaces,
5709 dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
5710 dml_float_t DISPCLKDPPCLKVCOSpeed,
5711 dml_float_t DPPCLKUsingSingleDPP[],
5712 dml_uint_t DPPPerSurface[],
5713
5714 // Output
5715 dml_float_t *GlobalDPPCLK,
5716 dml_float_t Dppclk[])
5717 {
5718 *GlobalDPPCLK = 0;
5719 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
5720 Dppclk[k] = DPPCLKUsingSingleDPP[k] / DPPPerSurface[k] * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
5721 *GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[k]);
5722 }
5723 *GlobalDPPCLK = RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed);
5724
5725 dml_print("DML::%s: GlobalDPPCLK = %f\n", __func__, *GlobalDPPCLK);
5726 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
5727 Dppclk[k] = *GlobalDPPCLK / 255.0 * dml_ceil(Dppclk[k] * 255.0 / *GlobalDPPCLK, 1.0);
5728 dml_print("DML::%s: Dppclk[%0d] = %f\n", __func__, k, Dppclk[k]);
5729 }
5730 }
5731
/// @brief Decide which surfaces use MALL for static screen
/// @param NumberOfActiveSurfaces Number of entries processed in the per-surface arrays
/// @param MALLAllocatedForDCNFinal MALL budget; multiplied by 1024 * 1024 before being compared with the surface sizes
/// @param UseMALLForStaticScreen Per-surface request: enable always selects the surface, disable never does,
///                               any other value lets the surface be added while budget remains
/// @param SurfaceSizeInMALL Per-surface size accounted against the budget
/// @param one_row_per_frame_fits_in_buffer Per-surface flag that must be set for a surface to be added beyond the forced ones
/// @param UsesMALLForStaticScreen Output: true for every surface selected
static void CalculateMALLUseForStaticScreen(
		dml_uint_t NumberOfActiveSurfaces,
		dml_uint_t MALLAllocatedForDCNFinal,
		enum dml_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
		dml_uint_t SurfaceSizeInMALL[],
		dml_bool_t one_row_per_frame_fits_in_buffer[],

		// Output
		dml_bool_t UsesMALLForStaticScreen[])
{
	dml_uint_t MALLInUse = 0;
	dml_uint_t Candidate;
	dml_bool_t FoundCandidate;
	dml_uint_t k;

	// Surfaces that explicitly enable MALL are taken unconditionally
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dml_use_mall_static_screen_enable);
		if (UsesMALLForStaticScreen[k])
			MALLInUse += SurfaceSizeInMALL[k];
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%u, UsesMALLForStaticScreen = %u\n", __func__, k, UsesMALLForStaticScreen[k]);
		dml_print("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, MALLInUse);
#endif
	}

	// Then keep adding the smallest remaining eligible surface until none fits any more
	do {
		FoundCandidate = false;
		Candidate = 0;

		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
			// Already selected, explicitly disabled, or not eligible
			if (UsesMALLForStaticScreen[k] ||
				UseMALLForStaticScreen[k] == dml_use_mall_static_screen_disable ||
				!one_row_per_frame_fits_in_buffer[k])
				continue;

			// Would not fit in what is left of the budget
			if (MALLInUse + SurfaceSizeInMALL[k] > MALLAllocatedForDCNFinal * 1024 * 1024)
				continue;

			// Only a strictly smaller surface replaces the current candidate
			if (FoundCandidate && SurfaceSizeInMALL[k] >= SurfaceSizeInMALL[Candidate])
				continue;

			FoundCandidate = true;
			Candidate = k;
			dml_print("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, UseMALLForStaticScreen[k]);
		}

		if (FoundCandidate) {
			UsesMALLForStaticScreen[Candidate] = true;
			MALLInUse += SurfaceSizeInMALL[Candidate];

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: SurfaceToAddToMALL = %u\n", __func__, Candidate);
			dml_print("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, MALLInUse);
#endif
		}
	} while (FoundCandidate);
}
5782
5783 // @brief Calculate return bw for VM only traffic
dml_get_return_bw_mbps_vm_only(const struct soc_bounding_box_st * soc,dml_bool_t use_ideal_dram_bw_strobe,dml_bool_t HostVMEnable,dml_float_t DCFCLK,dml_float_t FabricClock,dml_float_t DRAMSpeed)5784 dml_float_t dml_get_return_bw_mbps_vm_only(
5785 const struct soc_bounding_box_st *soc,
5786 dml_bool_t use_ideal_dram_bw_strobe,
5787 dml_bool_t HostVMEnable,
5788 dml_float_t DCFCLK,
5789 dml_float_t FabricClock,
5790 dml_float_t DRAMSpeed)
5791 {
5792 dml_float_t VMDataOnlyReturnBW =
5793 dml_min3(soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
5794 FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
5795 DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes *
5796 ((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe : soc->pct_ideal_dram_bw_after_urgent_vm_only) / 100.0);
5797 #ifdef __DML_VBA_DEBUG__
5798 dml_print("DML::%s: use_ideal_dram_bw_strobe = %u\n", __func__, use_ideal_dram_bw_strobe);
5799 dml_print("DML::%s: HostVMEnable = %u\n", __func__, HostVMEnable);
5800 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5801 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
5802 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
5803 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
5804 #endif
5805 return VMDataOnlyReturnBW;
5806 }
5807
5808 // Function: dml_get_return_bw_mbps
5809 // Megabyte per second
dml_get_return_bw_mbps(const struct soc_bounding_box_st * soc,dml_bool_t use_ideal_dram_bw_strobe,dml_bool_t HostVMEnable,dml_float_t DCFCLK,dml_float_t FabricClock,dml_float_t DRAMSpeed)5810 dml_float_t dml_get_return_bw_mbps(
5811 const struct soc_bounding_box_st *soc,
5812 dml_bool_t use_ideal_dram_bw_strobe,
5813 dml_bool_t HostVMEnable,
5814 dml_float_t DCFCLK,
5815 dml_float_t FabricClock,
5816 dml_float_t DRAMSpeed)
5817 {
5818 dml_float_t ReturnBW = 0.;
5819 dml_float_t IdealSDPPortBandwidth = soc->return_bus_width_bytes * DCFCLK;
5820 dml_float_t IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
5821 dml_float_t IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
5822 dml_float_t PixelDataOnlyReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
5823 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
5824 IdealDRAMBandwidth * ((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe :
5825 soc->pct_ideal_dram_bw_after_urgent_pixel_only) / 100);
5826 dml_float_t PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
5827 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
5828 IdealDRAMBandwidth * ((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe :
5829 soc->pct_ideal_dram_bw_after_urgent_pixel_and_vm) / 100);
5830
5831 if (HostVMEnable != true) {
5832 ReturnBW = PixelDataOnlyReturnBW;
5833 } else {
5834 ReturnBW = PixelMixedWithVMDataReturnBW;
5835 }
5836
5837 #ifdef __DML_VBA_DEBUG__
5838 dml_print("DML::%s: use_ideal_dram_bw_strobe = %u\n", __func__, use_ideal_dram_bw_strobe);
5839 dml_print("DML::%s: HostVMEnable = %u\n", __func__, HostVMEnable);
5840 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5841 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
5842 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
5843 dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth);
5844 dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth);
5845 dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth);
5846 dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW);
5847 dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
5848 dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW);
5849 #endif
5850 return ReturnBW;
5851 }
5852
5853 // Function: dml_get_return_dram_bw_mbps
5854 // Megabyte per second
dml_get_return_dram_bw_mbps(const struct soc_bounding_box_st * soc,dml_bool_t use_ideal_dram_bw_strobe,dml_bool_t HostVMEnable,dml_float_t DRAMSpeed)5855 static dml_float_t dml_get_return_dram_bw_mbps(
5856 const struct soc_bounding_box_st *soc,
5857 dml_bool_t use_ideal_dram_bw_strobe,
5858 dml_bool_t HostVMEnable,
5859 dml_float_t DRAMSpeed)
5860 {
5861 dml_float_t ReturnDRAMBW = 0.;
5862 dml_float_t IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
5863 dml_float_t PixelDataOnlyReturnBW = IdealDRAMBandwidth * ((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe :
5864 soc->pct_ideal_dram_bw_after_urgent_pixel_only) / 100;
5865 dml_float_t PixelMixedWithVMDataReturnBW = IdealDRAMBandwidth * ((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe :
5866 soc->pct_ideal_dram_bw_after_urgent_pixel_and_vm) / 100;
5867
5868 if (HostVMEnable != true) {
5869 ReturnDRAMBW = PixelDataOnlyReturnBW;
5870 } else {
5871 ReturnDRAMBW = PixelMixedWithVMDataReturnBW;
5872 }
5873
5874 #ifdef __DML_VBA_DEBUG__
5875 dml_print("DML::%s: use_ideal_dram_bw_strobe = %u\n", __func__, use_ideal_dram_bw_strobe);
5876 dml_print("DML::%s: HostVMEnable = %u\n", __func__, HostVMEnable);
5877 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
5878 dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth);
5879 dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW);
5880 dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
5881 dml_print("DML::%s: ReturnDRAMBW = %f MBps\n", __func__, ReturnDRAMBW);
5882 #endif
5883 return ReturnDRAMBW;
5884 }
5885
5886 /// @brief BACKEND
DSCDelayRequirement(dml_bool_t DSCEnabled,enum dml_odm_mode ODMMode,dml_uint_t DSCInputBitPerComponent,dml_float_t OutputBpp,dml_uint_t HActive,dml_uint_t HTotal,dml_uint_t NumberOfDSCSlices,enum dml_output_format_class OutputFormat,enum dml_output_encoder_class Output,dml_float_t PixelClock,dml_float_t PixelClockBackEnd)5887 static dml_uint_t DSCDelayRequirement(
5888 dml_bool_t DSCEnabled,
5889 enum dml_odm_mode ODMMode,
5890 dml_uint_t DSCInputBitPerComponent,
5891 dml_float_t OutputBpp,
5892 dml_uint_t HActive,
5893 dml_uint_t HTotal,
5894 dml_uint_t NumberOfDSCSlices,
5895 enum dml_output_format_class OutputFormat,
5896 enum dml_output_encoder_class Output,
5897 dml_float_t PixelClock,
5898 dml_float_t PixelClockBackEnd)
5899 {
5900 dml_uint_t DSCDelayRequirement_val = 0;
5901
5902 if (DSCEnabled == true && OutputBpp != 0) {
5903 if (ODMMode == dml_odm_mode_combine_4to1) {
5904 DSCDelayRequirement_val = dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, (dml_uint_t)(dml_ceil((dml_float_t) HActive / (dml_float_t) NumberOfDSCSlices, 1.0)),
5905 (dml_uint_t) (NumberOfDSCSlices / 4.0), OutputFormat, Output) + dscComputeDelay(OutputFormat, Output);
5906 } else if (ODMMode == dml_odm_mode_combine_2to1) {
5907 DSCDelayRequirement_val = dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, (dml_uint_t)(dml_ceil((dml_float_t) HActive / (dml_float_t) NumberOfDSCSlices, 1.0)),
5908 (dml_uint_t) (NumberOfDSCSlices / 2.0), OutputFormat, Output) + dscComputeDelay(OutputFormat, Output);
5909 } else {
5910 DSCDelayRequirement_val = dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, (dml_uint_t)((dml_float_t) dml_ceil(HActive / (dml_float_t) NumberOfDSCSlices, 1.0)),
5911 NumberOfDSCSlices, OutputFormat, Output) + dscComputeDelay(OutputFormat, Output);
5912 }
5913 DSCDelayRequirement_val = (dml_uint_t)(DSCDelayRequirement_val + (HTotal - HActive) * dml_ceil((dml_float_t) DSCDelayRequirement_val / (dml_float_t) HActive, 1.0));
5914 DSCDelayRequirement_val = (dml_uint_t)(DSCDelayRequirement_val * PixelClock / PixelClockBackEnd);
5915
5916 } else {
5917 DSCDelayRequirement_val = 0;
5918 }
5919 #ifdef __DML_VBA_DEBUG__
5920 dml_print("DML::%s: DSCEnabled = %u\n", __func__, DSCEnabled);
5921 dml_print("DML::%s: ODMMode = %u\n", __func__, ODMMode);
5922 dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp);
5923 dml_print("DML::%s: HActive = %u\n", __func__, HActive);
5924 dml_print("DML::%s: HTotal = %u\n", __func__, HTotal);
5925 dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
5926 dml_print("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd);
5927 dml_print("DML::%s: OutputFormat = %u\n", __func__, OutputFormat);
5928 dml_print("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent);
5929 dml_print("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices);
5930 dml_print("DML::%s: DSCDelayRequirement_val = %u\n", __func__, DSCDelayRequirement_val);
5931 #endif
5932
5933 return DSCDelayRequirement_val;
5934 }
5935
CalculateVActiveBandwithSupport(dml_uint_t NumberOfActiveSurfaces,dml_float_t ReturnBW,dml_bool_t NotUrgentLatencyHiding[],dml_float_t ReadBandwidthLuma[],dml_float_t ReadBandwidthChroma[],dml_float_t cursor_bw[],dml_float_t meta_row_bandwidth[],dml_float_t dpte_row_bandwidth[],dml_uint_t NumberOfDPP[],dml_float_t UrgentBurstFactorLuma[],dml_float_t UrgentBurstFactorChroma[],dml_float_t UrgentBurstFactorCursor[])5936 static noinline_for_stack dml_bool_t CalculateVActiveBandwithSupport(dml_uint_t NumberOfActiveSurfaces,
5937 dml_float_t ReturnBW,
5938 dml_bool_t NotUrgentLatencyHiding[],
5939 dml_float_t ReadBandwidthLuma[],
5940 dml_float_t ReadBandwidthChroma[],
5941 dml_float_t cursor_bw[],
5942 dml_float_t meta_row_bandwidth[],
5943 dml_float_t dpte_row_bandwidth[],
5944 dml_uint_t NumberOfDPP[],
5945 dml_float_t UrgentBurstFactorLuma[],
5946 dml_float_t UrgentBurstFactorChroma[],
5947 dml_float_t UrgentBurstFactorCursor[])
5948 {
5949 dml_bool_t NotEnoughUrgentLatencyHiding = false;
5950 dml_bool_t CalculateVActiveBandwithSupport_val = false;
5951 dml_float_t VActiveBandwith = 0;
5952
5953 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
5954 if (NotUrgentLatencyHiding[k]) {
5955 NotEnoughUrgentLatencyHiding = true;
5956 }
5957 }
5958
5959 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
5960 VActiveBandwith = VActiveBandwith + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * meta_row_bandwidth[k] + NumberOfDPP[k] * dpte_row_bandwidth[k];
5961 }
5962
5963 CalculateVActiveBandwithSupport_val = (VActiveBandwith <= ReturnBW) && !NotEnoughUrgentLatencyHiding;
5964
5965 #ifdef __DML_VBA_DEBUG__
5966 dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %u\n", __func__, NotEnoughUrgentLatencyHiding);
5967 dml_print("DML::%s: VActiveBandwith = %f\n", __func__, VActiveBandwith);
5968 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
5969 dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %u\n", __func__, CalculateVActiveBandwithSupport_val);
5970 #endif
5971 return CalculateVActiveBandwithSupport_val;
5972 }
5973
/// @brief Sum the per-surface prefetch bandwidth and check it against ReturnBW
/// For each surface the largest of three demands is taken: the vm/row prefetch bandwidth,
/// the active read bandwidth (plus per-DPP meta and dpte rows), and the prefetch read bandwidth.
/// @param NumberOfActiveSurfaces Number of entries processed in the per-surface arrays
/// @param UseMALLForPStateChange Surfaces marked phantom_pipe are left out of PrefetchBandwidthNotIncludingMALLPrefetch
/// @param NotUrgentLatencyHiding Per-surface flag; any set entry clears PrefetchBandwidthSupport
/// @param PrefetchBandwidth Output: sum over all surfaces
/// @param PrefetchBandwidthNotIncludingMALLPrefetch Output: same sum without the phantom pipe surfaces
/// @param FractionOfUrgentBandwidth Output: PrefetchBandwidth / ReturnBW
/// @param PrefetchBandwidthSupport Output: PrefetchBandwidth <= ReturnBW and no surface has its flag set
static void CalculatePrefetchBandwithSupport(
		dml_uint_t NumberOfActiveSurfaces,
		dml_float_t ReturnBW,
		enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
		dml_bool_t NotUrgentLatencyHiding[],
		dml_float_t ReadBandwidthLuma[],
		dml_float_t ReadBandwidthChroma[],
		dml_float_t PrefetchBandwidthLuma[],
		dml_float_t PrefetchBandwidthChroma[],
		dml_float_t cursor_bw[],
		dml_float_t meta_row_bandwidth[],
		dml_float_t dpte_row_bandwidth[],
		dml_float_t cursor_bw_pre[],
		dml_float_t prefetch_vmrow_bw[],
		dml_uint_t NumberOfDPP[],
		dml_float_t UrgentBurstFactorLuma[],
		dml_float_t UrgentBurstFactorChroma[],
		dml_float_t UrgentBurstFactorCursor[],
		dml_float_t UrgentBurstFactorLumaPre[],
		dml_float_t UrgentBurstFactorChromaPre[],
		dml_float_t UrgentBurstFactorCursorPre[],

		// Output
		dml_float_t *PrefetchBandwidth,
		dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch,
		dml_float_t *FractionOfUrgentBandwidth,
		dml_bool_t *PrefetchBandwidthSupport)
{
	dml_bool_t AnySurfaceNotHiding = false;
	dml_float_t TotalBandwidth = 0;
	dml_float_t TotalBandwidthNoPhantom = 0;
	dml_float_t SurfaceBandwidth;
	dml_uint_t k;

	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		if (NotUrgentLatencyHiding[k])
			AnySurfaceNotHiding = true;

		// The per-surface term is identical for both totals, so it is evaluated once
		SurfaceBandwidth = dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
				ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
					+ cursor_bw[k] * UrgentBurstFactorCursor[k]
					+ NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]),
				NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k]
					+ PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k])
					+ cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);

		TotalBandwidth = TotalBandwidth + SurfaceBandwidth;
		if (UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe)
			TotalBandwidthNoPhantom = TotalBandwidthNoPhantom + SurfaceBandwidth;
	}

	*PrefetchBandwidth = TotalBandwidth;
	*PrefetchBandwidthNotIncludingMALLPrefetch = TotalBandwidthNoPhantom;
	*PrefetchBandwidthSupport = (TotalBandwidth <= ReturnBW) && !AnySurfaceNotHiding;
	*FractionOfUrgentBandwidth = TotalBandwidth / ReturnBW;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
	dml_print("DML::%s: PrefetchBandwidth = %f\n", __func__, *PrefetchBandwidth);
	dml_print("DML::%s: FractionOfUrgentBandwidth = %f\n", __func__, *FractionOfUrgentBandwidth);
	dml_print("DML::%s: PrefetchBandwidthSupport = %u\n", __func__, *PrefetchBandwidthSupport);
#endif
}
6039
CalculateBandwidthAvailableForImmediateFlip(dml_uint_t NumberOfActiveSurfaces,dml_float_t ReturnBW,dml_float_t ReadBandwidthLuma[],dml_float_t ReadBandwidthChroma[],dml_float_t PrefetchBandwidthLuma[],dml_float_t PrefetchBandwidthChroma[],dml_float_t cursor_bw[],dml_float_t cursor_bw_pre[],dml_uint_t NumberOfDPP[],dml_float_t UrgentBurstFactorLuma[],dml_float_t UrgentBurstFactorChroma[],dml_float_t UrgentBurstFactorCursor[],dml_float_t UrgentBurstFactorLumaPre[],dml_float_t UrgentBurstFactorChromaPre[],dml_float_t UrgentBurstFactorCursorPre[])6040 static noinline_for_stack dml_float_t CalculateBandwidthAvailableForImmediateFlip(
6041 dml_uint_t NumberOfActiveSurfaces,
6042 dml_float_t ReturnBW,
6043 dml_float_t ReadBandwidthLuma[],
6044 dml_float_t ReadBandwidthChroma[],
6045 dml_float_t PrefetchBandwidthLuma[],
6046 dml_float_t PrefetchBandwidthChroma[],
6047 dml_float_t cursor_bw[],
6048 dml_float_t cursor_bw_pre[],
6049 dml_uint_t NumberOfDPP[],
6050 dml_float_t UrgentBurstFactorLuma[],
6051 dml_float_t UrgentBurstFactorChroma[],
6052 dml_float_t UrgentBurstFactorCursor[],
6053 dml_float_t UrgentBurstFactorLumaPre[],
6054 dml_float_t UrgentBurstFactorChromaPre[],
6055 dml_float_t UrgentBurstFactorCursorPre[])
6056 {
6057 dml_float_t ret_val = ReturnBW;
6058
6059 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
6060 ret_val = ret_val - dml_max(ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6061 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) +
6062 cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6063 #ifdef __DML_VBA_DEBUG__
6064 dml_print("DML::%s: k=%u\n", __func__, k);
6065 dml_print("DML::%s: NumberOfDPP = %u\n", __func__, NumberOfDPP[k]);
6066 dml_print("DML::%s: ReadBandwidthLuma = %f\n", __func__, ReadBandwidthLuma[k]);
6067 dml_print("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, UrgentBurstFactorLuma[k]);
6068 dml_print("DML::%s: ReadBandwidthChroma = %f\n", __func__, ReadBandwidthChroma[k]);
6069 dml_print("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, UrgentBurstFactorChroma[k]);
6070 dml_print("DML::%s: cursor_bw = %f\n", __func__, cursor_bw[k]);
6071 dml_print("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, UrgentBurstFactorCursor[k]);
6072
6073 dml_print("DML::%s: PrefetchBandwidthLuma = %f\n", __func__, PrefetchBandwidthLuma[k]);
6074 dml_print("DML::%s: UrgentBurstFactorLumaPre = %f\n", __func__, UrgentBurstFactorLumaPre[k]);
6075 dml_print("DML::%s: PrefetchBandwidthChroma = %f\n", __func__, PrefetchBandwidthChroma[k]);
6076 dml_print("DML::%s: UrgentBurstFactorChromaPre = %f\n", __func__, UrgentBurstFactorChromaPre[k]);
6077 dml_print("DML::%s: cursor_bw_pre = %f\n", __func__, cursor_bw_pre[k]);
6078 dml_print("DML::%s: UrgentBurstFactorCursorPre = %f\n", __func__, UrgentBurstFactorCursorPre[k]);
6079 dml_print("DML::%s: ret_val = %f\n", __func__, ret_val);
6080 #endif
6081 }
6082
6083 return ret_val;
6084 }
6085
CalculateImmediateFlipBandwithSupport(dml_uint_t NumberOfActiveSurfaces,dml_float_t ReturnBW,enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],enum dml_immediate_flip_requirement ImmediateFlipRequirement[],dml_float_t final_flip_bw[],dml_float_t ReadBandwidthLuma[],dml_float_t ReadBandwidthChroma[],dml_float_t PrefetchBandwidthLuma[],dml_float_t PrefetchBandwidthChroma[],dml_float_t cursor_bw[],dml_float_t meta_row_bandwidth[],dml_float_t dpte_row_bandwidth[],dml_float_t cursor_bw_pre[],dml_float_t prefetch_vmrow_bw[],dml_uint_t NumberOfDPP[],dml_float_t UrgentBurstFactorLuma[],dml_float_t UrgentBurstFactorChroma[],dml_float_t UrgentBurstFactorCursor[],dml_float_t UrgentBurstFactorLumaPre[],dml_float_t UrgentBurstFactorChromaPre[],dml_float_t UrgentBurstFactorCursorPre[],dml_float_t * TotalBandwidth,dml_float_t * TotalBandwidthNotIncludingMALLPrefetch,dml_float_t * FractionOfUrgentBandwidth,dml_bool_t * ImmediateFlipBandwidthSupport)6086 static void CalculateImmediateFlipBandwithSupport(
6087 dml_uint_t NumberOfActiveSurfaces,
6088 dml_float_t ReturnBW,
6089 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
6090 enum dml_immediate_flip_requirement ImmediateFlipRequirement[],
6091 dml_float_t final_flip_bw[],
6092 dml_float_t ReadBandwidthLuma[],
6093 dml_float_t ReadBandwidthChroma[],
6094 dml_float_t PrefetchBandwidthLuma[],
6095 dml_float_t PrefetchBandwidthChroma[],
6096 dml_float_t cursor_bw[],
6097 dml_float_t meta_row_bandwidth[],
6098 dml_float_t dpte_row_bandwidth[],
6099 dml_float_t cursor_bw_pre[],
6100 dml_float_t prefetch_vmrow_bw[],
6101 dml_uint_t NumberOfDPP[],
6102 dml_float_t UrgentBurstFactorLuma[],
6103 dml_float_t UrgentBurstFactorChroma[],
6104 dml_float_t UrgentBurstFactorCursor[],
6105 dml_float_t UrgentBurstFactorLumaPre[],
6106 dml_float_t UrgentBurstFactorChromaPre[],
6107 dml_float_t UrgentBurstFactorCursorPre[],
6108
6109 // Output
6110 dml_float_t *TotalBandwidth,
6111 dml_float_t *TotalBandwidthNotIncludingMALLPrefetch,
6112 dml_float_t *FractionOfUrgentBandwidth,
6113 dml_bool_t *ImmediateFlipBandwidthSupport)
6114 {
6115 *TotalBandwidth = 0;
6116 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
6117 if (ImmediateFlipRequirement[k] != dml_immediate_flip_not_required) {
6118
6119
6120
6121 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6122 NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6123 NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6124 } else {
6125 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6126 NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6127 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6128 }
6129 #ifdef __DML_VBA_DEBUG__
6130 dml_print("DML::%s: k = %u\n", __func__, k);
6131 dml_print("DML::%s: ImmediateFlipRequirement = %u\n", __func__, ImmediateFlipRequirement[k]);
6132 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, *TotalBandwidth);
6133 dml_print("DML::%s: NumberOfDPP = %u\n", __func__, NumberOfDPP[k]);
6134 dml_print("DML::%s: prefetch_vmrow_bw = %f\n", __func__, prefetch_vmrow_bw[k]);
6135 dml_print("DML::%s: final_flip_bw = %f\n", __func__, final_flip_bw[k]);
6136 dml_print("DML::%s: ReadBandwidthLuma = %f\n", __func__, ReadBandwidthLuma[k]);
6137 dml_print("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, UrgentBurstFactorLuma[k]);
6138 dml_print("DML::%s: ReadBandwidthChroma = %f\n", __func__, ReadBandwidthChroma[k]);
6139 dml_print("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, UrgentBurstFactorChroma[k]);
6140 dml_print("DML::%s: cursor_bw = %f\n", __func__, cursor_bw[k]);
6141 dml_print("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, UrgentBurstFactorCursor[k]);
6142 dml_print("DML::%s: PrefetchBandwidthLuma = %f\n", __func__, PrefetchBandwidthLuma[k]);
6143 dml_print("DML::%s: UrgentBurstFactorLumaPre = %f\n", __func__, UrgentBurstFactorLumaPre[k]);
6144 dml_print("DML::%s: PrefetchBandwidthChroma = %f\n", __func__, PrefetchBandwidthChroma[k]);
6145 dml_print("DML::%s: UrgentBurstFactorChromaPre = %f\n", __func__, UrgentBurstFactorChromaPre[k]);
6146 dml_print("DML::%s: cursor_bw_pre = %f\n", __func__, cursor_bw_pre[k]);
6147 dml_print("DML::%s: UrgentBurstFactorCursorPre = %f\n", __func__, UrgentBurstFactorCursorPre[k]);
6148 dml_print("DML::%s: meta_row_bandwidth = %f\n", __func__, meta_row_bandwidth[k]);
6149 dml_print("DML::%s: dpte_row_bandwidth = %f\n", __func__, dpte_row_bandwidth[k]);
6150 #endif
6151 }
6152
6153 *TotalBandwidthNotIncludingMALLPrefetch = 0;
6154 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
6155 if (UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) {
6156 if (ImmediateFlipRequirement[k] != dml_immediate_flip_not_required)
6157 *TotalBandwidthNotIncludingMALLPrefetch = *TotalBandwidthNotIncludingMALLPrefetch + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6158 NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6159 NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k])
6160 + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6161 else
6162 *TotalBandwidthNotIncludingMALLPrefetch = *TotalBandwidthNotIncludingMALLPrefetch + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6163 NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k])
6164 + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6165 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k])
6166 + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6167 }
6168 }
6169
6170 *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
6171 *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
6172 #ifdef __DML_VBA_DEBUG__
6173 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6174 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, *TotalBandwidth);
6175 dml_print("DML::%s: ImmediateFlipBandwidthSupport = %u\n", __func__, *ImmediateFlipBandwidthSupport);
6176 #endif
6177 }
6178
/*
 * Convert a duration in microseconds into a number of video lines, rounded up.
 *
 * The line time is computed as 1000 * h_total / pixel_clock and stored as a
 * whole number of nanoseconds (presumably pixel_clock is in MHz - confirm
 * against callers). The fractional part of the line time is truncated before
 * the division, so the result can be slightly larger than an exact computation.
 *
 * Returns 0 when no usable line time can be derived: pixel_clock is zero,
 * negative or NaN, or the line time truncates to 0 ns. Without these guards
 * the function would convert an infinite/NaN value to an unsigned integer,
 * which is undefined behaviour.
 */
static dml_uint_t MicroSecToVertLines(dml_uint_t num_us, dml_uint_t h_total, dml_float_t pixel_clock)
{
	dml_uint_t lines_time_in_ns;

	// Written as !(x > 0) so that NaN is rejected as well.
	if (!(pixel_clock > 0))
		return 0;

	lines_time_in_ns = 1000.0 * (h_total * 1000.0) / (pixel_clock * 1000.0);

	// h_total == 0, or a line shorter than 1 ns: the division below would yield inf/NaN.
	if (lines_time_in_ns == 0)
		return 0;

	return dml_ceil(1000.0 * num_us / lines_time_in_ns, 1.0);
}
6185
6186 /// @brief Calculate the maximum vstartup for mode support and mode programming consideration
6187 /// Bounded by min of actual vblank and input vblank_nom, dont want vstartup/ready to start too early if actual vbllank is huge
CalculateMaxVStartup(dml_uint_t plane_idx,dml_bool_t ptoi_supported,dml_uint_t vblank_nom_default_us,struct dml_timing_cfg_st * timing,dml_float_t write_back_delay_us)6188 static dml_uint_t CalculateMaxVStartup(
6189 dml_uint_t plane_idx,
6190 dml_bool_t ptoi_supported,
6191 dml_uint_t vblank_nom_default_us,
6192 struct dml_timing_cfg_st *timing,
6193 dml_float_t write_back_delay_us)
6194 {
6195 dml_uint_t vblank_size = 0;
6196 dml_uint_t max_vstartup_lines = 0;
6197 const dml_uint_t max_allowed_vblank_nom = 1023;
6198
6199 dml_float_t line_time_us = (dml_float_t) timing->HTotal[plane_idx] / timing->PixelClock[plane_idx];
6200 dml_uint_t vblank_actual = timing->VTotal[plane_idx] - timing->VActive[plane_idx];
6201
6202 dml_uint_t vblank_nom_default_in_line = MicroSecToVertLines(vblank_nom_default_us, timing->HTotal[plane_idx],
6203 timing->PixelClock[plane_idx]);
6204 dml_uint_t vblank_nom_input = (dml_uint_t)dml_min(vblank_actual, vblank_nom_default_in_line);
6205
6206 // vblank_nom should not be smaller than (VSync (VTotal - VActive - VFrontPorch) + 2)
6207 // + 2 is because
6208 // 1 -> VStartup_start should be 1 line before VSync
6209 // 1 -> always reserve 1 line between start of VBlank to VStartup signal
6210 dml_uint_t vblank_nom_vsync_capped = dml_max(vblank_nom_input,
6211 timing->VTotal[plane_idx] - timing->VActive[plane_idx] - timing->VFrontPorch[plane_idx] + 2);
6212 dml_uint_t vblank_nom_max_allowed_capped = dml_min(vblank_nom_vsync_capped, max_allowed_vblank_nom);
6213 dml_uint_t vblank_avail = (vblank_nom_max_allowed_capped == 0) ?
6214 vblank_nom_default_in_line : vblank_nom_max_allowed_capped;
6215
6216 vblank_size = (dml_uint_t) dml_min(vblank_actual, vblank_avail);
6217
6218 if (timing->Interlace[plane_idx] && !ptoi_supported)
6219 max_vstartup_lines = (dml_uint_t) (dml_floor(vblank_size/2.0, 1.0));
6220 else
6221 max_vstartup_lines = vblank_size - (dml_uint_t) dml_max(1.0, dml_ceil(write_back_delay_us/line_time_us, 1.0));
6222 #ifdef __DML_VBA_DEBUG__
6223 dml_print("DML::%s: plane_idx = %u\n", __func__, plane_idx);
6224 dml_print("DML::%s: VBlankNom = %u\n", __func__, timing->VBlankNom[plane_idx]);
6225 dml_print("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us);
6226 dml_print("DML::%s: line_time_us = %f\n", __func__, line_time_us);
6227 dml_print("DML::%s: vblank_actual = %u\n", __func__, vblank_actual);
6228 dml_print("DML::%s: vblank_avail = %u\n", __func__, vblank_avail);
6229 dml_print("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines);
6230 #endif
6231 max_vstartup_lines = (dml_uint_t) dml_min(max_vstartup_lines, DML_MAX_VSTARTUP_START);
6232 return max_vstartup_lines;
6233 }
6234
set_calculate_prefetch_schedule_params(struct display_mode_lib_st * mode_lib,struct CalculatePrefetchSchedule_params_st * CalculatePrefetchSchedule_params,dml_uint_t j,dml_uint_t k)6235 static noinline_for_stack void set_calculate_prefetch_schedule_params(struct display_mode_lib_st *mode_lib,
6236 struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params,
6237 dml_uint_t j,
6238 dml_uint_t k)
6239 {
6240 CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelayPerState[k];
6241 CalculatePrefetchSchedule_params->EnhancedPrefetchScheduleAccelerationFinal = mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal;
6242 CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ms.ip.dppclk_delay_subtotal + mode_lib->ms.ip.dppclk_delay_cnvc_formatter;
6243 CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ms.ip.dppclk_delay_scl;
6244 CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ms.ip.dppclk_delay_scl_lb_only;
6245 CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ms.ip.dppclk_delay_cnvc_cursor;
6246 CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ms.ip.dispclk_delay_subtotal;
6247 CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (dml_uint_t)(mode_lib->ms.SwathWidthYThisState[k] / mode_lib->ms.cache_display_cfg.plane.HRatio[k]);
6248 CalculatePrefetchSchedule_params->OutputFormat = mode_lib->ms.cache_display_cfg.output.OutputFormat[k];
6249 CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters;
6250 CalculatePrefetchSchedule_params->GPUVMPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
6251 CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
6252 CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
6253 CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
6254 CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
6255 CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k];
6256 CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
6257 CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k];
6258 CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes[k];
6259 CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency;
6260 CalculatePrefetchSchedule_params->UrgentExtraLatency = mode_lib->ms.ExtraLatency;
6261 CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc;
6262 CalculatePrefetchSchedule_params->PDEAndMetaPTEBytesFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k];
6263 CalculatePrefetchSchedule_params->MetaRowByte = mode_lib->ms.MetaRowBytes[j][k];
6264 CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[j][k];
6265 CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[j][k];
6266 CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k];
6267 CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwY[k];
6268 CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[j][k];
6269 CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k];
6270 CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwC[k];
6271 CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub_this_state[k];
6272 CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub_this_state[k];
6273 CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState[k];
6274 CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState[k];
6275 CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait;
6276 CalculatePrefetchSchedule_params->DestinationLinesForPrefetch = &mode_lib->ms.LineTimesForPrefetch[k];
6277 CalculatePrefetchSchedule_params->DestinationLinesToRequestVMInVBlank = &mode_lib->ms.LinesForMetaPTE[k];
6278 CalculatePrefetchSchedule_params->DestinationLinesToRequestRowInVBlank = &mode_lib->ms.LinesForMetaAndDPTERow[k];
6279 CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[j][k];
6280 CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[j][k];
6281 CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k];
6282 CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k];
6283 CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.support.NoTimeForDynamicMetadata[j][k];
6284 CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k];
6285 }
6286
dml_prefetch_check(struct display_mode_lib_st * mode_lib)6287 static noinline_for_stack void dml_prefetch_check(struct display_mode_lib_st *mode_lib)
6288 {
6289 struct dml_core_mode_support_locals_st *s = &mode_lib->scratch.dml_core_mode_support_locals;
6290 struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
6291 struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
6292 struct DmlPipe *myPipe;
6293 dml_uint_t j, k;
6294
6295 for (j = 0; j < 2; ++j) {
6296 mode_lib->ms.TimeCalc = 24 / mode_lib->ms.ProjectedDCFCLKDeepSleep[j];
6297
6298 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
6299 mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k];
6300 mode_lib->ms.swath_width_luma_ub_this_state[k] = mode_lib->ms.swath_width_luma_ub_all_states[j][k];
6301 mode_lib->ms.swath_width_chroma_ub_this_state[k] = mode_lib->ms.swath_width_chroma_ub_all_states[j][k];
6302 mode_lib->ms.SwathWidthYThisState[k] = mode_lib->ms.SwathWidthYAllStates[j][k];
6303 mode_lib->ms.SwathWidthCThisState[k] = mode_lib->ms.SwathWidthCAllStates[j][k];
6304 mode_lib->ms.SwathHeightYThisState[k] = mode_lib->ms.SwathHeightYAllStates[j][k];
6305 mode_lib->ms.SwathHeightCThisState[k] = mode_lib->ms.SwathHeightCAllStates[j][k];
6306 mode_lib->ms.UnboundedRequestEnabledThisState = mode_lib->ms.UnboundedRequestEnabledAllStates[j];
6307 mode_lib->ms.CompressedBufferSizeInkByteThisState = mode_lib->ms.CompressedBufferSizeInkByteAllStates[j];
6308 mode_lib->ms.DETBufferSizeInKByteThisState[k] = mode_lib->ms.DETBufferSizeInKByteAllStates[j][k];
6309 mode_lib->ms.DETBufferSizeYThisState[k] = mode_lib->ms.DETBufferSizeYAllStates[j][k];
6310 mode_lib->ms.DETBufferSizeCThisState[k] = mode_lib->ms.DETBufferSizeCAllStates[j][k];
6311 }
6312
6313 mode_lib->ms.support.VActiveBandwithSupport[j] = CalculateVActiveBandwithSupport(
6314 mode_lib->ms.num_active_planes,
6315 mode_lib->ms.ReturnBWPerState[j],
6316 mode_lib->ms.NotUrgentLatencyHiding,
6317 mode_lib->ms.ReadBandwidthLuma,
6318 mode_lib->ms.ReadBandwidthChroma,
6319 mode_lib->ms.cursor_bw,
6320 mode_lib->ms.meta_row_bandwidth_this_state,
6321 mode_lib->ms.dpte_row_bandwidth_this_state,
6322 mode_lib->ms.NoOfDPPThisState,
6323 mode_lib->ms.UrgentBurstFactorLuma[j],
6324 mode_lib->ms.UrgentBurstFactorChroma[j],
6325 mode_lib->ms.UrgentBurstFactorCursor[j]);
6326
6327 s->VMDataOnlyReturnBWPerState = dml_get_return_bw_mbps_vm_only(
6328 &mode_lib->ms.soc,
6329 mode_lib->ms.state.use_ideal_dram_bw_strobe,
6330 mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
6331 mode_lib->ms.DCFCLKState[j],
6332 mode_lib->ms.state.fabricclk_mhz,
6333 mode_lib->ms.state.dram_speed_mts);
6334
6335 s->HostVMInefficiencyFactor = 1;
6336 if (mode_lib->ms.cache_display_cfg.plane.GPUVMEnable && mode_lib->ms.cache_display_cfg.plane.HostVMEnable)
6337 s->HostVMInefficiencyFactor = mode_lib->ms.ReturnBWPerState[j] / s->VMDataOnlyReturnBWPerState;
6338
6339 mode_lib->ms.ExtraLatency = CalculateExtraLatency(
6340 mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles,
6341 s->ReorderingBytes,
6342 mode_lib->ms.DCFCLKState[j],
6343 mode_lib->ms.TotalNumberOfActiveDPP[j],
6344 mode_lib->ms.ip.pixel_chunk_size_kbytes,
6345 mode_lib->ms.TotalNumberOfDCCActiveDPP[j],
6346 mode_lib->ms.ip.meta_chunk_size_kbytes,
6347 mode_lib->ms.ReturnBWPerState[j],
6348 mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
6349 mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
6350 mode_lib->ms.num_active_planes,
6351 mode_lib->ms.NoOfDPPThisState,
6352 mode_lib->ms.dpte_group_bytes,
6353 s->HostVMInefficiencyFactor,
6354 mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
6355 mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels);
6356
6357 s->NextMaxVStartup = s->MaxVStartupAllPlanes[j];
6358 s->MaxVStartup = 0;
6359 s->AllPrefetchModeTested = true;
6360 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6361 CalculatePrefetchMode(mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k], &s->MinPrefetchMode[k], &s->MaxPrefetchMode[k]);
6362 s->NextPrefetchMode[k] = s->MinPrefetchMode[k];
6363 }
6364
6365 do {
6366 s->MaxVStartup = s->NextMaxVStartup;
6367 s->AllPrefetchModeTested = true;
6368
6369 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6370 mode_lib->ms.PrefetchMode[k] = s->NextPrefetchMode[k];
6371 mode_lib->ms.TWait = CalculateTWait(
6372 mode_lib->ms.PrefetchMode[k],
6373 mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
6374 mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
6375 mode_lib->ms.cache_display_cfg.timing.DRRDisplay[k],
6376 mode_lib->ms.state.dram_clock_change_latency_us,
6377 mode_lib->ms.state.fclk_change_latency_us,
6378 mode_lib->ms.UrgLatency,
6379 mode_lib->ms.state.sr_enter_plus_exit_time_us);
6380
6381 myPipe = &s->myPipe;
6382 myPipe->Dppclk = mode_lib->ms.RequiredDPPCLKPerSurface[j][k];
6383 myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK[j];
6384 myPipe->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
6385 myPipe->DCFClkDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep[j];
6386 myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[j][k];
6387 myPipe->ScalerEnabled = mode_lib->ms.cache_display_cfg.plane.ScalerEnabled[k];
6388 myPipe->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k];
6389 myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
6390 myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
6391 myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
6392 myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
6393 myPipe->InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k];
6394 myPipe->NumberOfCursors = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k];
6395 myPipe->VBlank = mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k];
6396 myPipe->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k];
6397 myPipe->HActive = mode_lib->ms.cache_display_cfg.timing.HActive[k];
6398 myPipe->DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k];
6399 myPipe->ODMMode = mode_lib->ms.ODMModePerState[k];
6400 myPipe->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k];
6401 myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
6402 myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
6403 myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
6404
6405 #ifdef __DML_VBA_DEBUG__
6406 dml_print("DML::%s: Calling CalculatePrefetchSchedule for j=%u, k=%u\n", __func__, j, k);
6407 dml_print("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[j][k]);
6408 dml_print("DML::%s: MaxVStartup = %u\n", __func__, s->MaxVStartup);
6409 dml_print("DML::%s: NextPrefetchMode = %u\n", __func__, s->NextPrefetchMode[k]);
6410 dml_print("DML::%s: AllowForPStateChangeOrStutterInVBlank = %u\n", __func__, mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k]);
6411 dml_print("DML::%s: PrefetchMode = %u\n", __func__, mode_lib->ms.PrefetchMode[k]);
6412 #endif
6413
6414 CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactor;
6415 CalculatePrefetchSchedule_params->myPipe = myPipe;
6416 CalculatePrefetchSchedule_params->VStartup = (dml_uint_t)(dml_min(s->MaxVStartup, s->MaximumVStartup[j][k]));
6417 CalculatePrefetchSchedule_params->MaxVStartup = s->MaximumVStartup[j][k];
6418 CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k];
6419 CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k];
6420 CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k];
6421 CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0];
6422 CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1];
6423 CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2];
6424 CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0];
6425 CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1];
6426 CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2];
6427
6428 set_calculate_prefetch_schedule_params(mode_lib, CalculatePrefetchSchedule_params, j, k);
6429
6430 mode_lib->ms.support.NoTimeForPrefetch[j][k] =
6431 CalculatePrefetchSchedule(&mode_lib->scratch,
6432 CalculatePrefetchSchedule_params);
6433 }
6434
6435 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6436 CalculateUrgentBurstFactor(
6437 mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
6438 mode_lib->ms.swath_width_luma_ub_this_state[k],
6439 mode_lib->ms.swath_width_chroma_ub_this_state[k],
6440 mode_lib->ms.SwathHeightYThisState[k],
6441 mode_lib->ms.SwathHeightCThisState[k],
6442 mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
6443 mode_lib->ms.UrgLatency,
6444 mode_lib->ms.ip.cursor_buffer_size,
6445 mode_lib->ms.cache_display_cfg.plane.CursorWidth[k],
6446 mode_lib->ms.cache_display_cfg.plane.CursorBPP[k],
6447 mode_lib->ms.VRatioPreY[j][k],
6448 mode_lib->ms.VRatioPreC[j][k],
6449 mode_lib->ms.BytePerPixelInDETY[k],
6450 mode_lib->ms.BytePerPixelInDETC[k],
6451 mode_lib->ms.DETBufferSizeYThisState[k],
6452 mode_lib->ms.DETBufferSizeCThisState[k],
6453 /* Output */
6454 &mode_lib->ms.UrgentBurstFactorCursorPre[k],
6455 &mode_lib->ms.UrgentBurstFactorLumaPre[k],
6456 &mode_lib->ms.UrgentBurstFactorChromaPre[k],
6457 &mode_lib->ms.NotUrgentLatencyHidingPre[k]);
6458
6459 mode_lib->ms.cursor_bw_pre[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] *
6460 mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] /
6461 mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.VRatioPreY[j][k];
6462 }
6463
6464 {
6465 CalculatePrefetchBandwithSupport(
6466 mode_lib->ms.num_active_planes,
6467 mode_lib->ms.ReturnBWPerState[j],
6468 mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
6469 mode_lib->ms.NotUrgentLatencyHidingPre,
6470 mode_lib->ms.ReadBandwidthLuma,
6471 mode_lib->ms.ReadBandwidthChroma,
6472 mode_lib->ms.RequiredPrefetchPixelDataBWLuma,
6473 mode_lib->ms.RequiredPrefetchPixelDataBWChroma,
6474 mode_lib->ms.cursor_bw,
6475 mode_lib->ms.meta_row_bandwidth_this_state,
6476 mode_lib->ms.dpte_row_bandwidth_this_state,
6477 mode_lib->ms.cursor_bw_pre,
6478 mode_lib->ms.prefetch_vmrow_bw,
6479 mode_lib->ms.NoOfDPPThisState,
6480 mode_lib->ms.UrgentBurstFactorLuma[j],
6481 mode_lib->ms.UrgentBurstFactorChroma[j],
6482 mode_lib->ms.UrgentBurstFactorCursor[j],
6483 mode_lib->ms.UrgentBurstFactorLumaPre,
6484 mode_lib->ms.UrgentBurstFactorChromaPre,
6485 mode_lib->ms.UrgentBurstFactorCursorPre,
6486
6487 /* output */
6488 &s->dummy_single[0], // dml_float_t *PrefetchBandwidth
6489 &s->dummy_single[1], // dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch
6490 &mode_lib->mp.FractionOfUrgentBandwidth, // dml_float_t *FractionOfUrgentBandwidth
6491 &mode_lib->ms.support.PrefetchSupported[j]);
6492 }
6493
6494 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6495 if (mode_lib->ms.LineTimesForPrefetch[k] < 2.0
6496 || mode_lib->ms.LinesForMetaPTE[k] >= 32.0
6497 || mode_lib->ms.LinesForMetaAndDPTERow[k] >= 16.0
6498 || mode_lib->ms.support.NoTimeForPrefetch[j][k] == true) {
6499 mode_lib->ms.support.PrefetchSupported[j] = false;
6500 }
6501 }
6502
6503 mode_lib->ms.support.DynamicMetadataSupported[j] = true;
6504 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
6505 if (mode_lib->ms.support.NoTimeForDynamicMetadata[j][k] == true) {
6506 mode_lib->ms.support.DynamicMetadataSupported[j] = false;
6507 }
6508 }
6509
6510 mode_lib->ms.support.VRatioInPrefetchSupported[j] = true;
6511 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6512 if (mode_lib->ms.support.NoTimeForPrefetch[j][k] == true ||
6513 mode_lib->ms.VRatioPreY[j][k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ ||
6514 mode_lib->ms.VRatioPreC[j][k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ ||
6515 ((s->MaxVStartup < s->MaximumVStartup[j][k] || mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal == 0) &&
6516 (mode_lib->ms.VRatioPreY[j][k] > __DML_MAX_VRATIO_PRE__ || mode_lib->ms.VRatioPreC[j][k] > __DML_MAX_VRATIO_PRE__))) {
6517 mode_lib->ms.support.VRatioInPrefetchSupported[j] = false;
6518 }
6519 }
6520
6521 s->AnyLinesForVMOrRowTooLarge = false;
6522 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
6523 if (mode_lib->ms.LinesForMetaAndDPTERow[k] >= 16 || mode_lib->ms.LinesForMetaPTE[k] >= 32) {
6524 s->AnyLinesForVMOrRowTooLarge = true;
6525 }
6526 }
6527
6528 if (mode_lib->ms.support.PrefetchSupported[j] == true && mode_lib->ms.support.VRatioInPrefetchSupported[j] == true) {
6529 mode_lib->ms.BandwidthAvailableForImmediateFlip = CalculateBandwidthAvailableForImmediateFlip(
6530 mode_lib->ms.num_active_planes,
6531 mode_lib->ms.ReturnBWPerState[j],
6532 mode_lib->ms.ReadBandwidthLuma,
6533 mode_lib->ms.ReadBandwidthChroma,
6534 mode_lib->ms.RequiredPrefetchPixelDataBWLuma,
6535 mode_lib->ms.RequiredPrefetchPixelDataBWChroma,
6536 mode_lib->ms.cursor_bw,
6537 mode_lib->ms.cursor_bw_pre,
6538 mode_lib->ms.NoOfDPPThisState,
6539 mode_lib->ms.UrgentBurstFactorLuma[j],
6540 mode_lib->ms.UrgentBurstFactorChroma[j],
6541 mode_lib->ms.UrgentBurstFactorCursor[j],
6542 mode_lib->ms.UrgentBurstFactorLumaPre,
6543 mode_lib->ms.UrgentBurstFactorChromaPre,
6544 mode_lib->ms.UrgentBurstFactorCursorPre);
6545
6546 mode_lib->ms.TotImmediateFlipBytes = 0;
6547 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6548 if (!(mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_not_required)) {
6549 mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * (mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k] + mode_lib->ms.MetaRowBytes[j][k]);
6550 if (mode_lib->ms.use_one_row_for_frame_flip[j][k]) {
6551 mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * (2 * mode_lib->ms.DPTEBytesPerRow[j][k]);
6552 } else {
6553 mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * mode_lib->ms.DPTEBytesPerRow[j][k];
6554 }
6555 }
6556 }
6557
6558 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6559 CalculateFlipSchedule(
6560 s->HostVMInefficiencyFactor,
6561 mode_lib->ms.ExtraLatency,
6562 mode_lib->ms.UrgLatency,
6563 mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels,
6564 mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
6565 mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels,
6566 mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
6567 mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
6568 mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k],
6569 mode_lib->ms.MetaRowBytes[j][k],
6570 mode_lib->ms.DPTEBytesPerRow[j][k],
6571 mode_lib->ms.BandwidthAvailableForImmediateFlip,
6572 mode_lib->ms.TotImmediateFlipBytes,
6573 mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
6574 (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]),
6575 mode_lib->ms.cache_display_cfg.plane.VRatio[k],
6576 mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
6577 mode_lib->ms.Tno_bw[k],
6578 mode_lib->ms.cache_display_cfg.surface.DCCEnable[k],
6579 mode_lib->ms.dpte_row_height[k],
6580 mode_lib->ms.meta_row_height[k],
6581 mode_lib->ms.dpte_row_height_chroma[k],
6582 mode_lib->ms.meta_row_height_chroma[k],
6583 mode_lib->ms.use_one_row_for_frame_flip[j][k], // 24
6584
6585 /* Output */
6586 &mode_lib->ms.DestinationLinesToRequestVMInImmediateFlip[k],
6587 &mode_lib->ms.DestinationLinesToRequestRowInImmediateFlip[k],
6588 &mode_lib->ms.final_flip_bw[k],
6589 &mode_lib->ms.ImmediateFlipSupportedForPipe[k]);
6590 }
6591
6592 {
6593 CalculateImmediateFlipBandwithSupport(mode_lib->ms.num_active_planes,
6594 mode_lib->ms.ReturnBWPerState[j],
6595 mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
6596 mode_lib->ms.policy.ImmediateFlipRequirement,
6597 mode_lib->ms.final_flip_bw,
6598 mode_lib->ms.ReadBandwidthLuma,
6599 mode_lib->ms.ReadBandwidthChroma,
6600 mode_lib->ms.RequiredPrefetchPixelDataBWLuma,
6601 mode_lib->ms.RequiredPrefetchPixelDataBWChroma,
6602 mode_lib->ms.cursor_bw,
6603 mode_lib->ms.meta_row_bandwidth_this_state,
6604 mode_lib->ms.dpte_row_bandwidth_this_state,
6605 mode_lib->ms.cursor_bw_pre,
6606 mode_lib->ms.prefetch_vmrow_bw,
6607 mode_lib->ms.NoOfDPP[j], // VBA_ERROR DPPPerSurface is not assigned at this point, should use NoOfDpp here
6608 mode_lib->ms.UrgentBurstFactorLuma[j],
6609 mode_lib->ms.UrgentBurstFactorChroma[j],
6610 mode_lib->ms.UrgentBurstFactorCursor[j],
6611 mode_lib->ms.UrgentBurstFactorLumaPre,
6612 mode_lib->ms.UrgentBurstFactorChromaPre,
6613 mode_lib->ms.UrgentBurstFactorCursorPre,
6614
6615 /* output */
6616 &s->dummy_single[0], // dml_float_t *TotalBandwidth
6617 &s->dummy_single[1], // dml_float_t *TotalBandwidthNotIncludingMALLPrefetch
6618 &s->dummy_single[2], // dml_float_t *FractionOfUrgentBandwidth
6619 &mode_lib->ms.support.ImmediateFlipSupportedForState[j]); // dml_bool_t *ImmediateFlipBandwidthSupport
6620 }
6621
6622 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6623 if (!(mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_not_required) && (mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false))
6624 mode_lib->ms.support.ImmediateFlipSupportedForState[j] = false;
6625 }
6626
6627 } else { // if prefetch not support, assume iflip not supported
6628 mode_lib->ms.support.ImmediateFlipSupportedForState[j] = false;
6629 }
6630
6631 if (s->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || s->AnyLinesForVMOrRowTooLarge == false) {
6632 s->NextMaxVStartup = s->MaxVStartupAllPlanes[j];
6633 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6634 s->NextPrefetchMode[k] = s->NextPrefetchMode[k] + 1;
6635
6636 if (s->NextPrefetchMode[k] <= s->MaxPrefetchMode[k])
6637 s->AllPrefetchModeTested = false;
6638 }
6639 } else {
6640 s->NextMaxVStartup = s->NextMaxVStartup - 1;
6641 }
6642 } while (!((mode_lib->ms.support.PrefetchSupported[j] == true && mode_lib->ms.support.DynamicMetadataSupported[j] == true &&
6643 mode_lib->ms.support.VRatioInPrefetchSupported[j] == true &&
6644 // consider flip support is okay if when there is no hostvm and the user does't require a iflip OR the flip bw is ok
6645 // If there is hostvm, DCN needs to support iflip for invalidation
6646 ((s->ImmediateFlipRequiredFinal) || mode_lib->ms.support.ImmediateFlipSupportedForState[j] == true)) ||
6647 (s->NextMaxVStartup == s->MaxVStartupAllPlanes[j] && s->AllPrefetchModeTested)));
6648
6649 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
6650 mode_lib->ms.use_one_row_for_frame_this_state[k] = mode_lib->ms.use_one_row_for_frame[j][k];
6651 }
6652
6653 s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency;
6654 s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency;
6655 s->mSOCParameters.WritebackLatency = mode_lib->ms.state.writeback_latency_us;
6656 s->mSOCParameters.DRAMClockChangeLatency = mode_lib->ms.state.dram_clock_change_latency_us;
6657 s->mSOCParameters.FCLKChangeLatency = mode_lib->ms.state.fclk_change_latency_us;
6658 s->mSOCParameters.SRExitTime = mode_lib->ms.state.sr_exit_time_us;
6659 s->mSOCParameters.SREnterPlusExitTime = mode_lib->ms.state.sr_enter_plus_exit_time_us;
6660 s->mSOCParameters.SRExitZ8Time = mode_lib->ms.state.sr_exit_z8_time_us;
6661 s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->ms.state.sr_enter_plus_exit_z8_time_us;
6662 s->mSOCParameters.USRRetrainingLatency = mode_lib->ms.state.usr_retraining_latency_us;
6663 s->mSOCParameters.SMNLatency = mode_lib->ms.soc.smn_latency_us;
6664
6665 CalculateWatermarks_params->USRRetrainingRequiredFinal = mode_lib->ms.policy.USRRetrainingRequiredFinal;
6666 CalculateWatermarks_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
6667 CalculateWatermarks_params->PrefetchMode = mode_lib->ms.PrefetchMode;
6668 CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
6669 CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ms.ip.max_line_buffer_lines;
6670 CalculateWatermarks_params->LineBufferSize = mode_lib->ms.ip.line_buffer_size_bits;
6671 CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ms.ip.writeback_interface_buffer_size_kbytes;
6672 CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLKState[j];
6673 CalculateWatermarks_params->ReturnBW = mode_lib->ms.ReturnBWPerState[j];
6674 CalculateWatermarks_params->SynchronizeTimingsFinal = mode_lib->ms.policy.SynchronizeTimingsFinal;
6675 CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal;
6676 CalculateWatermarks_params->DRRDisplay = mode_lib->ms.cache_display_cfg.timing.DRRDisplay;
6677 CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
6678 CalculateWatermarks_params->meta_row_height = mode_lib->ms.meta_row_height;
6679 CalculateWatermarks_params->meta_row_height_chroma = mode_lib->ms.meta_row_height_chroma;
6680 CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters;
6681 CalculateWatermarks_params->WritebackChunkSize = mode_lib->ms.ip.writeback_chunk_size_kbytes;
6682 CalculateWatermarks_params->SOCCLK = mode_lib->ms.state.socclk_mhz;
6683 CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep[j];
6684 CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeYThisState;
6685 CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeCThisState;
6686 CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState;
6687 CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState;
6688 CalculateWatermarks_params->LBBitPerPixel = mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel;
6689 CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthYThisState;
6690 CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthCThisState;
6691 CalculateWatermarks_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
6692 CalculateWatermarks_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
6693 CalculateWatermarks_params->VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps;
6694 CalculateWatermarks_params->VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma;
6695 CalculateWatermarks_params->VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio;
6696 CalculateWatermarks_params->VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma;
6697 CalculateWatermarks_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal;
6698 CalculateWatermarks_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal;
6699 CalculateWatermarks_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive;
6700 CalculateWatermarks_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock;
6701 CalculateWatermarks_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
6702 CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPPThisState;
6703 CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY;
6704 CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC;
6705 CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler;
6706 CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler;
6707 CalculateWatermarks_params->WritebackEnable = mode_lib->ms.cache_display_cfg.writeback.WritebackEnable;
6708 CalculateWatermarks_params->WritebackPixelFormat = mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat;
6709 CalculateWatermarks_params->WritebackDestinationWidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth;
6710 CalculateWatermarks_params->WritebackDestinationHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight;
6711 CalculateWatermarks_params->WritebackSourceHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight;
6712 CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabledThisState;
6713 CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByteThisState;
6714
6715 // Output
6716 CalculateWatermarks_params->Watermark = &s->dummy_watermark; // Watermarks *Watermark
6717 CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[j];
6718 CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[]
6719 CalculateWatermarks_params->SubViewportLinesNeededInMALL = &mode_lib->ms.SubViewportLinesNeededInMALL[j]; // dml_uint_t SubViewportLinesNeededInMALL[]
6720 CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[j];
6721 CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // dml_float_t *MaxActiveFCLKChangeLatencySupported
6722 CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport[j];
6723 CalculateWatermarks_params->ActiveDRAMClockChangeLatencyMargin = mode_lib->ms.support.ActiveDRAMClockChangeLatencyMargin;
6724
6725 CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch,
6726 CalculateWatermarks_params);
6727
6728 } // for j
6729 }
6730
set_vm_row_and_swath_parameters(struct display_mode_lib_st * mode_lib)6731 static noinline_for_stack void set_vm_row_and_swath_parameters(struct display_mode_lib_st *mode_lib)
6732 {
6733 struct CalculateVMRowAndSwath_params_st *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
6734 struct dml_core_mode_support_locals_st *s = &mode_lib->scratch.dml_core_mode_support_locals;
6735
6736 CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
6737 CalculateVMRowAndSwath_params->myPipe = s->SurfParameters;
6738 CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->ms.SurfaceSizeInMALL;
6739 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_luma;
6740 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_chroma;
6741 CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ms.ip.dcc_meta_buffer_size_bytes;
6742 CalculateVMRowAndSwath_params->UseMALLForStaticScreen = mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen;
6743 CalculateVMRowAndSwath_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
6744 CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->ms.soc.mall_allocated_for_dcn_mbytes;
6745 CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->ms.SwathWidthYThisState;
6746 CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->ms.SwathWidthCThisState;
6747 CalculateVMRowAndSwath_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
6748 CalculateVMRowAndSwath_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
6749 CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
6750 CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
6751 CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes;
6752 CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
6753 CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn;
6754 CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode;
6755 CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceededPerState;
6756 CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = mode_lib->ms.DCCMetaBufferSizeNotExceededPerState;
6757 CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = s->dummy_integer_array[0];
6758 CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = s->dummy_integer_array[1];
6759 CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->ms.dpte_row_height;
6760 CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->ms.dpte_row_height_chroma;
6761 CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = s->dummy_integer_array[2]; // VBA_DELTA
6762 CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = s->dummy_integer_array[3]; // VBA_DELTA
6763 CalculateVMRowAndSwath_params->meta_req_width = s->dummy_integer_array[4];
6764 CalculateVMRowAndSwath_params->meta_req_width_chroma = s->dummy_integer_array[5];
6765 CalculateVMRowAndSwath_params->meta_req_height = s->dummy_integer_array[6];
6766 CalculateVMRowAndSwath_params->meta_req_height_chroma = s->dummy_integer_array[7];
6767 CalculateVMRowAndSwath_params->meta_row_width = s->dummy_integer_array[8];
6768 CalculateVMRowAndSwath_params->meta_row_width_chroma = s->dummy_integer_array[9];
6769 CalculateVMRowAndSwath_params->meta_row_height = mode_lib->ms.meta_row_height;
6770 CalculateVMRowAndSwath_params->meta_row_height_chroma = mode_lib->ms.meta_row_height_chroma;
6771 CalculateVMRowAndSwath_params->vm_group_bytes = s->dummy_integer_array[10];
6772 CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
6773 CalculateVMRowAndSwath_params->PixelPTEReqWidthY = s->dummy_integer_array[11];
6774 CalculateVMRowAndSwath_params->PixelPTEReqHeightY = s->dummy_integer_array[12];
6775 CalculateVMRowAndSwath_params->PTERequestSizeY = s->dummy_integer_array[13];
6776 CalculateVMRowAndSwath_params->PixelPTEReqWidthC = s->dummy_integer_array[14];
6777 CalculateVMRowAndSwath_params->PixelPTEReqHeightC = s->dummy_integer_array[15];
6778 CalculateVMRowAndSwath_params->PTERequestSizeC = s->dummy_integer_array[16];
6779 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = s->dummy_integer_array[17];
6780 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = s->dummy_integer_array[18];
6781 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = s->dummy_integer_array[19];
6782 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = s->dummy_integer_array[20];
6783 CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesYThisState;
6784 CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesCThisState;
6785 CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->ms.PrefillY;
6786 CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->ms.PrefillC;
6787 CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->ms.MaxNumSwY;
6788 CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->ms.MaxNumSwC;
6789 CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->ms.meta_row_bandwidth_this_state;
6790 CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->ms.dpte_row_bandwidth_this_state;
6791 CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRowThisState;
6792 CalculateVMRowAndSwath_params->PDEAndMetaPTEBytesFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrameThisState;
6793 CalculateVMRowAndSwath_params->MetaRowByte = mode_lib->ms.MetaRowBytesThisState;
6794 CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->ms.use_one_row_for_frame_this_state;
6795 CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->ms.use_one_row_for_frame_flip_this_state;
6796 CalculateVMRowAndSwath_params->UsesMALLForStaticScreen = s->dummy_boolean_array[0];
6797 CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = s->dummy_boolean_array[1];
6798 CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = s->dummy_integer_array[21];
6799 }
6800
6801 /// @brief The Mode Support function.
dml_core_mode_support(struct display_mode_lib_st * mode_lib)6802 dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib)
6803 {
6804 struct dml_core_mode_support_locals_st *s = &mode_lib->scratch.dml_core_mode_support_locals;
6805 struct UseMinimumDCFCLK_params_st *UseMinimumDCFCLK_params = &mode_lib->scratch.UseMinimumDCFCLK_params;
6806 struct CalculateSwathAndDETConfiguration_params_st *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
6807 struct CalculateVMRowAndSwath_params_st *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
6808
6809 dml_uint_t j, k, m;
6810
6811 mode_lib->ms.num_active_planes = dml_get_num_active_planes(&mode_lib->ms.cache_display_cfg);
6812 dml_print("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes);
6813
6814 CalculateMaxDETAndMinCompressedBufferSize(
6815 mode_lib->ms.ip.config_return_buffer_size_in_kbytes,
6816 mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes,
6817 mode_lib->ms.ip.rob_buffer_size_kbytes,
6818 mode_lib->ms.ip.max_num_dpp,
6819 mode_lib->ms.policy.NomDETInKByteOverrideEnable, // VBA_DELTA
6820 mode_lib->ms.policy.NomDETInKByteOverrideValue, // VBA_DELTA
6821
6822 /* Output */
6823 &mode_lib->ms.MaxTotalDETInKByte,
6824 &mode_lib->ms.NomDETInKByte,
6825 &mode_lib->ms.MinCompressedBufferSizeInKByte);
6826
6827 PixelClockAdjustmentForProgressiveToInterlaceUnit(&mode_lib->ms.cache_display_cfg, mode_lib->ms.ip.ptoi_supported);
6828
6829
6830 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
6831
6832 /*Scale Ratio, taps Support Check*/
6833 mode_lib->ms.support.ScaleRatioAndTapsSupport = true;
6834 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6835 if (mode_lib->ms.cache_display_cfg.plane.ScalerEnabled[k] == false
6836 && ((mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64
6837 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32
6838 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16
6839 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_16
6840 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_8
6841 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe
6842 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe_alpha)
6843 || mode_lib->ms.cache_display_cfg.plane.HRatio[k] != 1.0
6844 || mode_lib->ms.cache_display_cfg.plane.HTaps[k] != 1.0
6845 || mode_lib->ms.cache_display_cfg.plane.VRatio[k] != 1.0
6846 || mode_lib->ms.cache_display_cfg.plane.VTaps[k] != 1.0)) {
6847 mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
6848 } else if (mode_lib->ms.cache_display_cfg.plane.VTaps[k] < 1.0 || mode_lib->ms.cache_display_cfg.plane.VTaps[k] > 8.0
6849 || mode_lib->ms.cache_display_cfg.plane.HTaps[k] < 1.0 || mode_lib->ms.cache_display_cfg.plane.HTaps[k] > 8.0
6850 || (mode_lib->ms.cache_display_cfg.plane.HTaps[k] > 1.0 && (mode_lib->ms.cache_display_cfg.plane.HTaps[k] % 2) == 1)
6851 || mode_lib->ms.cache_display_cfg.plane.HRatio[k] > mode_lib->ms.ip.max_hscl_ratio
6852 || mode_lib->ms.cache_display_cfg.plane.VRatio[k] > mode_lib->ms.ip.max_vscl_ratio
6853 || mode_lib->ms.cache_display_cfg.plane.HRatio[k] > mode_lib->ms.cache_display_cfg.plane.HTaps[k]
6854 || mode_lib->ms.cache_display_cfg.plane.VRatio[k] > mode_lib->ms.cache_display_cfg.plane.VTaps[k]
6855 || (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64
6856 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32
6857 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16
6858 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_16
6859 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_8
6860 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe
6861 && (mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k] < 1 || mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k] > 8 || mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] < 1 || mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] > 8 ||
6862 (mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] > 1 && mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] % 2 == 1) ||
6863 mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k] > mode_lib->ms.ip.max_hscl_ratio ||
6864 mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k] > mode_lib->ms.ip.max_vscl_ratio ||
6865 mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k] > mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] ||
6866 mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k] > mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k]))) {
6867 mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
6868 }
6869 }
6870
6871 /*Source Format, Pixel Format and Scan Support Check*/
6872 mode_lib->ms.support.SourceFormatPixelAndScanSupport = true;
6873 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6874 if (mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k] == dml_sw_linear && (!(!dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k])) || mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true)) {
6875 mode_lib->ms.support.SourceFormatPixelAndScanSupport = false;
6876 }
6877 }
6878
6879 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6880 CalculateBytePerPixelAndBlockSizes(
6881 mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
6882 mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k],
6883
6884 /* Output */
6885 &mode_lib->ms.BytePerPixelY[k],
6886 &mode_lib->ms.BytePerPixelC[k],
6887 &mode_lib->ms.BytePerPixelInDETY[k],
6888 &mode_lib->ms.BytePerPixelInDETC[k],
6889 &mode_lib->ms.Read256BlockHeightY[k],
6890 &mode_lib->ms.Read256BlockHeightC[k],
6891 &mode_lib->ms.Read256BlockWidthY[k],
6892 &mode_lib->ms.Read256BlockWidthC[k],
6893 &mode_lib->ms.MacroTileHeightY[k],
6894 &mode_lib->ms.MacroTileHeightC[k],
6895 &mode_lib->ms.MacroTileWidthY[k],
6896 &mode_lib->ms.MacroTileWidthC[k]);
6897 }
6898
6899 /*Bandwidth Support Check*/
6900 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6901 if (!dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k])) {
6902 mode_lib->ms.SwathWidthYSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportWidth[k];
6903 mode_lib->ms.SwathWidthCSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma[k];
6904 } else {
6905 mode_lib->ms.SwathWidthYSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k];
6906 mode_lib->ms.SwathWidthCSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k];
6907 }
6908 }
6909 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6910 mode_lib->ms.ReadBandwidthLuma[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * dml_ceil(mode_lib->ms.BytePerPixelInDETY[k], 1.0) / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k];
6911 mode_lib->ms.ReadBandwidthChroma[k] = mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * dml_ceil(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k] / 2.0;
6912 }
6913 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6914 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true
6915 && mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k] == dml_444_64) {
6916 mode_lib->ms.WriteBandwidth[k] = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k]
6917 * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k]
6918 / (mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k]
6919 * mode_lib->ms.cache_display_cfg.timing.HTotal[k]
6920 / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 8.0;
6921 } else if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
6922 mode_lib->ms.WriteBandwidth[k] = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k]
6923 * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k]
6924 / (mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k]
6925 * mode_lib->ms.cache_display_cfg.timing.HTotal[k]
6926 / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 4.0;
6927 } else {
6928 mode_lib->ms.WriteBandwidth[k] = 0.0;
6929 }
6930 }
6931
6932 /*Writeback Latency support check*/
6933 mode_lib->ms.support.WritebackLatencySupport = true;
6934 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6935 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true &&
6936 (mode_lib->ms.WriteBandwidth[k] > mode_lib->ms.ip.writeback_interface_buffer_size_kbytes * 1024 / mode_lib->ms.state.writeback_latency_us)) {
6937 mode_lib->ms.support.WritebackLatencySupport = false;
6938 }
6939 }
6940
6941 /*Writeback Mode Support Check*/
6942 s->TotalNumberOfActiveWriteback = 0;
6943 for (k = 0; k <= (dml_uint_t) mode_lib->ms.num_active_planes - 1; k++) {
6944 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
6945 s->TotalNumberOfActiveWriteback = s->TotalNumberOfActiveWriteback + 1;
6946 }
6947 }
6948
6949 mode_lib->ms.support.EnoughWritebackUnits = 1;
6950 if (s->TotalNumberOfActiveWriteback > (dml_uint_t) mode_lib->ms.ip.max_num_wb) {
6951 mode_lib->ms.support.EnoughWritebackUnits = false;
6952 }
6953
6954 /*Writeback Scale Ratio and Taps Support Check*/
6955 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = true;
6956 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6957 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
6958 if (mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k] > mode_lib->ms.ip.writeback_max_hscl_ratio
6959 || mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k] > mode_lib->ms.ip.writeback_max_vscl_ratio
6960 || mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k] < mode_lib->ms.ip.writeback_min_hscl_ratio
6961 || mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k] < mode_lib->ms.ip.writeback_min_vscl_ratio
6962 || mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k] > (dml_uint_t) mode_lib->ms.ip.writeback_max_hscl_taps
6963 || mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k] > (dml_uint_t) mode_lib->ms.ip.writeback_max_vscl_taps
6964 || mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k] > (dml_uint_t) mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k]
6965 || mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k] > (dml_uint_t) mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k]
6966 || (mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k] > 2.0 && ((mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k] % 2) == 1))) {
6967 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
6968 }
6969 if (2.0 * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] * (mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k] - 1) * 57 > mode_lib->ms.ip.writeback_line_buffer_buffer_size) {
6970 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
6971 }
6972 }
6973 }
6974
6975 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6976 CalculateSinglePipeDPPCLKAndSCLThroughput(
6977 mode_lib->ms.cache_display_cfg.plane.HRatio[k],
6978 mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k],
6979 mode_lib->ms.cache_display_cfg.plane.VRatio[k],
6980 mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
6981 mode_lib->ms.ip.max_dchub_pscl_bw_pix_per_clk,
6982 mode_lib->ms.ip.max_pscl_lb_bw_pix_per_clk,
6983 mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
6984 mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
6985 mode_lib->ms.cache_display_cfg.plane.HTaps[k],
6986 mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k],
6987 mode_lib->ms.cache_display_cfg.plane.VTaps[k],
6988 mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k],
6989 /* Output */
6990 &mode_lib->ms.PSCL_FACTOR[k],
6991 &mode_lib->ms.PSCL_FACTOR_CHROMA[k],
6992 &mode_lib->ms.MinDPPCLKUsingSingleDPP[k]);
6993 }
6994
6995 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6996 if (mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k] == dml_sw_linear) {
6997 s->MaximumSwathWidthSupportLuma = 8192;
6998 } else if (!dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k]) && mode_lib->ms.BytePerPixelC[k] > 0 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe_alpha) {
6999 s->MaximumSwathWidthSupportLuma = 7680;
7000 } else if (dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k]) && mode_lib->ms.BytePerPixelC[k] > 0 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe_alpha) {
7001 s->MaximumSwathWidthSupportLuma = 4320;
7002 } else if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_rgbe_alpha) {
7003 s->MaximumSwathWidthSupportLuma = 3840;
7004 } else if (dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k]) && mode_lib->ms.BytePerPixelY[k] == 8 && mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) {
7005 s->MaximumSwathWidthSupportLuma = 3072;
7006 } else {
7007 s->MaximumSwathWidthSupportLuma = 6144;
7008 }
7009
7010 if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_8 || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_10 || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_12) {
7011 s->MaximumSwathWidthSupportChroma = (dml_uint_t)(s->MaximumSwathWidthSupportLuma / 2.0);
7012 } else {
7013 s->MaximumSwathWidthSupportChroma = s->MaximumSwathWidthSupportLuma;
7014 }
7015 mode_lib->ms.MaximumSwathWidthInLineBufferLuma = mode_lib->ms.ip.line_buffer_size_bits * dml_max(mode_lib->ms.cache_display_cfg.plane.HRatio[k], 1.0) / mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel[k] /
7016 (mode_lib->ms.cache_display_cfg.plane.VTaps[k] + dml_max(dml_ceil(mode_lib->ms.cache_display_cfg.plane.VRatio[k], 1.0) - 2, 0.0));
7017 if (mode_lib->ms.BytePerPixelC[k] == 0.0) {
7018 mode_lib->ms.MaximumSwathWidthInLineBufferChroma = 0;
7019 } else {
7020 mode_lib->ms.MaximumSwathWidthInLineBufferChroma =
7021 mode_lib->ms.ip.line_buffer_size_bits
7022 * dml_max(mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k], 1.0)
7023 / mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel[k]
7024 / (mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k]
7025 + dml_max(dml_ceil(mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k], 1.0) - 2, 0.0));
7026 }
7027 mode_lib->ms.MaximumSwathWidthLuma[k] = dml_min(s->MaximumSwathWidthSupportLuma, mode_lib->ms.MaximumSwathWidthInLineBufferLuma);
7028 mode_lib->ms.MaximumSwathWidthChroma[k] = dml_min(s->MaximumSwathWidthSupportChroma, mode_lib->ms.MaximumSwathWidthInLineBufferChroma);
7029 }
7030
7031 /*Number Of DSC Slices*/
7032 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7033 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k &&
7034 mode_lib->ms.cache_display_cfg.output.DSCEnable[k] != dml_dsc_disable) {
7035 mode_lib->ms.support.NumberOfDSCSlices[k] = mode_lib->ms.cache_display_cfg.output.DSCSlices[k];
7036
7037 if (mode_lib->ms.support.NumberOfDSCSlices[k] == 0) {
7038 if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 4800) {
7039 mode_lib->ms.support.NumberOfDSCSlices[k] = (dml_uint_t)(dml_ceil(mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 600, 4));
7040 } else if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 2400) {
7041 mode_lib->ms.support.NumberOfDSCSlices[k] = 8;
7042 } else if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 1200) {
7043 mode_lib->ms.support.NumberOfDSCSlices[k] = 4;
7044 } else if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 340) {
7045 mode_lib->ms.support.NumberOfDSCSlices[k] = 2;
7046 } else {
7047 mode_lib->ms.support.NumberOfDSCSlices[k] = 1;
7048 }
7049 }
7050 } else {
7051 mode_lib->ms.support.NumberOfDSCSlices[k] = 1;
7052 }
7053 }
7054
7055 CalculateSwathAndDETConfiguration_params->DETSizeOverride = mode_lib->ms.cache_display_cfg.plane.DETSizeOverride;
7056 CalculateSwathAndDETConfiguration_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
7057 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ms.ip.config_return_buffer_size_in_kbytes;
7058 CalculateSwathAndDETConfiguration_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes;
7059 CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte;
7060 CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte;
7061 CalculateSwathAndDETConfiguration_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
7062 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = 1;
7063 CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
7064 CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
7065 CalculateSwathAndDETConfiguration_params->UseUnboundedRequestingFinal = mode_lib->ms.policy.UseUnboundedRequesting;
7066 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes;
7067 CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByteFinal = mode_lib->ms.ip.compressed_buffer_segment_size_in_kbytes;
7068 CalculateSwathAndDETConfiguration_params->Output = mode_lib->ms.cache_display_cfg.output.OutputEncoder;
7069 CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.ReadBandwidthLuma;
7070 CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.ReadBandwidthChroma;
7071 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma;
7072 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma;
7073 CalculateSwathAndDETConfiguration_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan;
7074 CalculateSwathAndDETConfiguration_params->ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary;
7075 CalculateSwathAndDETConfiguration_params->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat;
7076 CalculateSwathAndDETConfiguration_params->SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling;
7077 CalculateSwathAndDETConfiguration_params->ViewportWidth = mode_lib->ms.cache_display_cfg.plane.ViewportWidth;
7078 CalculateSwathAndDETConfiguration_params->ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight;
7079 CalculateSwathAndDETConfiguration_params->ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart;
7080 CalculateSwathAndDETConfiguration_params->ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart;
7081 CalculateSwathAndDETConfiguration_params->ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC;
7082 CalculateSwathAndDETConfiguration_params->ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC;
7083 CalculateSwathAndDETConfiguration_params->SurfaceWidthY = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY;
7084 CalculateSwathAndDETConfiguration_params->SurfaceWidthC = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC;
7085 CalculateSwathAndDETConfiguration_params->SurfaceHeightY = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY;
7086 CalculateSwathAndDETConfiguration_params->SurfaceHeightC = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC;
7087 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY;
7088 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC;
7089 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY;
7090 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC;
7091 CalculateSwathAndDETConfiguration_params->ODMMode = s->dummy_odm_mode;
7092 CalculateSwathAndDETConfiguration_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
7093 CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY;
7094 CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC;
7095 CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY;
7096 CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC;
7097 CalculateSwathAndDETConfiguration_params->HActive = mode_lib->ms.cache_display_cfg.timing.HActive;
7098 CalculateSwathAndDETConfiguration_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
7099 CalculateSwathAndDETConfiguration_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
7100 CalculateSwathAndDETConfiguration_params->DPPPerSurface = s->dummy_integer_array[0];
7101 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_integer_array[1];
7102 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_integer_array[2];
7103 CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_integer_array[3];
7104 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_integer_array[4];
7105 CalculateSwathAndDETConfiguration_params->SwathHeightY = s->dummy_integer_array[5];
7106 CalculateSwathAndDETConfiguration_params->SwathHeightC = s->dummy_integer_array[6];
7107 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = s->dummy_integer_array[7];
7108 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY;
7109 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC;
7110 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &s->dummy_boolean[0];
7111 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[2];
7112 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_zs = &s->dummy_integer[1];
7113 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &s->dummy_integer[0];
7114 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = mode_lib->ms.SingleDPPViewportSizeSupportPerSurface;
7115 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[1];
7116
7117 CalculateSwathAndDETConfiguration(&mode_lib->scratch,
7118 CalculateSwathAndDETConfiguration_params); /* dml_bool_t *ViewportSizeSupport */
7119
7120 s->MPCCombineMethodAsNeededForPStateChangeAndVoltage = false;
7121 s->MPCCombineMethodAsPossible = false;
7122 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7123 if (mode_lib->ms.policy.MPCCombineUse[k] == dml_mpc_as_needed_for_pstate_and_voltage)
7124 s->MPCCombineMethodAsNeededForPStateChangeAndVoltage = true;
7125 if (mode_lib->ms.policy.MPCCombineUse[k] == dml_mpc_as_possible)
7126 s->MPCCombineMethodAsPossible = true;
7127 }
7128 mode_lib->ms.support.MPCCombineMethodIncompatible = s->MPCCombineMethodAsNeededForPStateChangeAndVoltage && s->MPCCombineMethodAsPossible;
7129
7130 for (j = 0; j < 2; j++) {
7131 mode_lib->ms.TotalNumberOfActiveDPP[j] = 0;
7132 mode_lib->ms.support.TotalAvailablePipesSupport[j] = true;
7133
7134 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7135 CalculateODMMode(
7136 mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit,
7137 mode_lib->ms.cache_display_cfg.timing.HActive[k],
7138 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
7139 mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
7140 mode_lib->ms.policy.ODMUse[k],
7141 mode_lib->ms.state.dispclk_mhz,
7142 mode_lib->ms.max_state.dispclk_mhz,
7143 false, // DSCEnable
7144 mode_lib->ms.TotalNumberOfActiveDPP[j],
7145 mode_lib->ms.ip.max_num_dpp,
7146 mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
7147 mode_lib->ms.soc.dcn_downspread_percent,
7148 mode_lib->ms.ip.dispclk_ramp_margin_percent,
7149 mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
7150 mode_lib->ms.support.NumberOfDSCSlices[k],
7151
7152 /* Output */
7153 &s->TotalAvailablePipesSupportNoDSC,
7154 &s->NumberOfDPPNoDSC,
7155 &s->ODMModeNoDSC,
7156 &s->RequiredDISPCLKPerSurfaceNoDSC);
7157
7158 CalculateODMMode(
7159 mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit,
7160 mode_lib->ms.cache_display_cfg.timing.HActive[k],
7161 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
7162 mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
7163 mode_lib->ms.policy.ODMUse[k],
7164 mode_lib->ms.state.dispclk_mhz,
7165 mode_lib->ms.max_state.dispclk_mhz,
7166 true, // DSCEnable
7167 mode_lib->ms.TotalNumberOfActiveDPP[j],
7168 mode_lib->ms.ip.max_num_dpp,
7169 mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
7170 mode_lib->ms.soc.dcn_downspread_percent,
7171 mode_lib->ms.ip.dispclk_ramp_margin_percent,
7172 mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
7173 mode_lib->ms.support.NumberOfDSCSlices[k],
7174
7175 /* Output */
7176 &s->TotalAvailablePipesSupportDSC,
7177 &s->NumberOfDPPDSC,
7178 &s->ODMModeDSC,
7179 &s->RequiredDISPCLKPerSurfaceDSC);
7180
7181 CalculateOutputLink(
7182 mode_lib->ms.state.phyclk_mhz,
7183 mode_lib->ms.state.phyclk_d18_mhz,
7184 mode_lib->ms.state.phyclk_d32_mhz,
7185 mode_lib->ms.soc.phy_downspread_percent,
7186 (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k),
7187 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
7188 mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
7189 mode_lib->ms.cache_display_cfg.timing.HTotal[k],
7190 mode_lib->ms.cache_display_cfg.timing.HActive[k],
7191 mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k],
7192 mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k],
7193 mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k],
7194 mode_lib->ms.support.NumberOfDSCSlices[k],
7195 mode_lib->ms.cache_display_cfg.output.AudioSampleRate[k],
7196 mode_lib->ms.cache_display_cfg.output.AudioSampleLayout[k],
7197 s->ODMModeNoDSC,
7198 s->ODMModeDSC,
7199 mode_lib->ms.cache_display_cfg.output.DSCEnable[k],
7200 mode_lib->ms.cache_display_cfg.output.OutputLinkDPLanes[k],
7201 mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k],
7202
7203 /* Output */
7204 &mode_lib->ms.RequiresDSC[k],
7205 &mode_lib->ms.RequiresFEC[k],
7206 &mode_lib->ms.OutputBppPerState[k],
7207 &mode_lib->ms.OutputTypePerState[k], // VBA_DELTA, VBA uses a string to represent type and rate, but DML uses enum, don't want to rely on string
7208 &mode_lib->ms.OutputRatePerState[k],
7209 &mode_lib->ms.RequiredSlots[k]);
7210
7211 if (mode_lib->ms.RequiresDSC[k] == false) {
7212 mode_lib->ms.ODMModePerState[k] = s->ODMModeNoDSC;
7213 mode_lib->ms.RequiredDISPCLKPerSurface[j][k] = s->RequiredDISPCLKPerSurfaceNoDSC;
7214 if (!s->TotalAvailablePipesSupportNoDSC)
7215 mode_lib->ms.support.TotalAvailablePipesSupport[j] = false;
7216 mode_lib->ms.TotalNumberOfActiveDPP[j] = mode_lib->ms.TotalNumberOfActiveDPP[j] + s->NumberOfDPPNoDSC;
7217 } else {
7218 mode_lib->ms.ODMModePerState[k] = s->ODMModeDSC;
7219 mode_lib->ms.RequiredDISPCLKPerSurface[j][k] = s->RequiredDISPCLKPerSurfaceDSC;
7220 if (!s->TotalAvailablePipesSupportDSC)
7221 mode_lib->ms.support.TotalAvailablePipesSupport[j] = false;
7222 mode_lib->ms.TotalNumberOfActiveDPP[j] = mode_lib->ms.TotalNumberOfActiveDPP[j] + s->NumberOfDPPDSC;
7223 }
7224 }
7225
7226 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7227 if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1) {
7228 mode_lib->ms.MPCCombine[j][k] = false;
7229 mode_lib->ms.NoOfDPP[j][k] = 4;
7230 } else if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1) {
7231 mode_lib->ms.MPCCombine[j][k] = false;
7232 mode_lib->ms.NoOfDPP[j][k] = 2;
7233 } else if (mode_lib->ms.policy.MPCCombineUse[k] == dml_mpc_disabled) {
7234 mode_lib->ms.MPCCombine[j][k] = false;
7235 mode_lib->ms.NoOfDPP[j][k] = 1;
7236 } else if (RoundToDFSGranularity(mode_lib->ms.MinDPPCLKUsingSingleDPP[k] * (1 + mode_lib->ms.soc.dcn_downspread_percent / 100),
7237 1, mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz) <= mode_lib->ms.state.dppclk_mhz &&
7238 mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k] == true) {
7239 mode_lib->ms.MPCCombine[j][k] = false;
7240 mode_lib->ms.NoOfDPP[j][k] = 1;
7241 } else if (mode_lib->ms.TotalNumberOfActiveDPP[j] < (dml_uint_t) mode_lib->ms.ip.max_num_dpp) {
7242 mode_lib->ms.MPCCombine[j][k] = true;
7243 mode_lib->ms.NoOfDPP[j][k] = 2;
7244 mode_lib->ms.TotalNumberOfActiveDPP[j] = (dml_uint_t) mode_lib->ms.TotalNumberOfActiveDPP[j] + 1;
7245 } else {
7246 mode_lib->ms.MPCCombine[j][k] = false;
7247 mode_lib->ms.NoOfDPP[j][k] = 1;
7248 mode_lib->ms.support.TotalAvailablePipesSupport[j] = false;
7249 }
7250 }
7251
7252 mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] = 0;
7253 s->NoChromaOrLinear = true;
7254 for (k = 0; k < (dml_uint_t) mode_lib->ms.num_active_planes; ++k) {
7255 if (mode_lib->ms.NoOfDPP[j][k] == 1)
7256 mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] = mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] + 1;
7257 if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_8
7258 || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_10
7259 || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_12
7260 || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_rgbe_alpha
7261 || mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k] == dml_sw_linear) {
7262 s->NoChromaOrLinear = false;
7263 }
7264 }
7265
7266 if (j == 1 && !UnboundedRequest(mode_lib->ms.policy.UseUnboundedRequesting,
7267 mode_lib->ms.TotalNumberOfActiveDPP[j], s->NoChromaOrLinear,
7268 mode_lib->ms.cache_display_cfg.output.OutputEncoder[0])) {
7269 while (!(mode_lib->ms.TotalNumberOfActiveDPP[j] >= (dml_uint_t) mode_lib->ms.ip.max_num_dpp || mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] == 0)) {
7270 s->BWOfNonCombinedSurfaceOfMaximumBandwidth = 0;
7271 s->NumberOfNonCombinedSurfaceOfMaximumBandwidth = 0;
7272 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7273 if (mode_lib->ms.policy.MPCCombineUse[k] != dml_mpc_disabled && mode_lib->ms.policy.MPCCombineUse[k] != dml_mpc_as_needed_for_voltage &&
7274 mode_lib->ms.ReadBandwidthLuma[k] + mode_lib->ms.ReadBandwidthChroma[k] > s->BWOfNonCombinedSurfaceOfMaximumBandwidth &&
7275 (mode_lib->ms.ODMModePerState[k] != dml_odm_mode_combine_2to1 && mode_lib->ms.ODMModePerState[k] != dml_odm_mode_combine_4to1) &&
7276 mode_lib->ms.MPCCombine[j][k] == false) {
7277 s->BWOfNonCombinedSurfaceOfMaximumBandwidth = mode_lib->ms.ReadBandwidthLuma[k] + mode_lib->ms.ReadBandwidthChroma[k];
7278 s->NumberOfNonCombinedSurfaceOfMaximumBandwidth = k;
7279 }
7280 }
7281 mode_lib->ms.MPCCombine[j][s->NumberOfNonCombinedSurfaceOfMaximumBandwidth] = true;
7282 mode_lib->ms.NoOfDPP[j][s->NumberOfNonCombinedSurfaceOfMaximumBandwidth] = 2;
7283 mode_lib->ms.TotalNumberOfActiveDPP[j] = mode_lib->ms.TotalNumberOfActiveDPP[j] + 1;
7284 mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] = mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] - 1;
7285 }
7286 }
7287
7288 //DISPCLK/DPPCLK
7289 mode_lib->ms.WritebackRequiredDISPCLK = 0;
7290 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7291 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k]) {
7292 mode_lib->ms.WritebackRequiredDISPCLK = dml_max(mode_lib->ms.WritebackRequiredDISPCLK,
7293 CalculateWriteBackDISPCLK(mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k],
7294 mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
7295 mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k],
7296 mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k],
7297 mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k],
7298 mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k],
7299 mode_lib->ms.cache_display_cfg.writeback.WritebackSourceWidth[k],
7300 mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k],
7301 mode_lib->ms.cache_display_cfg.timing.HTotal[k],
7302 mode_lib->ms.ip.writeback_line_buffer_buffer_size,
7303 mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz));
7304 }
7305 }
7306
7307 mode_lib->ms.RequiredDISPCLK[j] = mode_lib->ms.WritebackRequiredDISPCLK;
7308 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7309 mode_lib->ms.RequiredDISPCLK[j] = dml_max(mode_lib->ms.RequiredDISPCLK[j], mode_lib->ms.RequiredDISPCLKPerSurface[j][k]);
7310 }
7311
7312 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7313 mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k];
7314 }
7315
7316 CalculateDPPCLK(mode_lib->ms.num_active_planes,
7317 mode_lib->ms.soc.dcn_downspread_percent,
7318 mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
7319 mode_lib->ms.MinDPPCLKUsingSingleDPP,
7320 mode_lib->ms.NoOfDPPThisState,
7321 /* Output */
7322 &mode_lib->ms.GlobalDPPCLK,
7323 mode_lib->ms.RequiredDPPCLKThisState);
7324
7325 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7326 mode_lib->ms.RequiredDPPCLKPerSurface[j][k] = mode_lib->ms.RequiredDPPCLKThisState[k];
7327 }
7328
7329 mode_lib->ms.support.DISPCLK_DPPCLK_Support[j] = !((mode_lib->ms.RequiredDISPCLK[j] > mode_lib->ms.state.dispclk_mhz) || (mode_lib->ms.GlobalDPPCLK > mode_lib->ms.state.dppclk_mhz));
7330
7331 if (mode_lib->ms.TotalNumberOfActiveDPP[j] > (dml_uint_t) mode_lib->ms.ip.max_num_dpp) {
7332 mode_lib->ms.support.TotalAvailablePipesSupport[j] = false;
7333 }
7334 } // j
7335
7336 /* Total Available OTG, HDMIFRL, DP Support Check */
7337 s->TotalNumberOfActiveOTG = 0;
7338 s->TotalNumberOfActiveHDMIFRL = 0;
7339 s->TotalNumberOfActiveDP2p0 = 0;
7340 s->TotalNumberOfActiveDP2p0Outputs = 0;
7341
7342 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7343 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
7344 s->TotalNumberOfActiveOTG = s->TotalNumberOfActiveOTG + 1;
7345 if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)
7346 s->TotalNumberOfActiveHDMIFRL = s->TotalNumberOfActiveHDMIFRL + 1;
7347 if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0) {
7348 s->TotalNumberOfActiveDP2p0 = s->TotalNumberOfActiveDP2p0 + 1;
7349 if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k || mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == false) {
7350 s->TotalNumberOfActiveDP2p0Outputs = s->TotalNumberOfActiveDP2p0Outputs + 1;
7351 }
7352 }
7353 }
7354 }
7355
7356 mode_lib->ms.support.NumberOfOTGSupport = (s->TotalNumberOfActiveOTG <= (dml_uint_t) mode_lib->ms.ip.max_num_otg);
7357 mode_lib->ms.support.NumberOfHDMIFRLSupport = (s->TotalNumberOfActiveHDMIFRL <= (dml_uint_t) mode_lib->ms.ip.max_num_hdmi_frl_outputs);
7358 mode_lib->ms.support.NumberOfDP2p0Support = (s->TotalNumberOfActiveDP2p0 <= (dml_uint_t) mode_lib->ms.ip.max_num_dp2p0_streams && s->TotalNumberOfActiveDP2p0Outputs <= (dml_uint_t) mode_lib->ms.ip.max_num_dp2p0_outputs);
7359
7360 /* Display IO and DSC Support Check */
7361 mode_lib->ms.support.NonsupportedDSCInputBPC = false;
7362 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7363 if (mode_lib->ms.cache_display_cfg.output.OutputDisabled[k] == false &&
7364 !(mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] == 12.0
7365 || mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] == 10.0
7366 || mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] == 8.0
7367 || mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] > (dml_uint_t) mode_lib->ms.ip.maximum_dsc_bits_per_component
7368 )) {
7369 mode_lib->ms.support.NonsupportedDSCInputBPC = true;
7370 }
7371 }
7372
7373 mode_lib->ms.support.ExceededMultistreamSlots = false;
7374 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7375 if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k) {
7376 s->TotalSlots = mode_lib->ms.RequiredSlots[k];
7377 for (j = 0; j < mode_lib->ms.num_active_planes; ++j) {
7378 if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[j] == k)
7379 s->TotalSlots = s->TotalSlots + mode_lib->ms.RequiredSlots[j];
7380 }
7381 if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp && s->TotalSlots > 63)
7382 mode_lib->ms.support.ExceededMultistreamSlots = true;
7383 if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 && s->TotalSlots > 64)
7384 mode_lib->ms.support.ExceededMultistreamSlots = true;
7385 }
7386 }
7387 mode_lib->ms.support.LinkCapacitySupport = true;
7388 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7389 if (mode_lib->ms.cache_display_cfg.output.OutputDisabled[k] == false &&
7390 mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp ||
7391 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) && mode_lib->ms.OutputBppPerState[k] == 0) {
7392 mode_lib->ms.support.LinkCapacitySupport = false;
7393 }
7394 }
7395
7396 mode_lib->ms.support.P2IWith420 = false;
7397 mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = false;
7398 mode_lib->ms.support.DSC422NativeNotSupported = false;
7399 mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = false;
7400 mode_lib->ms.support.LinkRateForMultistreamNotIndicated = false;
7401 mode_lib->ms.support.BPPForMultistreamNotIndicated = false;
7402 mode_lib->ms.support.MultistreamWithHDMIOreDP = false;
7403 mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = false;
7404 mode_lib->ms.support.NotEnoughLanesForMSO = false;
7405
7406 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7407 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp ||
7408 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)) {
7409 if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_420 && mode_lib->ms.cache_display_cfg.timing.Interlace[k] == 1 && mode_lib->ms.ip.ptoi_supported == true)
7410 mode_lib->ms.support.P2IWith420 = true;
7411
7412 if (mode_lib->ms.cache_display_cfg.output.DSCEnable[k] == dml_dsc_enable_if_necessary && mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k] != 0)
7413 mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = true;
7414 if ((mode_lib->ms.cache_display_cfg.output.DSCEnable[k] == dml_dsc_enable || mode_lib->ms.cache_display_cfg.output.DSCEnable[k] == dml_dsc_enable_if_necessary) && mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_n422 && !mode_lib->ms.ip.dsc422_native_support)
7415 mode_lib->ms.support.DSC422NativeNotSupported = true;
7416
7417 if (((mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_hbr || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_hbr2 || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_hbr3) &&
7418 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_dp && mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_edp) ||
7419 ((mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_uhbr10 || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_uhbr13p5 || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_uhbr20) &&
7420 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_dp2p0))
7421 mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = true;
7422
7423 if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == 1) {
7424 if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k && mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_na)
7425 mode_lib->ms.support.LinkRateForMultistreamNotIndicated = true;
7426 if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k && mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k] == 0)
7427 mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
7428 for (j = 0; j < mode_lib->ms.num_active_planes; ++j) {
7429 if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == j && mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k] == 0)
7430 mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
7431 }
7432 }
7433
7434 if ((mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)) {
7435 if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == 1 && mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k)
7436 mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
7437 for (j = 0; j < mode_lib->ms.num_active_planes; ++j) {
7438 if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == 1 && mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == j)
7439 mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
7440 }
7441 }
7442 if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_dp && (mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_split_1to2 ||
7443 mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to2 || mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to4))
7444 mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = true;
7445
7446 if ((mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to2 && mode_lib->ms.cache_display_cfg.output.OutputLinkDPLanes[k] < 2) ||
7447 (mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to4 && mode_lib->ms.cache_display_cfg.output.OutputLinkDPLanes[k] < 4))
7448 mode_lib->ms.support.NotEnoughLanesForMSO = true;
7449 }
7450 }
7451
7452 mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = false;
7453 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7454 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k &&
7455 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl &&
7456 RequiredDTBCLK(
7457 mode_lib->ms.RequiresDSC[k],
7458 mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k],
7459 mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
7460 mode_lib->ms.OutputBppPerState[k],
7461 mode_lib->ms.support.NumberOfDSCSlices[k],
7462 mode_lib->ms.cache_display_cfg.timing.HTotal[k],
7463 mode_lib->ms.cache_display_cfg.timing.HActive[k],
7464 mode_lib->ms.cache_display_cfg.output.AudioSampleRate[k],
7465 mode_lib->ms.cache_display_cfg.output.AudioSampleLayout[k]) > mode_lib->ms.state.dtbclk_mhz) {
7466 mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = true;
7467 }
7468 }
7469
7470 mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK = true;
7471 mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK = true;
7472 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7473 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1 && mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi) {
7474 mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK = false;
7475 }
7476 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1 && (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp ||
7477 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi)) {
7478 mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK = false;
7479 }
7480 }
7481
7482 mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = false;
7483 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7484 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
7485 if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp ||
7486 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 ||
7487 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp ||
7488 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) {
7489 if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_420) {
7490 s->DSCFormatFactor = 2;
7491 } else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_444) {
7492 s->DSCFormatFactor = 1;
7493 } else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_n422 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) {
7494 s->DSCFormatFactor = 2;
7495 } else {
7496 s->DSCFormatFactor = 1;
7497 }
7498 #ifdef __DML_VBA_DEBUG__
7499 dml_print("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]);
7500 #endif
7501 if (mode_lib->ms.RequiresDSC[k] == true) {
7502 if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1) {
7503 if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 12.0 / (dml_float_t)s->DSCFormatFactor > (1.0 - mode_lib->ms.soc.dcn_downspread_percent / 100.0) * mode_lib->ms.state.dscclk_mhz) {
7504 #ifdef __DML_VBA_DEBUG__
7505 dml_print("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k]);
7506 dml_print("DML::%s: k=%u, DSCCLKPerState = %f\n", __func__, k, mode_lib->ms.state.dscclk_mhz);
7507 dml_print("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor);
7508 #endif
7509 mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true;
7510 }
7511 } else if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1) {
7512 if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 6.0 / (dml_float_t)s->DSCFormatFactor > (1.0 - mode_lib->ms.soc.dcn_downspread_percent / 100.0) * mode_lib->ms.state.dscclk_mhz) {
7513 mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true;
7514 }
7515 } else {
7516 if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 3.0 / (dml_float_t)s->DSCFormatFactor > (1.0 - mode_lib->ms.soc.dcn_downspread_percent / 100.0) * mode_lib->ms.state.dscclk_mhz) {
7517 mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true;
7518 }
7519 }
7520 }
7521 }
7522 }
7523 }
7524 #ifdef __DML_VBA_DEBUG__
7525 dml_print("DML::%s: DSCCLKRequiredMoreThanSupported = %u\n", __func__, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported);
7526 #endif
7527
7528 /* Check DSC Unit and Slices Support */
7529 mode_lib->ms.support.NotEnoughDSCUnits = false;
7530 mode_lib->ms.support.NotEnoughDSCSlices = false;
7531 s->TotalDSCUnitsRequired = 0;
7532 mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = true;
7533 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7534 if (mode_lib->ms.RequiresDSC[k] == true) {
7535 if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1) {
7536 if (mode_lib->ms.cache_display_cfg.timing.HActive[k] > 4 * (dml_uint_t) mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit)
7537 mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false;
7538 s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + 4;
7539 if (mode_lib->ms.support.NumberOfDSCSlices[k] > 16)
7540 mode_lib->ms.support.NotEnoughDSCSlices = true;
7541 } else if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1) {
7542 if (mode_lib->ms.cache_display_cfg.timing.HActive[k] > 2 * (dml_uint_t) mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit)
7543 mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false;
7544 s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + 2;
7545 if (mode_lib->ms.support.NumberOfDSCSlices[k] > 8)
7546 mode_lib->ms.support.NotEnoughDSCSlices = true;
7547 } else {
7548 if (mode_lib->ms.cache_display_cfg.timing.HActive[k] > (dml_uint_t) mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit)
7549 mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false;
7550 s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + 1;
7551 if (mode_lib->ms.support.NumberOfDSCSlices[k] > 4)
7552 mode_lib->ms.support.NotEnoughDSCSlices = true;
7553 }
7554 }
7555 }
7556 if (s->TotalDSCUnitsRequired > (dml_uint_t) mode_lib->ms.ip.num_dsc) {
7557 mode_lib->ms.support.NotEnoughDSCUnits = true;
7558 }
7559
7560 /*DSC Delay per state*/
7561 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7562 mode_lib->ms.DSCDelayPerState[k] = DSCDelayRequirement(mode_lib->ms.RequiresDSC[k],
7563 mode_lib->ms.ODMModePerState[k],
7564 mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k],
7565 mode_lib->ms.OutputBppPerState[k],
7566 mode_lib->ms.cache_display_cfg.timing.HActive[k],
7567 mode_lib->ms.cache_display_cfg.timing.HTotal[k],
7568 mode_lib->ms.support.NumberOfDSCSlices[k],
7569 mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
7570 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
7571 mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
7572 mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k]);
7573 }
7574
7575 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7576 for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) {
7577 for (j = 0; j <= mode_lib->ms.num_active_planes - 1; j++) {
7578 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == m && mode_lib->ms.RequiresDSC[m] == true) {
7579 mode_lib->ms.DSCDelayPerState[k] = mode_lib->ms.DSCDelayPerState[m];
7580 }
7581 }
7582 }
7583 }
7584
7585 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
7586 //
7587 for (j = 0; j < 2; ++j) {
7588 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7589 mode_lib->ms.RequiredDPPCLKThisState[k] = mode_lib->ms.RequiredDPPCLKPerSurface[j][k];
7590 mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k];
7591 mode_lib->ms.ODMModeThisState[k] = mode_lib->ms.ODMModePerState[k];
7592 }
7593
7594 CalculateSwathAndDETConfiguration_params->DETSizeOverride = mode_lib->ms.cache_display_cfg.plane.DETSizeOverride;
7595 CalculateSwathAndDETConfiguration_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
7596 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ms.ip.config_return_buffer_size_in_kbytes;
7597 CalculateSwathAndDETConfiguration_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes;
7598 CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte;
7599 CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte;
7600 CalculateSwathAndDETConfiguration_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
7601 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
7602 CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
7603 CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
7604 CalculateSwathAndDETConfiguration_params->UseUnboundedRequestingFinal = mode_lib->ms.policy.UseUnboundedRequesting;
7605 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes;
7606 CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByteFinal = mode_lib->ms.ip.compressed_buffer_segment_size_in_kbytes;
7607 CalculateSwathAndDETConfiguration_params->Output = mode_lib->ms.cache_display_cfg.output.OutputEncoder;
7608 CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.ReadBandwidthLuma;
7609 CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.ReadBandwidthChroma;
7610 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma;
7611 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma;
7612 CalculateSwathAndDETConfiguration_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan;
7613 CalculateSwathAndDETConfiguration_params->ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary;
7614 CalculateSwathAndDETConfiguration_params->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat;
7615 CalculateSwathAndDETConfiguration_params->SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling;
7616 CalculateSwathAndDETConfiguration_params->ViewportWidth = mode_lib->ms.cache_display_cfg.plane.ViewportWidth;
7617 CalculateSwathAndDETConfiguration_params->ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight;
7618 CalculateSwathAndDETConfiguration_params->ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart;
7619 CalculateSwathAndDETConfiguration_params->ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart;
7620 CalculateSwathAndDETConfiguration_params->ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC;
7621 CalculateSwathAndDETConfiguration_params->ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC;
7622 CalculateSwathAndDETConfiguration_params->SurfaceWidthY = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY;
7623 CalculateSwathAndDETConfiguration_params->SurfaceWidthC = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC;
7624 CalculateSwathAndDETConfiguration_params->SurfaceHeightY = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY;
7625 CalculateSwathAndDETConfiguration_params->SurfaceHeightC = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC;
7626 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY;
7627 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC;
7628 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY;
7629 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC;
7630 CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.ODMModeThisState;
7631 CalculateSwathAndDETConfiguration_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
7632 CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY;
7633 CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC;
7634 CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY;
7635 CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC;
7636 CalculateSwathAndDETConfiguration_params->HActive = mode_lib->ms.cache_display_cfg.timing.HActive;
7637 CalculateSwathAndDETConfiguration_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
7638 CalculateSwathAndDETConfiguration_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
7639 CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.NoOfDPPThisState;
7640 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub_this_state;
7641 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub_this_state;
7642 CalculateSwathAndDETConfiguration_params->SwathWidth = mode_lib->ms.SwathWidthYThisState;
7643 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = mode_lib->ms.SwathWidthCThisState;
7644 CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState;
7645 CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState;
7646 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->ms.DETBufferSizeInKByteThisState;
7647 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeYThisState;
7648 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeCThisState;
7649 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->ms.UnboundedRequestEnabledThisState;
7650 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[2];
7651 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_zs = &s->dummy_integer[1];
7652 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->ms.CompressedBufferSizeInkByteThisState;
7653 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = s->dummy_boolean_array[0];
7654 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &mode_lib->ms.support.ViewportSizeSupport[j];
7655
7656 CalculateSwathAndDETConfiguration(&mode_lib->scratch,
7657 CalculateSwathAndDETConfiguration_params);
7658
7659 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7660 mode_lib->ms.swath_width_luma_ub_all_states[j][k] = mode_lib->ms.swath_width_luma_ub_this_state[k];
7661 mode_lib->ms.swath_width_chroma_ub_all_states[j][k] = mode_lib->ms.swath_width_chroma_ub_this_state[k];
7662 mode_lib->ms.SwathWidthYAllStates[j][k] = mode_lib->ms.SwathWidthYThisState[k];
7663 mode_lib->ms.SwathWidthCAllStates[j][k] = mode_lib->ms.SwathWidthCThisState[k];
7664 mode_lib->ms.SwathHeightYAllStates[j][k] = mode_lib->ms.SwathHeightYThisState[k];
7665 mode_lib->ms.SwathHeightCAllStates[j][k] = mode_lib->ms.SwathHeightCThisState[k];
7666 mode_lib->ms.UnboundedRequestEnabledAllStates[j] = mode_lib->ms.UnboundedRequestEnabledThisState;
7667 mode_lib->ms.CompressedBufferSizeInkByteAllStates[j] = mode_lib->ms.CompressedBufferSizeInkByteThisState;
7668 mode_lib->ms.DETBufferSizeInKByteAllStates[j][k] = mode_lib->ms.DETBufferSizeInKByteThisState[k];
7669 mode_lib->ms.DETBufferSizeYAllStates[j][k] = mode_lib->ms.DETBufferSizeYThisState[k];
7670 mode_lib->ms.DETBufferSizeCAllStates[j][k] = mode_lib->ms.DETBufferSizeCThisState[k];
7671 }
7672 }
7673
7674 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7675 mode_lib->ms.cursor_bw[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k];
7676 }
7677
7678 CalculateSurfaceSizeInMall(
7679 mode_lib->ms.num_active_planes,
7680 mode_lib->ms.soc.mall_allocated_for_dcn_mbytes,
7681 mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen,
7682 mode_lib->ms.cache_display_cfg.surface.DCCEnable,
7683 mode_lib->ms.cache_display_cfg.plane.ViewportStationary,
7684 mode_lib->ms.cache_display_cfg.plane.ViewportXStart,
7685 mode_lib->ms.cache_display_cfg.plane.ViewportYStart,
7686 mode_lib->ms.cache_display_cfg.plane.ViewportXStartC,
7687 mode_lib->ms.cache_display_cfg.plane.ViewportYStartC,
7688 mode_lib->ms.cache_display_cfg.plane.ViewportWidth,
7689 mode_lib->ms.cache_display_cfg.plane.ViewportHeight,
7690 mode_lib->ms.BytePerPixelY,
7691 mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma,
7692 mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma,
7693 mode_lib->ms.BytePerPixelC,
7694 mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY,
7695 mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC,
7696 mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY,
7697 mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC,
7698 mode_lib->ms.Read256BlockWidthY,
7699 mode_lib->ms.Read256BlockWidthC,
7700 mode_lib->ms.Read256BlockHeightY,
7701 mode_lib->ms.Read256BlockHeightC,
7702 mode_lib->ms.MacroTileWidthY,
7703 mode_lib->ms.MacroTileWidthC,
7704 mode_lib->ms.MacroTileHeightY,
7705 mode_lib->ms.MacroTileHeightC,
7706
7707 /* Output */
7708 mode_lib->ms.SurfaceSizeInMALL,
7709 &mode_lib->ms.support.ExceededMALLSize);
7710
7711 for (j = 0; j < 2; j++) {
7712 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7713 mode_lib->ms.swath_width_luma_ub_this_state[k] = mode_lib->ms.swath_width_luma_ub_all_states[j][k];
7714 mode_lib->ms.swath_width_chroma_ub_this_state[k] = mode_lib->ms.swath_width_chroma_ub_all_states[j][k];
7715 mode_lib->ms.SwathWidthYThisState[k] = mode_lib->ms.SwathWidthYAllStates[j][k];
7716 mode_lib->ms.SwathWidthCThisState[k] = mode_lib->ms.SwathWidthCAllStates[j][k];
7717 mode_lib->ms.SwathHeightYThisState[k] = mode_lib->ms.SwathHeightYAllStates[j][k];
7718 mode_lib->ms.SwathHeightCThisState[k] = mode_lib->ms.SwathHeightCAllStates[j][k];
7719 mode_lib->ms.DETBufferSizeInKByteThisState[k] = mode_lib->ms.DETBufferSizeInKByteAllStates[j][k];
7720 mode_lib->ms.DETBufferSizeYThisState[k] = mode_lib->ms.DETBufferSizeYAllStates[j][k];
7721 mode_lib->ms.DETBufferSizeCThisState[k] = mode_lib->ms.DETBufferSizeCAllStates[j][k];
7722 mode_lib->ms.RequiredDPPCLKThisState[k] = mode_lib->ms.RequiredDPPCLKPerSurface[j][k];
7723 mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k];
7724 }
7725
7726 mode_lib->ms.TotalNumberOfDCCActiveDPP[j] = 0;
7727 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7728 if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) {
7729 mode_lib->ms.TotalNumberOfDCCActiveDPP[j] = mode_lib->ms.TotalNumberOfDCCActiveDPP[j] + mode_lib->ms.NoOfDPP[j][k];
7730 }
7731 }
7732
7733 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7734 s->SurfParameters[k].PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
7735 s->SurfParameters[k].DPPPerSurface = mode_lib->ms.NoOfDPP[j][k];
7736 s->SurfParameters[k].SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k];
7737 s->SurfParameters[k].ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k];
7738 s->SurfParameters[k].ViewportHeightChroma = mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k];
7739 s->SurfParameters[k].BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
7740 s->SurfParameters[k].BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
7741 s->SurfParameters[k].BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
7742 s->SurfParameters[k].BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
7743 s->SurfParameters[k].BlockWidthY = mode_lib->ms.MacroTileWidthY[k];
7744 s->SurfParameters[k].BlockHeightY = mode_lib->ms.MacroTileHeightY[k];
7745 s->SurfParameters[k].BlockWidthC = mode_lib->ms.MacroTileWidthC[k];
7746 s->SurfParameters[k].BlockHeightC = mode_lib->ms.MacroTileHeightC[k];
7747 s->SurfParameters[k].InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k];
7748 s->SurfParameters[k].HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k];
7749 s->SurfParameters[k].DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k];
7750 s->SurfParameters[k].SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k];
7751 s->SurfParameters[k].SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k];
7752 s->SurfParameters[k].BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
7753 s->SurfParameters[k].BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
7754 s->SurfParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
7755 s->SurfParameters[k].VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio[k];
7756 s->SurfParameters[k].VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k];
7757 s->SurfParameters[k].VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps[k];
7758 s->SurfParameters[k].VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k];
7759 s->SurfParameters[k].PitchY = mode_lib->ms.cache_display_cfg.surface.PitchY[k];
7760 s->SurfParameters[k].DCCMetaPitchY = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k];
7761 s->SurfParameters[k].PitchC = mode_lib->ms.cache_display_cfg.surface.PitchC[k];
7762 s->SurfParameters[k].DCCMetaPitchC = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k];
7763 s->SurfParameters[k].ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary[k];
7764 s->SurfParameters[k].ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart[k];
7765 s->SurfParameters[k].ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart[k];
7766 s->SurfParameters[k].ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC[k];
7767 s->SurfParameters[k].ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC[k];
7768 s->SurfParameters[k].FORCE_ONE_ROW_FOR_FRAME = mode_lib->ms.cache_display_cfg.plane.ForceOneRowForFrame[k];
7769 s->SurfParameters[k].SwathHeightY = mode_lib->ms.SwathHeightYThisState[k];
7770 s->SurfParameters[k].SwathHeightC = mode_lib->ms.SwathHeightCThisState[k];
7771 }
7772
7773 set_vm_row_and_swath_parameters(mode_lib);
7774
7775 CalculateVMRowAndSwath(&mode_lib->scratch,
7776 CalculateVMRowAndSwath_params);
7777
7778 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7779 mode_lib->ms.PrefetchLinesY[j][k] = mode_lib->ms.PrefetchLinesYThisState[k];
7780 mode_lib->ms.PrefetchLinesC[j][k] = mode_lib->ms.PrefetchLinesCThisState[k];
7781 mode_lib->ms.meta_row_bandwidth[j][k] = mode_lib->ms.meta_row_bandwidth_this_state[k];
7782 mode_lib->ms.dpte_row_bandwidth[j][k] = mode_lib->ms.dpte_row_bandwidth_this_state[k];
7783 mode_lib->ms.DPTEBytesPerRow[j][k] = mode_lib->ms.DPTEBytesPerRowThisState[k];
7784 mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k] = mode_lib->ms.PDEAndMetaPTEBytesPerFrameThisState[k];
7785 mode_lib->ms.MetaRowBytes[j][k] = mode_lib->ms.MetaRowBytesThisState[k];
7786 mode_lib->ms.use_one_row_for_frame[j][k] = mode_lib->ms.use_one_row_for_frame_this_state[k];
7787 mode_lib->ms.use_one_row_for_frame_flip[j][k] = mode_lib->ms.use_one_row_for_frame_flip_this_state[k];
7788 }
7789
7790 mode_lib->ms.support.PTEBufferSizeNotExceeded[j] = true;
7791
7792 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7793 if (mode_lib->ms.PTEBufferSizeNotExceededPerState[k] == false)
7794 mode_lib->ms.support.PTEBufferSizeNotExceeded[j] = false;
7795 #ifdef __DML_VBA_DEBUG__
7796 dml_print("DML::%s: j=%u k=%u, PTEBufferSizeNotExceededPerState[%u] = %u\n", __func__, j, k, k, mode_lib->ms.PTEBufferSizeNotExceededPerState[k]);
7797 #endif
7798 }
7799 #ifdef __DML_VBA_DEBUG__
7800 dml_print("DML::%s: PTEBufferSizeNotExceeded[%u] = %u\n", __func__, j, mode_lib->ms.support.PTEBufferSizeNotExceeded[j]);
7801 #endif
7802
7803 mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] = true;
7804 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7805 if (mode_lib->ms.DCCMetaBufferSizeNotExceededPerState[k] == false)
7806 mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] = false;
7807 }
7808
7809 mode_lib->ms.UrgLatency = CalculateUrgentLatency(mode_lib->ms.state.urgent_latency_pixel_data_only_us,
7810 mode_lib->ms.state.urgent_latency_pixel_mixed_with_vm_data_us,
7811 mode_lib->ms.state.urgent_latency_vm_data_only_us,
7812 mode_lib->ms.soc.do_urgent_latency_adjustment,
7813 mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_component_us,
7814 mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_reference_mhz,
7815 mode_lib->ms.state.fabricclk_mhz);
7816
7817 /* Getter functions work at mp interface so copy the urgent latency to mp*/
7818 mode_lib->mp.UrgentLatency = mode_lib->ms.UrgLatency;
7819
7820 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7821 CalculateUrgentBurstFactor(
7822 mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
7823 mode_lib->ms.swath_width_luma_ub_this_state[k],
7824 mode_lib->ms.swath_width_chroma_ub_this_state[k],
7825 mode_lib->ms.SwathHeightYThisState[k],
7826 mode_lib->ms.SwathHeightCThisState[k],
7827 (dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
7828 mode_lib->ms.UrgLatency,
7829 mode_lib->ms.ip.cursor_buffer_size,
7830 mode_lib->ms.cache_display_cfg.plane.CursorWidth[k],
7831 mode_lib->ms.cache_display_cfg.plane.CursorBPP[k],
7832 mode_lib->ms.cache_display_cfg.plane.VRatio[k],
7833 mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
7834 mode_lib->ms.BytePerPixelInDETY[k],
7835 mode_lib->ms.BytePerPixelInDETC[k],
7836 mode_lib->ms.DETBufferSizeYThisState[k],
7837 mode_lib->ms.DETBufferSizeCThisState[k],
7838 /* Output */
7839 &mode_lib->ms.UrgentBurstFactorCursor[j][k],
7840 &mode_lib->ms.UrgentBurstFactorLuma[j][k],
7841 &mode_lib->ms.UrgentBurstFactorChroma[j][k],
7842 &mode_lib->ms.NotUrgentLatencyHiding[k]);
7843 }
7844
7845 CalculateDCFCLKDeepSleep(
7846 mode_lib->ms.num_active_planes,
7847 mode_lib->ms.BytePerPixelY,
7848 mode_lib->ms.BytePerPixelC,
7849 mode_lib->ms.cache_display_cfg.plane.VRatio,
7850 mode_lib->ms.cache_display_cfg.plane.VRatioChroma,
7851 mode_lib->ms.SwathWidthYThisState,
7852 mode_lib->ms.SwathWidthCThisState,
7853 mode_lib->ms.NoOfDPPThisState,
7854 mode_lib->ms.cache_display_cfg.plane.HRatio,
7855 mode_lib->ms.cache_display_cfg.plane.HRatioChroma,
7856 mode_lib->ms.cache_display_cfg.timing.PixelClock,
7857 mode_lib->ms.PSCL_FACTOR,
7858 mode_lib->ms.PSCL_FACTOR_CHROMA,
7859 mode_lib->ms.RequiredDPPCLKThisState,
7860 mode_lib->ms.ReadBandwidthLuma,
7861 mode_lib->ms.ReadBandwidthChroma,
7862 mode_lib->ms.soc.return_bus_width_bytes,
7863
7864 /* Output */
7865 &mode_lib->ms.ProjectedDCFCLKDeepSleep[j]);
7866 }
7867
7868 //Calculate Return BW
7869 for (j = 0; j < 2; ++j) {
7870 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7871 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
7872 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
7873 mode_lib->ms.WritebackDelayTime[k] = mode_lib->ms.state.writeback_latency_us + CalculateWriteBackDelay(
7874 mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k],
7875 mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k],
7876 mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k],
7877 mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k],
7878 mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k],
7879 mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k],
7880 mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k],
7881 mode_lib->ms.cache_display_cfg.timing.HTotal[k]) / mode_lib->ms.RequiredDISPCLK[j];
7882 } else {
7883 mode_lib->ms.WritebackDelayTime[k] = 0.0;
7884 }
7885 for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) {
7886 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[m] == k && mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[m] == true) {
7887 mode_lib->ms.WritebackDelayTime[k] = dml_max(mode_lib->ms.WritebackDelayTime[k],
7888 mode_lib->ms.state.writeback_latency_us + CalculateWriteBackDelay(
7889 mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[m],
7890 mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[m],
7891 mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[m],
7892 mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[m],
7893 mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[m],
7894 mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[m],
7895 mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[m],
7896 mode_lib->ms.cache_display_cfg.timing.HTotal[m]) / mode_lib->ms.RequiredDISPCLK[j]);
7897 }
7898 }
7899 }
7900 }
7901 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7902 for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) {
7903 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == m) {
7904 mode_lib->ms.WritebackDelayTime[k] = mode_lib->ms.WritebackDelayTime[m];
7905 }
7906 }
7907 }
7908 s->MaxVStartupAllPlanes[j] = 0; // max vstartup among all planes
7909
7910 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7911 s->MaximumVStartup[j][k] = CalculateMaxVStartup(k,
7912 mode_lib->ms.ip.ptoi_supported,
7913 mode_lib->ms.ip.vblank_nom_default_us,
7914 &mode_lib->ms.cache_display_cfg.timing,
7915 mode_lib->ms.WritebackDelayTime[k]);
7916
7917 s->MaxVStartupAllPlanes[j] = (dml_uint_t)(dml_max(s->MaxVStartupAllPlanes[j], s->MaximumVStartup[j][k]));
7918 #ifdef __DML_VBA_DEBUG__
7919 dml_print("DML::%s: k=%u, MaxVStartupAllPlanes[%u] = %u\n", __func__, k, j, s->MaxVStartupAllPlanes[j]);
7920 dml_print("DML::%s: k=%u, MaximumVStartup[%u][%u] = %u\n", __func__, k, j, k, s->MaximumVStartup[j][k]);
7921 #endif
7922 }
7923 }
7924
7925 s->ReorderingBytes = (dml_uint_t)(mode_lib->ms.soc.num_chans * dml_max3(mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_only_bytes,
7926 mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
7927 mode_lib->ms.soc.urgent_out_of_order_return_per_channel_vm_only_bytes));
7928
7929 for (j = 0; j < 2; ++j) {
7930 mode_lib->ms.DCFCLKState[j] = mode_lib->ms.state.dcfclk_mhz;
7931 }
7932
7933 /* Immediate Flip and MALL parameters */
7934 s->ImmediateFlipRequiredFinal = false;
7935 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7936 s->ImmediateFlipRequiredFinal = s->ImmediateFlipRequiredFinal || (mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_required);
7937 }
7938
7939 mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = false;
7940 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7941 mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified ||
7942 ((mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_required) &&
7943 (mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required));
7944 }
7945 mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified && s->ImmediateFlipRequiredFinal;
7946
7947 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = false;
7948 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7949 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe =
7950 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe || ((mode_lib->ms.cache_display_cfg.plane.HostVMEnable == true || mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required) &&
7951 (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame || mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe));
7952 }
7953
7954 mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = false;
7955 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7956 mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen ||
7957 ((mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_enable || mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_optimize) && (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe)) ||
7958 ((mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_disable || mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_optimize) && (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame));
7959 }
7960
7961 s->FullFrameMALLPStateMethod = false;
7962 s->SubViewportMALLPStateMethod = false;
7963 s->PhantomPipeMALLPStateMethod = false;
7964 s->SubViewportMALLRefreshGreaterThan120Hz = false;
7965 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7966 if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame)
7967 s->FullFrameMALLPStateMethod = true;
7968 if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport) {
7969 s->SubViewportMALLPStateMethod = true;
7970 if (mode_lib->ms.cache_display_cfg.timing.RefreshRate[k] > 120)
7971 s->SubViewportMALLRefreshGreaterThan120Hz = true;
7972 }
7973 if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe)
7974 s->PhantomPipeMALLPStateMethod = true;
7975 }
7976 mode_lib->ms.support.InvalidCombinationOfMALLUseForPState = (s->SubViewportMALLPStateMethod != s->PhantomPipeMALLPStateMethod)
7977 || (s->SubViewportMALLPStateMethod && s->FullFrameMALLPStateMethod) || s->SubViewportMALLRefreshGreaterThan120Hz;
7978
7979 if (mode_lib->ms.policy.UseMinimumRequiredDCFCLK == true) {
7980 UseMinimumDCFCLK_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
7981 UseMinimumDCFCLK_params->DRRDisplay = mode_lib->ms.cache_display_cfg.timing.DRRDisplay;
7982 UseMinimumDCFCLK_params->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal;
7983 UseMinimumDCFCLK_params->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters;
7984 UseMinimumDCFCLK_params->MaxPrefetchMode = dml_prefetch_support_stutter;
7985 UseMinimumDCFCLK_params->DRAMClockChangeLatencyFinal = mode_lib->ms.state.dram_clock_change_latency_us;
7986 UseMinimumDCFCLK_params->FCLKChangeLatency = mode_lib->ms.state.fclk_change_latency_us;
7987 UseMinimumDCFCLK_params->SREnterPlusExitTime = mode_lib->ms.state.sr_enter_plus_exit_time_us;
7988 UseMinimumDCFCLK_params->ReturnBusWidth = mode_lib->ms.soc.return_bus_width_bytes;
7989 UseMinimumDCFCLK_params->RoundTripPingLatencyCycles = mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles;
7990 UseMinimumDCFCLK_params->ReorderingBytes = s->ReorderingBytes;
7991 UseMinimumDCFCLK_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
7992 UseMinimumDCFCLK_params->MetaChunkSize = mode_lib->ms.ip.meta_chunk_size_kbytes;
7993 UseMinimumDCFCLK_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
7994 UseMinimumDCFCLK_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
7995 UseMinimumDCFCLK_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
7996 UseMinimumDCFCLK_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
7997 UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
7998 UseMinimumDCFCLK_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
7999 UseMinimumDCFCLK_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
8000 UseMinimumDCFCLK_params->ImmediateFlipRequirement = s->ImmediateFlipRequiredFinal;
8001 UseMinimumDCFCLK_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
8002 UseMinimumDCFCLK_params->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation = mode_lib->ms.soc.max_avg_sdp_bw_use_normal_percent;
8003 UseMinimumDCFCLK_params->PercentOfIdealSDPPortBWReceivedAfterUrgLatency = mode_lib->ms.soc.pct_ideal_sdp_bw_after_urgent;
8004 UseMinimumDCFCLK_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal;
8005 UseMinimumDCFCLK_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive;
8006 UseMinimumDCFCLK_params->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes;
8007 UseMinimumDCFCLK_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired;
8008 UseMinimumDCFCLK_params->Interlace = mode_lib->ms.cache_display_cfg.timing.Interlace;
8009 UseMinimumDCFCLK_params->RequiredDPPCLKPerSurface = mode_lib->ms.RequiredDPPCLKPerSurface;
8010 UseMinimumDCFCLK_params->RequiredDISPCLK = mode_lib->ms.RequiredDISPCLK;
8011 UseMinimumDCFCLK_params->UrgLatency = mode_lib->ms.UrgLatency;
8012 UseMinimumDCFCLK_params->NoOfDPP = mode_lib->ms.NoOfDPP;
8013 UseMinimumDCFCLK_params->ProjectedDCFCLKDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep;
8014 UseMinimumDCFCLK_params->MaximumVStartup = s->MaximumVStartup;
8015 UseMinimumDCFCLK_params->TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP;
8016 UseMinimumDCFCLK_params->TotalNumberOfDCCActiveDPP = mode_lib->ms.TotalNumberOfDCCActiveDPP;
8017 UseMinimumDCFCLK_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
8018 UseMinimumDCFCLK_params->PrefetchLinesY = mode_lib->ms.PrefetchLinesY;
8019 UseMinimumDCFCLK_params->PrefetchLinesC = mode_lib->ms.PrefetchLinesC;
8020 UseMinimumDCFCLK_params->swath_width_luma_ub_all_states = mode_lib->ms.swath_width_luma_ub_all_states;
8021 UseMinimumDCFCLK_params->swath_width_chroma_ub_all_states = mode_lib->ms.swath_width_chroma_ub_all_states;
8022 UseMinimumDCFCLK_params->BytePerPixelY = mode_lib->ms.BytePerPixelY;
8023 UseMinimumDCFCLK_params->BytePerPixelC = mode_lib->ms.BytePerPixelC;
8024 UseMinimumDCFCLK_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal;
8025 UseMinimumDCFCLK_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock;
8026 UseMinimumDCFCLK_params->PDEAndMetaPTEBytesPerFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrame;
8027 UseMinimumDCFCLK_params->DPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow;
8028 UseMinimumDCFCLK_params->MetaRowBytes = mode_lib->ms.MetaRowBytes;
8029 UseMinimumDCFCLK_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable;
8030 UseMinimumDCFCLK_params->ReadBandwidthLuma = mode_lib->ms.ReadBandwidthLuma;
8031 UseMinimumDCFCLK_params->ReadBandwidthChroma = mode_lib->ms.ReadBandwidthChroma;
8032 UseMinimumDCFCLK_params->DCFCLKPerState = mode_lib->ms.state.dcfclk_mhz;
8033 UseMinimumDCFCLK_params->DCFCLKState = mode_lib->ms.DCFCLKState;
8034
8035 UseMinimumDCFCLK(&mode_lib->scratch,
8036 UseMinimumDCFCLK_params);
8037
8038 } // UseMinimumRequiredDCFCLK == true
8039
8040 for (j = 0; j < 2; ++j) {
8041 mode_lib->ms.ReturnBWPerState[j] = dml_get_return_bw_mbps(&mode_lib->ms.soc, mode_lib->ms.state.use_ideal_dram_bw_strobe,
8042 mode_lib->ms.cache_display_cfg.plane.HostVMEnable, mode_lib->ms.DCFCLKState[j], mode_lib->ms.state.fabricclk_mhz,
8043 mode_lib->ms.state.dram_speed_mts);
8044 mode_lib->ms.ReturnDRAMBWPerState[j] = dml_get_return_dram_bw_mbps(&mode_lib->ms.soc, mode_lib->ms.state.use_ideal_dram_bw_strobe,
8045 mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
8046 mode_lib->ms.state.dram_speed_mts);
8047 }
8048
8049 //Re-ordering Buffer Support Check
8050 for (j = 0; j < 2; ++j) {
8051 if ((mode_lib->ms.ip.rob_buffer_size_kbytes - mode_lib->ms.ip.pixel_chunk_size_kbytes) * 1024 / mode_lib->ms.ReturnBWPerState[j] >
8052 (mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles + 32) / mode_lib->ms.DCFCLKState[j] + s->ReorderingBytes / mode_lib->ms.ReturnBWPerState[j]) {
8053 mode_lib->ms.support.ROBSupport[j] = true;
8054 } else {
8055 mode_lib->ms.support.ROBSupport[j] = false;
8056 }
8057 dml_print("DML::%s: DEBUG ROBSupport[%u] = %u (%u)\n", __func__, j, mode_lib->ms.support.ROBSupport[j], __LINE__);
8058 }
8059
8060 //Vertical Active BW support check
8061 s->MaxTotalVActiveRDBandwidth = 0;
8062 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8063 s->MaxTotalVActiveRDBandwidth = s->MaxTotalVActiveRDBandwidth + mode_lib->ms.ReadBandwidthLuma[k] + mode_lib->ms.ReadBandwidthChroma[k];
8064 }
8065
8066 for (j = 0; j < 2; ++j) {
8067 mode_lib->ms.support.MaxTotalVerticalActiveAvailableBandwidth[j] = dml_min3(mode_lib->ms.soc.return_bus_width_bytes * mode_lib->ms.DCFCLKState[j] * mode_lib->ms.soc.max_avg_sdp_bw_use_normal_percent / 100.0,
8068 mode_lib->ms.state.fabricclk_mhz * mode_lib->ms.soc.fabric_datapath_to_dcn_data_return_bytes * mode_lib->ms.soc.max_avg_fabric_bw_use_normal_percent / 100.0,
8069 mode_lib->ms.state.dram_speed_mts * mode_lib->ms.soc.num_chans * mode_lib->ms.soc.dram_channel_width_bytes *
8070 ((mode_lib->ms.state.use_ideal_dram_bw_strobe && !mode_lib->ms.cache_display_cfg.plane.HostVMEnable) ?
8071 mode_lib->ms.soc.max_avg_dram_bw_use_normal_strobe_percent : mode_lib->ms.soc.max_avg_dram_bw_use_normal_percent) / 100.0);
8072
8073 if (s->MaxTotalVActiveRDBandwidth <= mode_lib->ms.support.MaxTotalVerticalActiveAvailableBandwidth[j]) {
8074 mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] = true;
8075 } else {
8076 mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] = false;
8077 }
8078 }
8079
8080 /* Prefetch Check */
8081 dml_prefetch_check(mode_lib);
8082
8083 // End of Prefetch Check
8084 dml_print("DML::%s: Done prefetch calculation\n", __func__);
8085
8086 /*Cursor Support Check*/
8087 mode_lib->ms.support.CursorSupport = true;
8088 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8089 if (mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] > 0.0) {
8090 if (mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] == 64 && mode_lib->ms.ip.cursor_64bpp_support == false) {
8091 mode_lib->ms.support.CursorSupport = false;
8092 }
8093 }
8094 }
8095
8096 /*Valid Pitch Check*/
8097 mode_lib->ms.support.PitchSupport = true;
8098 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8099 mode_lib->ms.support.AlignedYPitch[k] = dml_ceil(
8100 dml_max(mode_lib->ms.cache_display_cfg.surface.PitchY[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k]),
8101 mode_lib->ms.MacroTileWidthY[k]);
8102 if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) {
8103 mode_lib->ms.support.AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k]), 64.0 * mode_lib->ms.Read256BlockWidthY[k]);
8104 } else {
8105 mode_lib->ms.support.AlignedDCCMetaPitchY[k] = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k];
8106 }
8107 if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64
8108 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32
8109 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16
8110 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_16
8111 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe
8112 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_8) {
8113 mode_lib->ms.support.AlignedCPitch[k] = dml_ceil(dml_max(mode_lib->ms.cache_display_cfg.surface.PitchC[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k]), mode_lib->ms.MacroTileWidthC[k]);
8114 if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) {
8115 mode_lib->ms.support.AlignedDCCMetaPitchC[k] = dml_ceil(dml_max(mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k]), 64.0 * mode_lib->ms.Read256BlockWidthC[k]);
8116 } else {
8117 mode_lib->ms.support.AlignedDCCMetaPitchC[k] = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k];
8118 }
8119 } else {
8120 mode_lib->ms.support.AlignedCPitch[k] = mode_lib->ms.cache_display_cfg.surface.PitchC[k];
8121 mode_lib->ms.support.AlignedDCCMetaPitchC[k] = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k];
8122 }
8123 if (mode_lib->ms.support.AlignedYPitch[k] > mode_lib->ms.cache_display_cfg.surface.PitchY[k] || mode_lib->ms.support.AlignedCPitch[k] > mode_lib->ms.cache_display_cfg.surface.PitchC[k] ||
8124 mode_lib->ms.support.AlignedDCCMetaPitchY[k] > mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k] || mode_lib->ms.support.AlignedDCCMetaPitchC[k] > mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k]) {
8125 mode_lib->ms.support.PitchSupport = false;
8126 }
8127 }
8128
8129 mode_lib->ms.support.ViewportExceedsSurface = false;
8130 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8131 if (mode_lib->ms.cache_display_cfg.plane.ViewportWidth[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k] || mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY[k]) {
8132 mode_lib->ms.support.ViewportExceedsSurface = true;
8133 if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32 &&
8134 mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_8 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe) {
8135 if (mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k] || mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC[k]) {
8136 mode_lib->ms.support.ViewportExceedsSurface = true;
8137 }
8138 }
8139 }
8140 }
8141
8142 /*Mode Support, Voltage State and SOC Configuration*/
8143 for (j = 0; j < 2; j++) { // j iterator is for the combine mode off or on
8144 dml_print("DML::%s: checking support for j=%u\n", __func__, j);
8145 dml_print("DML::%s: state_idx=%0d max_state_idx=%0d\n", __func__, mode_lib->ms.state_idx, mode_lib->ms.max_state_idx);
8146
8147 s->is_max_pwr_state = (mode_lib->ms.max_state_idx == mode_lib->ms.state_idx);
8148 s->is_max_dram_pwr_state = (mode_lib->ms.max_state.dram_speed_mts == mode_lib->ms.state.dram_speed_mts);
8149
8150 s->dram_clock_change_support = (!mode_lib->ms.policy.DRAMClockChangeRequirementFinal ||
8151 (s->is_max_dram_pwr_state && mode_lib->policy.AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported) ||
8152 mode_lib->ms.support.DRAMClockChangeSupport[j] != dml_dram_clock_change_unsupported);
8153 s->f_clock_change_support = (!mode_lib->ms.policy.FCLKChangeRequirementFinal ||
8154 (s->is_max_pwr_state && mode_lib->policy.AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported) ||
8155 mode_lib->ms.support.FCLKChangeSupport[j] != dml_fclock_change_unsupported);
8156
8157 if (mode_lib->ms.support.ScaleRatioAndTapsSupport == true
8158 && mode_lib->ms.support.SourceFormatPixelAndScanSupport == true
8159 && mode_lib->ms.support.ViewportSizeSupport[j] == true
8160 && !mode_lib->ms.support.LinkRateDoesNotMatchDPVersion
8161 && !mode_lib->ms.support.LinkRateForMultistreamNotIndicated
8162 && !mode_lib->ms.support.BPPForMultistreamNotIndicated
8163 && !mode_lib->ms.support.MultistreamWithHDMIOreDP
8164 && !mode_lib->ms.support.ExceededMultistreamSlots
8165 && !mode_lib->ms.support.MSOOrODMSplitWithNonDPLink
8166 && !mode_lib->ms.support.NotEnoughLanesForMSO
8167 && mode_lib->ms.support.LinkCapacitySupport == true
8168 && !mode_lib->ms.support.P2IWith420
8169 && !mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP
8170 && !mode_lib->ms.support.DSC422NativeNotSupported
8171 && !mode_lib->ms.support.MPCCombineMethodIncompatible
8172 && mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK == true
8173 && mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK == true
8174 && mode_lib->ms.support.NotEnoughDSCUnits == false
8175 && !mode_lib->ms.support.NotEnoughDSCSlices
8176 && !mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe
8177 && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen
8178 && mode_lib->ms.support.DSCCLKRequiredMoreThanSupported == false
8179 && mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport
8180 && mode_lib->ms.support.DTBCLKRequiredMoreThanSupported == false
8181 && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPState
8182 && !mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified
8183 && mode_lib->ms.support.ROBSupport[j] == true
8184 && mode_lib->ms.support.DISPCLK_DPPCLK_Support[j] == true
8185 && mode_lib->ms.support.TotalAvailablePipesSupport[j] == true
8186 && mode_lib->ms.support.NumberOfOTGSupport == true
8187 && mode_lib->ms.support.NumberOfHDMIFRLSupport == true
8188 && mode_lib->ms.support.NumberOfDP2p0Support == true
8189 && mode_lib->ms.support.EnoughWritebackUnits == true
8190 && mode_lib->ms.support.WritebackLatencySupport == true
8191 && mode_lib->ms.support.WritebackScaleRatioAndTapsSupport == true
8192 && mode_lib->ms.support.CursorSupport == true
8193 && mode_lib->ms.support.PitchSupport == true
8194 && mode_lib->ms.support.ViewportExceedsSurface == false
8195 && mode_lib->ms.support.PrefetchSupported[j] == true
8196 && mode_lib->ms.support.VActiveBandwithSupport[j] == true
8197 && mode_lib->ms.support.DynamicMetadataSupported[j] == true
8198 && mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] == true
8199 && mode_lib->ms.support.VRatioInPrefetchSupported[j] == true
8200 && mode_lib->ms.support.PTEBufferSizeNotExceeded[j] == true
8201 && mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] == true
8202 && mode_lib->ms.support.NonsupportedDSCInputBPC == false
8203 && !mode_lib->ms.support.ExceededMALLSize
8204 && ((mode_lib->ms.cache_display_cfg.plane.HostVMEnable == false && !s->ImmediateFlipRequiredFinal) || mode_lib->ms.support.ImmediateFlipSupportedForState[j])
8205 && s->dram_clock_change_support == true
8206 && s->f_clock_change_support == true
8207 && (!mode_lib->ms.policy.USRRetrainingRequiredFinal || mode_lib->ms.support.USRRetrainingSupport[j])) {
8208 dml_print("DML::%s: mode is supported\n", __func__);
8209 mode_lib->ms.support.ModeSupport[j] = true;
8210 } else {
8211 dml_print("DML::%s: mode is NOT supported\n", __func__);
8212 mode_lib->ms.support.ModeSupport[j] = false;
8213 dml_print_mode_support(mode_lib, j);
8214 }
8215 }
8216
8217 mode_lib->ms.support.MaximumMPCCombine = 0;
8218 mode_lib->ms.support.ModeIsSupported = 0;
8219 if (mode_lib->ms.support.ModeSupport[0] == true || mode_lib->ms.support.ModeSupport[1] == true) { // if the mode is supported by either no combine or mpccombine
8220 mode_lib->ms.support.ModeIsSupported = mode_lib->ms.support.ModeSupport[0] == true || mode_lib->ms.support.ModeSupport[1] == true;
8221
8222 // Determine if MPC combine is necessary, depends on if using MPC combine will help dram clock change or fclk change, etc.
8223 if ((mode_lib->ms.support.ModeSupport[0] == false && mode_lib->ms.support.ModeSupport[1] == true) || s->MPCCombineMethodAsPossible ||
8224 (s->MPCCombineMethodAsNeededForPStateChangeAndVoltage && mode_lib->ms.policy.DRAMClockChangeRequirementFinal &&
8225 (((mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vactive || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vactive_w_mall_full_frame || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vactive_w_mall_sub_vp) &&
8226 !(mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive || mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive_w_mall_full_frame || mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive_w_mall_sub_vp)) ||
8227 ((mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_drr
8228 || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_w_mall_full_frame || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_drr_w_mall_full_frame
8229 || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_w_mall_sub_vp || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_drr_w_mall_sub_vp
8230 ) &&
8231 mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_unsupported)))
8232 || (s->MPCCombineMethodAsNeededForPStateChangeAndVoltage && mode_lib->ms.policy.FCLKChangeRequirementFinal &&
8233 ((mode_lib->ms.support.FCLKChangeSupport[1] == dml_fclock_change_vactive && mode_lib->ms.support.FCLKChangeSupport[0] != dml_fclock_change_vactive) ||
8234 (mode_lib->ms.support.FCLKChangeSupport[1] == dml_fclock_change_vblank && mode_lib->ms.support.FCLKChangeSupport[0] == dml_fclock_change_unsupported)))) {
8235 mode_lib->ms.support.MaximumMPCCombine = 1;
8236 } else {
8237 mode_lib->ms.support.MaximumMPCCombine = 0;
8238 }
8239 }
8240
8241 // Since now the mode_support work on 1 particular power state, so there is only 1 state idx (index 0).
8242 mode_lib->ms.support.ImmediateFlipSupport = mode_lib->ms.support.ImmediateFlipSupportedForState[mode_lib->ms.support.MaximumMPCCombine]; // Consider flip support if max combine support imm flip
8243 mode_lib->ms.support.UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabledAllStates[mode_lib->ms.support.MaximumMPCCombine]; // Not used, informational
8244 mode_lib->ms.support.CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByteAllStates[mode_lib->ms.support.MaximumMPCCombine]; // Not used, informational
8245
8246 dml_print("DML::%s: ModeIsSupported = %u\n", __func__, mode_lib->ms.support.ModeIsSupported);
8247 dml_print("DML::%s: MaximumMPCCombine = %u\n", __func__, mode_lib->ms.support.MaximumMPCCombine);
8248 dml_print("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
8249 dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, mode_lib->ms.support.UnboundedRequestEnabled);
8250 dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, mode_lib->ms.support.CompressedBufferSizeInkByte);
8251
8252 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8253 mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[mode_lib->ms.support.MaximumMPCCombine][k];
8254 mode_lib->ms.support.DPPPerSurface[k] = mode_lib->ms.NoOfDPP[mode_lib->ms.support.MaximumMPCCombine][k];
8255 mode_lib->ms.SwathHeightY[k] = mode_lib->ms.SwathHeightYAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
8256 mode_lib->ms.SwathHeightC[k] = mode_lib->ms.SwathHeightCAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
8257 mode_lib->ms.DETBufferSizeInKByte[k] = mode_lib->ms.DETBufferSizeInKByteAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
8258 mode_lib->ms.DETBufferSizeY[k] = mode_lib->ms.DETBufferSizeYAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
8259 mode_lib->ms.DETBufferSizeC[k] = mode_lib->ms.DETBufferSizeCAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
8260 }
8261
8262 mode_lib->ms.DRAMSpeed = mode_lib->ms.state.dram_speed_mts;
8263 mode_lib->ms.FabricClock = mode_lib->ms.state.fabricclk_mhz;
8264 mode_lib->ms.SOCCLK = mode_lib->ms.state.socclk_mhz;
8265 mode_lib->ms.DCFCLK = mode_lib->ms.DCFCLKState[mode_lib->ms.support.MaximumMPCCombine];
8266 mode_lib->ms.ReturnBW = mode_lib->ms.ReturnBWPerState[mode_lib->ms.support.MaximumMPCCombine];
8267 mode_lib->ms.ReturnDRAMBW = mode_lib->ms.ReturnDRAMBWPerState[mode_lib->ms.support.MaximumMPCCombine];
8268
8269 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8270 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
8271 mode_lib->ms.support.ODMMode[k] = mode_lib->ms.ODMModePerState[k];
8272 } else {
8273 mode_lib->ms.support.ODMMode[k] = dml_odm_mode_bypass;
8274 }
8275
8276 mode_lib->ms.support.DSCEnabled[k] = mode_lib->ms.RequiresDSC[k];
8277 mode_lib->ms.support.FECEnabled[k] = mode_lib->ms.RequiresFEC[k];
8278 mode_lib->ms.support.OutputBpp[k] = mode_lib->ms.OutputBppPerState[k];
8279 mode_lib->ms.support.OutputType[k] = mode_lib->ms.OutputTypePerState[k];
8280 mode_lib->ms.support.OutputRate[k] = mode_lib->ms.OutputRatePerState[k];
8281 mode_lib->ms.support.SubViewportLinesNeededInMALL[k] = mode_lib->ms.SubViewportLinesNeededInMALL[k];
8282 }
8283
8284 return mode_lib->ms.support.ModeIsSupported;
8285 } // dml_core_mode_support
8286
/// @brief This function calculates some parameters that are needed ahead of the mode programming function call
dml_core_mode_support_partial(struct display_mode_lib_st * mode_lib)8288 void dml_core_mode_support_partial(struct display_mode_lib_st *mode_lib)
8289 {
8290 CalculateMaxDETAndMinCompressedBufferSize(
8291 mode_lib->ms.ip.config_return_buffer_size_in_kbytes,
8292 mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes,
8293 mode_lib->ms.ip.rob_buffer_size_kbytes,
8294 mode_lib->ms.ip.max_num_dpp,
8295 mode_lib->ms.policy.NomDETInKByteOverrideEnable,
8296 mode_lib->ms.policy.NomDETInKByteOverrideValue,
8297
8298 /* Output */
8299 &mode_lib->ms.MaxTotalDETInKByte,
8300 &mode_lib->ms.NomDETInKByte,
8301 &mode_lib->ms.MinCompressedBufferSizeInKByte);
8302
8303 PixelClockAdjustmentForProgressiveToInterlaceUnit(&mode_lib->ms.cache_display_cfg, mode_lib->ms.ip.ptoi_supported);
8304
8305 mode_lib->ms.ReturnBW = dml_get_return_bw_mbps(&mode_lib->ms.soc,
8306 mode_lib->ms.state.use_ideal_dram_bw_strobe,
8307 mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
8308 mode_lib->ms.DCFCLK,
8309 mode_lib->ms.FabricClock,
8310 mode_lib->ms.DRAMSpeed);
8311 dml_print("DML::%s: ReturnBW = %f\n", __func__, mode_lib->ms.ReturnBW);
8312
8313 } // dml_core_mode_support_partial
8314
/// @brief This is the mode programming function. It is assumed the display cfg is supported at the given power state
dml_core_mode_programming(struct display_mode_lib_st * mode_lib,const struct dml_clk_cfg_st * clk_cfg)8316 void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struct dml_clk_cfg_st *clk_cfg)
8317 {
8318 struct dml_core_mode_programming_locals_st *s = &mode_lib->scratch.dml_core_mode_programming_locals;
8319 struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
8320 struct CalculateVMRowAndSwath_params_st *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
8321 struct CalculateSwathAndDETConfiguration_params_st *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
8322 struct CalculateStutterEfficiency_params_st *CalculateStutterEfficiency_params = &mode_lib->scratch.CalculateStutterEfficiency_params;
8323 struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
8324
8325 struct mode_program_st *locals = &mode_lib->mp;
8326 struct DmlPipe *myPipe;
8327 dml_uint_t j = 0, k = 0;
8328 dml_float_t TWait;
8329 dml_bool_t isInterlaceTiming;
8330
8331 mode_lib->ms.num_active_planes = dml_get_num_active_planes(&mode_lib->ms.cache_display_cfg);
8332 mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(&mode_lib->ms.cache_display_cfg);
8333 dml_calc_pipe_plane_mapping(&mode_lib->ms.cache_display_cfg.hw, mode_lib->mp.pipe_plane);
8334
8335 #ifdef __DML_VBA_DEBUG__
8336 dml_print("DML::%s: --- START --- \n", __func__);
8337 dml_print("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes);
8338 dml_print("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes);
8339 #endif
8340
8341 s->DSCFormatFactor = 0;
8342
8343 // Unlike dppclk and dispclk which can be calculated in mode_programming
8344 // DCFCLK is calculated in mode_support (which is the state bbox dcfclk or min dcfclk if min dcfclk option is used in mode support calculation)
8345 if (clk_cfg->dcfclk_option != dml_use_override_freq)
8346 locals->Dcfclk = mode_lib->ms.DCFCLK;
8347 else
8348 locals->Dcfclk = clk_cfg->dcfclk_mhz;
8349
8350 #ifdef __DML_VBA_DEBUG__
8351 dml_print_dml_policy(&mode_lib->ms.policy);
8352 dml_print_soc_state_bounding_box(&mode_lib->ms.state);
8353 dml_print_soc_bounding_box(&mode_lib->ms.soc);
8354 dml_print_clk_cfg(clk_cfg);
8355
8356 dml_print("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
8357 dml_print("DML::%s: Using DCFCLK = %f\n", __func__, locals->Dcfclk);
8358 dml_print("DML::%s: Using SOCCLK = %f\n", __func__, mode_lib->ms.SOCCLK);
8359 #endif
8360
8361 locals->WritebackDISPCLK = 0.0;
8362 locals->GlobalDPPCLK = 0.0;
8363
8364 // DISPCLK and DPPCLK Calculation
8365 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8366 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k]) {
8367 locals->WritebackDISPCLK =
8368 dml_max(
8369 locals->WritebackDISPCLK,
8370 CalculateWriteBackDISPCLK(
8371 mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k],
8372 mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
8373 mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k],
8374 mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k],
8375 mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k],
8376 mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k],
8377 mode_lib->ms.cache_display_cfg.writeback.WritebackSourceWidth[k],
8378 mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k],
8379 mode_lib->ms.cache_display_cfg.timing.HTotal[k],
8380 mode_lib->ms.ip.writeback_line_buffer_buffer_size,
8381 mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz));
8382 }
8383 }
8384
8385 locals->Dispclk_calculated = locals->WritebackDISPCLK;
8386
8387 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8388 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
8389 locals->Dispclk_calculated = dml_max(locals->Dispclk_calculated, CalculateRequiredDispclk(
8390 mode_lib->ms.cache_display_cfg.hw.ODMMode[k],
8391 mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
8392 mode_lib->ms.soc.dcn_downspread_percent,
8393 mode_lib->ms.ip.dispclk_ramp_margin_percent,
8394 mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
8395 mode_lib->ms.max_state.dispclk_mhz));
8396 }
8397 }
8398 if (clk_cfg->dispclk_option == dml_use_required_freq)
8399 locals->Dispclk = locals->Dispclk_calculated;
8400 else if (clk_cfg->dispclk_option == dml_use_override_freq)
8401 locals->Dispclk = clk_cfg->dispclk_mhz;
8402 else
8403 locals->Dispclk = mode_lib->ms.state.dispclk_mhz;
8404 #ifdef __DML_VBA_DEBUG__
8405 dml_print("DML::%s: Using Dispclk = %f\n", __func__, locals->Dispclk);
8406 #endif
8407
8408 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8409 CalculateSinglePipeDPPCLKAndSCLThroughput(
8410 mode_lib->ms.cache_display_cfg.plane.HRatio[k],
8411 mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k],
8412 mode_lib->ms.cache_display_cfg.plane.VRatio[k],
8413 mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
8414 mode_lib->ms.ip.max_dchub_pscl_bw_pix_per_clk,
8415 mode_lib->ms.ip.max_pscl_lb_bw_pix_per_clk,
8416 mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
8417 mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
8418 mode_lib->ms.cache_display_cfg.plane.HTaps[k],
8419 mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k],
8420 mode_lib->ms.cache_display_cfg.plane.VTaps[k],
8421 mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k],
8422
8423 /* Output */
8424 &locals->PSCL_THROUGHPUT[k],
8425 &locals->PSCL_THROUGHPUT_CHROMA[k],
8426 &locals->DPPCLKUsingSingleDPP[k]);
8427 }
8428
8429 CalculateDPPCLK(mode_lib->ms.num_active_planes,
8430 mode_lib->ms.soc.dcn_downspread_percent,
8431 mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
8432 locals->DPPCLKUsingSingleDPP,
8433 mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
8434 /* Output */
8435 &locals->GlobalDPPCLK,
8436 locals->Dppclk_calculated);
8437
8438 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8439 if (clk_cfg->dppclk_option[k] == dml_use_required_freq)
8440 locals->Dppclk[k] = locals->Dppclk_calculated[k];
8441 else if (clk_cfg->dppclk_option[k] == dml_use_override_freq)
8442 locals->Dppclk[k] = clk_cfg->dppclk_mhz[k];
8443 else
8444 locals->Dppclk[k] = mode_lib->ms.state.dppclk_mhz;
8445 #ifdef __DML_VBA_DEBUG__
8446 dml_print("DML::%s: Using Dppclk[%0d] = %f\n", __func__, k, locals->Dppclk[k]);
8447 #endif
8448 }
8449
8450 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8451 CalculateBytePerPixelAndBlockSizes(
8452 mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
8453 mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k],
8454
8455 /* Output */
8456 &locals->BytePerPixelY[k],
8457 &locals->BytePerPixelC[k],
8458 &locals->BytePerPixelDETY[k],
8459 &locals->BytePerPixelDETC[k],
8460 &locals->BlockHeight256BytesY[k],
8461 &locals->BlockHeight256BytesC[k],
8462 &locals->BlockWidth256BytesY[k],
8463 &locals->BlockWidth256BytesC[k],
8464 &locals->BlockHeightY[k],
8465 &locals->BlockHeightC[k],
8466 &locals->BlockWidthY[k],
8467 &locals->BlockWidthC[k]);
8468 }
8469
8470
8471 dml_print("DML::%s: %u\n", __func__, __LINE__);
8472 CalculateSwathWidth(
8473 false, // ForceSingleDPP
8474 mode_lib->ms.num_active_planes,
8475 mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat,
8476 mode_lib->ms.cache_display_cfg.plane.SourceScan,
8477 mode_lib->ms.cache_display_cfg.plane.ViewportStationary,
8478 mode_lib->ms.cache_display_cfg.plane.ViewportWidth,
8479 mode_lib->ms.cache_display_cfg.plane.ViewportHeight,
8480 mode_lib->ms.cache_display_cfg.plane.ViewportXStart,
8481 mode_lib->ms.cache_display_cfg.plane.ViewportYStart,
8482 mode_lib->ms.cache_display_cfg.plane.ViewportXStartC,
8483 mode_lib->ms.cache_display_cfg.plane.ViewportYStartC,
8484 mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY,
8485 mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC,
8486 mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY,
8487 mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC,
8488 mode_lib->ms.cache_display_cfg.hw.ODMMode,
8489 locals->BytePerPixelY,
8490 locals->BytePerPixelC,
8491 locals->BlockHeight256BytesY,
8492 locals->BlockHeight256BytesC,
8493 locals->BlockWidth256BytesY,
8494 locals->BlockWidth256BytesC,
8495 mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming,
8496 mode_lib->ms.cache_display_cfg.timing.HActive,
8497 mode_lib->ms.cache_display_cfg.plane.HRatio,
8498 mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
8499
8500 /* Output */
8501 locals->SwathWidthSingleDPPY,
8502 locals->SwathWidthSingleDPPC,
8503 locals->SwathWidthY,
8504 locals->SwathWidthC,
8505 s->dummy_integer_array[0], // dml_uint_t MaximumSwathHeightY[]
8506 s->dummy_integer_array[1], // dml_uint_t MaximumSwathHeightC[]
8507 locals->swath_width_luma_ub,
8508 locals->swath_width_chroma_ub);
8509
8510 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8511 locals->ReadBandwidthSurfaceLuma[k] = locals->SwathWidthSingleDPPY[k] * locals->BytePerPixelY[k] / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k];
8512 locals->ReadBandwidthSurfaceChroma[k] = locals->SwathWidthSingleDPPC[k] * locals->BytePerPixelC[k] / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k];
8513 dml_print("DML::%s: ReadBandwidthSurfaceLuma[%i] = %fBps\n", __func__, k, locals->ReadBandwidthSurfaceLuma[k]);
8514 dml_print("DML::%s: ReadBandwidthSurfaceChroma[%i] = %fBps\n", __func__, k, locals->ReadBandwidthSurfaceChroma[k]);
8515 }
8516
8517 CalculateSwathAndDETConfiguration_params->DETSizeOverride = mode_lib->ms.cache_display_cfg.plane.DETSizeOverride;
8518 CalculateSwathAndDETConfiguration_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
8519 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ms.ip.config_return_buffer_size_in_kbytes;
8520 CalculateSwathAndDETConfiguration_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes;
8521 CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte;
8522 CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte;
8523 CalculateSwathAndDETConfiguration_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
8524 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
8525 CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
8526 CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
8527 CalculateSwathAndDETConfiguration_params->UseUnboundedRequestingFinal = mode_lib->ms.policy.UseUnboundedRequesting;
8528 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes;
8529 CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByteFinal = mode_lib->ms.ip.compressed_buffer_segment_size_in_kbytes;
8530 CalculateSwathAndDETConfiguration_params->Output = s->dummy_output_encoder_array;
8531 CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = locals->ReadBandwidthSurfaceLuma;
8532 CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = locals->ReadBandwidthSurfaceChroma;
8533 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = s->dummy_single_array[0];
8534 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = s->dummy_single_array[1];
8535 CalculateSwathAndDETConfiguration_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan;
8536 CalculateSwathAndDETConfiguration_params->ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary;
8537 CalculateSwathAndDETConfiguration_params->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat;
8538 CalculateSwathAndDETConfiguration_params->SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling;
8539 CalculateSwathAndDETConfiguration_params->ViewportWidth = mode_lib->ms.cache_display_cfg.plane.ViewportWidth;
8540 CalculateSwathAndDETConfiguration_params->ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight;
8541 CalculateSwathAndDETConfiguration_params->ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart;
8542 CalculateSwathAndDETConfiguration_params->ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart;
8543 CalculateSwathAndDETConfiguration_params->ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC;
8544 CalculateSwathAndDETConfiguration_params->ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC;
8545 CalculateSwathAndDETConfiguration_params->SurfaceWidthY = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY;
8546 CalculateSwathAndDETConfiguration_params->SurfaceWidthC = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC;
8547 CalculateSwathAndDETConfiguration_params->SurfaceHeightY = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY;
8548 CalculateSwathAndDETConfiguration_params->SurfaceHeightC = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC;
8549 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = locals->BlockHeight256BytesY;
8550 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = locals->BlockHeight256BytesC;
8551 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = locals->BlockWidth256BytesY;
8552 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = locals->BlockWidth256BytesC;
8553 CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.cache_display_cfg.hw.ODMMode;
8554 CalculateSwathAndDETConfiguration_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
8555 CalculateSwathAndDETConfiguration_params->BytePerPixY = locals->BytePerPixelY;
8556 CalculateSwathAndDETConfiguration_params->BytePerPixC = locals->BytePerPixelC;
8557 CalculateSwathAndDETConfiguration_params->BytePerPixDETY = locals->BytePerPixelDETY;
8558 CalculateSwathAndDETConfiguration_params->BytePerPixDETC = locals->BytePerPixelDETC;
8559 CalculateSwathAndDETConfiguration_params->HActive = mode_lib->ms.cache_display_cfg.timing.HActive;
8560 CalculateSwathAndDETConfiguration_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
8561 CalculateSwathAndDETConfiguration_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
8562 CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface;
8563 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_long_array[0];
8564 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_long_array[1];
8565 CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_long_array[2];
8566 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_long_array[3];
8567 CalculateSwathAndDETConfiguration_params->SwathHeightY = locals->SwathHeightY;
8568 CalculateSwathAndDETConfiguration_params->SwathHeightC = locals->SwathHeightC;
8569 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = locals->DETBufferSizeInKByte;
8570 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = locals->DETBufferSizeY;
8571 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = locals->DETBufferSizeC;
8572 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &locals->UnboundedRequestEnabled;
8573 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &locals->compbuf_reserved_space_64b;
8574 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_zs = &locals->compbuf_reserved_space_zs;
8575 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &locals->CompressedBufferSizeInkByte;
8576 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = &s->dummy_boolean_array[0][0];
8577 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[0];
8578
8579 // VBA_DELTA
8580 // Calculate DET size, swath height here. In VBA, they are calculated in mode check stage
8581 CalculateSwathAndDETConfiguration(&mode_lib->scratch,
8582 CalculateSwathAndDETConfiguration_params);
8583
8584 // DCFCLK Deep Sleep
8585 CalculateDCFCLKDeepSleep(
8586 mode_lib->ms.num_active_planes,
8587 locals->BytePerPixelY,
8588 locals->BytePerPixelC,
8589 mode_lib->ms.cache_display_cfg.plane.VRatio,
8590 mode_lib->ms.cache_display_cfg.plane.VRatioChroma,
8591 locals->SwathWidthY,
8592 locals->SwathWidthC,
8593 mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
8594 mode_lib->ms.cache_display_cfg.plane.HRatio,
8595 mode_lib->ms.cache_display_cfg.plane.HRatioChroma,
8596 mode_lib->ms.cache_display_cfg.timing.PixelClock,
8597 locals->PSCL_THROUGHPUT,
8598 locals->PSCL_THROUGHPUT_CHROMA,
8599 locals->Dppclk,
8600 locals->ReadBandwidthSurfaceLuma,
8601 locals->ReadBandwidthSurfaceChroma,
8602 mode_lib->ms.soc.return_bus_width_bytes,
8603
8604 /* Output */
8605 &locals->DCFCLKDeepSleep);
8606
8607 // DSCCLK
8608 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8609 if ((mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] != k) || !mode_lib->ms.cache_display_cfg.hw.DSCEnabled[k]) {
8610 locals->DSCCLK_calculated[k] = 0.0;
8611 } else {
8612 if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_420)
8613 s->DSCFormatFactor = 2;
8614 else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_444)
8615 s->DSCFormatFactor = 1;
8616 else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_n422 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)
8617 s->DSCFormatFactor = 2;
8618 else
8619 s->DSCFormatFactor = 1;
8620 if (mode_lib->ms.cache_display_cfg.hw.ODMMode[k] == dml_odm_mode_combine_4to1)
8621 locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 12 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100);
8622 else if (mode_lib->ms.cache_display_cfg.hw.ODMMode[k] == dml_odm_mode_combine_2to1)
8623 locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 6 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100);
8624 else
8625 locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 3 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100);
8626 }
8627 }
8628
8629 // DSC Delay
8630 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8631 locals->DSCDelay[k] = DSCDelayRequirement(mode_lib->ms.cache_display_cfg.hw.DSCEnabled[k],
8632 mode_lib->ms.cache_display_cfg.hw.ODMMode[k],
8633 mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k],
8634 mode_lib->ms.cache_display_cfg.output.OutputBpp[k],
8635 mode_lib->ms.cache_display_cfg.timing.HActive[k],
8636 mode_lib->ms.cache_display_cfg.timing.HTotal[k],
8637 mode_lib->ms.cache_display_cfg.hw.NumberOfDSCSlices[k],
8638 mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
8639 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
8640 mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
8641 mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k]);
8642 }
8643
8644 for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
8645 for (j = 0; j < mode_lib->ms.num_active_planes; ++j) // NumberOfSurfaces
8646 if (j != k && mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == j && mode_lib->ms.cache_display_cfg.hw.DSCEnabled[j])
8647 locals->DSCDelay[k] = locals->DSCDelay[j];
8648
8649 // Prefetch
8650 CalculateSurfaceSizeInMall(
8651 mode_lib->ms.num_active_planes,
8652 mode_lib->ms.soc.mall_allocated_for_dcn_mbytes,
8653 mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen,
8654 mode_lib->ms.cache_display_cfg.surface.DCCEnable,
8655 mode_lib->ms.cache_display_cfg.plane.ViewportStationary,
8656 mode_lib->ms.cache_display_cfg.plane.ViewportXStart,
8657 mode_lib->ms.cache_display_cfg.plane.ViewportYStart,
8658 mode_lib->ms.cache_display_cfg.plane.ViewportXStartC,
8659 mode_lib->ms.cache_display_cfg.plane.ViewportYStartC,
8660 mode_lib->ms.cache_display_cfg.plane.ViewportWidth,
8661 mode_lib->ms.cache_display_cfg.plane.ViewportHeight,
8662 locals->BytePerPixelY,
8663 mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma,
8664 mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma,
8665 locals->BytePerPixelC,
8666 mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY,
8667 mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC,
8668 mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY,
8669 mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC,
8670 locals->BlockWidth256BytesY,
8671 locals->BlockWidth256BytesC,
8672 locals->BlockHeight256BytesY,
8673 locals->BlockHeight256BytesC,
8674 locals->BlockWidthY,
8675 locals->BlockWidthC,
8676 locals->BlockHeightY,
8677 locals->BlockHeightC,
8678
8679 /* Output */
8680 locals->SurfaceSizeInTheMALL,
8681 &s->dummy_boolean[0]); /* dml_bool_t *ExceededMALLSize */
8682
8683 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8684 s->SurfaceParameters[k].PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
8685 s->SurfaceParameters[k].DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k];
8686 s->SurfaceParameters[k].SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k];
8687 s->SurfaceParameters[k].ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k];
8688 s->SurfaceParameters[k].ViewportHeightChroma = mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k];
8689 s->SurfaceParameters[k].BlockWidth256BytesY = locals->BlockWidth256BytesY[k];
8690 s->SurfaceParameters[k].BlockHeight256BytesY = locals->BlockHeight256BytesY[k];
8691 s->SurfaceParameters[k].BlockWidth256BytesC = locals->BlockWidth256BytesC[k];
8692 s->SurfaceParameters[k].BlockHeight256BytesC = locals->BlockHeight256BytesC[k];
8693 s->SurfaceParameters[k].BlockWidthY = locals->BlockWidthY[k];
8694 s->SurfaceParameters[k].BlockHeightY = locals->BlockHeightY[k];
8695 s->SurfaceParameters[k].BlockWidthC = locals->BlockWidthC[k];
8696 s->SurfaceParameters[k].BlockHeightC = locals->BlockHeightC[k];
8697 s->SurfaceParameters[k].InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k];
8698 s->SurfaceParameters[k].HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k];
8699 s->SurfaceParameters[k].DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k];
8700 s->SurfaceParameters[k].SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k];
8701 s->SurfaceParameters[k].SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k];
8702 s->SurfaceParameters[k].BytePerPixelY = locals->BytePerPixelY[k];
8703 s->SurfaceParameters[k].BytePerPixelC = locals->BytePerPixelC[k];
8704 s->SurfaceParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
8705 s->SurfaceParameters[k].VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio[k];
8706 s->SurfaceParameters[k].VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k];
8707 s->SurfaceParameters[k].VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps[k];
8708 s->SurfaceParameters[k].VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k];
8709 s->SurfaceParameters[k].PitchY = mode_lib->ms.cache_display_cfg.surface.PitchY[k];
8710 s->SurfaceParameters[k].DCCMetaPitchY = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k];
8711 s->SurfaceParameters[k].PitchC = mode_lib->ms.cache_display_cfg.surface.PitchC[k];
8712 s->SurfaceParameters[k].DCCMetaPitchC = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k];
8713 s->SurfaceParameters[k].ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary[k];
8714 s->SurfaceParameters[k].ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart[k];
8715 s->SurfaceParameters[k].ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart[k];
8716 s->SurfaceParameters[k].ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC[k];
8717 s->SurfaceParameters[k].ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC[k];
8718 s->SurfaceParameters[k].FORCE_ONE_ROW_FOR_FRAME = mode_lib->ms.cache_display_cfg.plane.ForceOneRowForFrame[k];
8719 s->SurfaceParameters[k].SwathHeightY = locals->SwathHeightY[k];
8720 s->SurfaceParameters[k].SwathHeightC = locals->SwathHeightC[k];
8721 }
8722
8723 CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
8724 CalculateVMRowAndSwath_params->myPipe = s->SurfaceParameters;
8725 CalculateVMRowAndSwath_params->SurfaceSizeInMALL = locals->SurfaceSizeInTheMALL;
8726 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_luma;
8727 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_chroma;
8728 CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ms.ip.dcc_meta_buffer_size_bytes;
8729 CalculateVMRowAndSwath_params->UseMALLForStaticScreen = mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen;
8730 CalculateVMRowAndSwath_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
8731 CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->ms.soc.mall_allocated_for_dcn_mbytes;
8732 CalculateVMRowAndSwath_params->SwathWidthY = locals->SwathWidthY;
8733 CalculateVMRowAndSwath_params->SwathWidthC = locals->SwathWidthC;
8734 CalculateVMRowAndSwath_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
8735 CalculateVMRowAndSwath_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
8736 CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
8737 CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
8738 CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes;
8739 CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
8740 CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn;
8741 CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode;
8742 CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0];
8743 CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = s->dummy_boolean_array[1];
8744 CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = locals->dpte_row_width_luma_ub;
8745 CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = locals->dpte_row_width_chroma_ub;
8746 CalculateVMRowAndSwath_params->dpte_row_height_luma = locals->dpte_row_height;
8747 CalculateVMRowAndSwath_params->dpte_row_height_chroma = locals->dpte_row_height_chroma;
8748 CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = locals->dpte_row_height_linear;
8749 CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = locals->dpte_row_height_linear_chroma;
8750 CalculateVMRowAndSwath_params->meta_req_width = locals->meta_req_width;
8751 CalculateVMRowAndSwath_params->meta_req_width_chroma = locals->meta_req_width_chroma;
8752 CalculateVMRowAndSwath_params->meta_req_height = locals->meta_req_height;
8753 CalculateVMRowAndSwath_params->meta_req_height_chroma = locals->meta_req_height_chroma;
8754 CalculateVMRowAndSwath_params->meta_row_width = locals->meta_row_width;
8755 CalculateVMRowAndSwath_params->meta_row_width_chroma = locals->meta_row_width_chroma;
8756 CalculateVMRowAndSwath_params->meta_row_height = locals->meta_row_height;
8757 CalculateVMRowAndSwath_params->meta_row_height_chroma = locals->meta_row_height_chroma;
8758 CalculateVMRowAndSwath_params->vm_group_bytes = locals->vm_group_bytes;
8759 CalculateVMRowAndSwath_params->dpte_group_bytes = locals->dpte_group_bytes;
8760 CalculateVMRowAndSwath_params->PixelPTEReqWidthY = locals->PixelPTEReqWidthY;
8761 CalculateVMRowAndSwath_params->PixelPTEReqHeightY = locals->PixelPTEReqHeightY;
8762 CalculateVMRowAndSwath_params->PTERequestSizeY = locals->PTERequestSizeY;
8763 CalculateVMRowAndSwath_params->PixelPTEReqWidthC = locals->PixelPTEReqWidthC;
8764 CalculateVMRowAndSwath_params->PixelPTEReqHeightC = locals->PixelPTEReqHeightC;
8765 CalculateVMRowAndSwath_params->PTERequestSizeC = locals->PTERequestSizeC;
8766 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = locals->dpde0_bytes_per_frame_ub_l;
8767 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = locals->meta_pte_bytes_per_frame_ub_l;
8768 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = locals->dpde0_bytes_per_frame_ub_c;
8769 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = locals->meta_pte_bytes_per_frame_ub_c;
8770 CalculateVMRowAndSwath_params->PrefetchSourceLinesY = locals->PrefetchSourceLinesY;
8771 CalculateVMRowAndSwath_params->PrefetchSourceLinesC = locals->PrefetchSourceLinesC;
8772 CalculateVMRowAndSwath_params->VInitPreFillY = locals->VInitPreFillY;
8773 CalculateVMRowAndSwath_params->VInitPreFillC = locals->VInitPreFillC;
8774 CalculateVMRowAndSwath_params->MaxNumSwathY = locals->MaxNumSwathY;
8775 CalculateVMRowAndSwath_params->MaxNumSwathC = locals->MaxNumSwathC;
8776 CalculateVMRowAndSwath_params->meta_row_bw = locals->meta_row_bw;
8777 CalculateVMRowAndSwath_params->dpte_row_bw = locals->dpte_row_bw;
8778 CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = locals->PixelPTEBytesPerRow;
8779 CalculateVMRowAndSwath_params->PDEAndMetaPTEBytesFrame = locals->PDEAndMetaPTEBytesFrame;
8780 CalculateVMRowAndSwath_params->MetaRowByte = locals->MetaRowByte;
8781 CalculateVMRowAndSwath_params->use_one_row_for_frame = locals->use_one_row_for_frame;
8782 CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = locals->use_one_row_for_frame_flip;
8783 CalculateVMRowAndSwath_params->UsesMALLForStaticScreen = locals->UsesMALLForStaticScreen;
8784 CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = locals->PTE_BUFFER_MODE;
8785 CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = locals->BIGK_FRAGMENT_SIZE;
8786
8787 CalculateVMRowAndSwath(&mode_lib->scratch,
8788 CalculateVMRowAndSwath_params);
8789
8790 s->ReorderBytes = (dml_uint_t)(mode_lib->ms.soc.num_chans * dml_max3(
8791 mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_only_bytes,
8792 mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
8793 mode_lib->ms.soc.urgent_out_of_order_return_per_channel_vm_only_bytes));
8794
8795 s->VMDataOnlyReturnBW = dml_get_return_bw_mbps_vm_only(&mode_lib->ms.soc,
8796 mode_lib->ms.state.use_ideal_dram_bw_strobe,
8797 mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
8798 locals->Dcfclk,
8799 mode_lib->ms.FabricClock,
8800 mode_lib->ms.DRAMSpeed);
8801
8802 #ifdef __DML_VBA_DEBUG__
8803 dml_print("DML::%s: locals->Dcfclk = %f\n", __func__, locals->Dcfclk);
8804 dml_print("DML::%s: mode_lib->ms.soc.return_bus_width_bytes = %u\n", __func__, mode_lib->ms.soc.return_bus_width_bytes);
8805 dml_print("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
8806 dml_print("DML::%s: mode_lib->ms.soc.fabric_datapath_to_dcn_data_return_bytes = %u\n", __func__, mode_lib->ms.soc.fabric_datapath_to_dcn_data_return_bytes);
8807 dml_print("DML::%s: mode_lib->ms.soc.pct_ideal_sdp_bw_after_urgent = %f\n", __func__, mode_lib->ms.soc.pct_ideal_sdp_bw_after_urgent);
8808 dml_print("DML::%s: mode_lib->ms.DRAMSpeed = %f\n", __func__, mode_lib->ms.DRAMSpeed);
8809 dml_print("DML::%s: mode_lib->ms.soc.num_chans = %u\n", __func__, mode_lib->ms.soc.num_chans);
8810 dml_print("DML::%s: mode_lib->ms.soc.dram_channel_width_bytes = %u\n", __func__, mode_lib->ms.soc.dram_channel_width_bytes);
8811 dml_print("DML::%s: mode_lib->ms.state_idx = %u\n", __func__, mode_lib->ms.state_idx);
8812 dml_print("DML::%s: mode_lib->ms.max_state_idx = %u\n", __func__, mode_lib->ms.max_state_idx);
8813 dml_print("DML::%s: mode_lib->ms.state.use_ideal_dram_bw_strobe = %u\n", __func__, mode_lib->ms.state.use_ideal_dram_bw_strobe);
8814 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, s->VMDataOnlyReturnBW);
8815 dml_print("DML::%s: ReturnBW = %f\n", __func__, mode_lib->ms.ReturnBW);
8816 #endif
8817
8818 s->HostVMInefficiencyFactor = 1.0;
8819 if (mode_lib->ms.cache_display_cfg.plane.GPUVMEnable && mode_lib->ms.cache_display_cfg.plane.HostVMEnable)
8820 s->HostVMInefficiencyFactor = mode_lib->ms.ReturnBW / s->VMDataOnlyReturnBW;
8821
8822 s->TotalDCCActiveDPP = 0;
8823 s->TotalActiveDPP = 0;
8824 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8825 s->TotalActiveDPP = s->TotalActiveDPP + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k];
8826 if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k])
8827 s->TotalDCCActiveDPP = s->TotalDCCActiveDPP + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k];
8828 }
8829
8830 locals->UrgentExtraLatency = CalculateExtraLatency(
8831 mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles,
8832 s->ReorderBytes,
8833 locals->Dcfclk,
8834 s->TotalActiveDPP,
8835 mode_lib->ms.ip.pixel_chunk_size_kbytes,
8836 s->TotalDCCActiveDPP,
8837 mode_lib->ms.ip.meta_chunk_size_kbytes,
8838 mode_lib->ms.ReturnBW,
8839 mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
8840 mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
8841 mode_lib->ms.num_active_planes,
8842 mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
8843 locals->dpte_group_bytes,
8844 s->HostVMInefficiencyFactor,
8845 mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
8846 mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels);
8847
8848 locals->TCalc = 24.0 / locals->DCFCLKDeepSleep;
8849
8850 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8851 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
8852 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
8853 locals->WritebackDelay[k] =
8854 mode_lib->ms.state.writeback_latency_us
8855 + CalculateWriteBackDelay(
8856 mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k],
8857 mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k],
8858 mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k],
8859 mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k],
8860 mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k],
8861 mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k],
8862 mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k],
8863 mode_lib->ms.cache_display_cfg.timing.HTotal[k]) / locals->Dispclk;
8864 } else
8865 locals->WritebackDelay[k] = 0;
8866 for (j = 0; j < mode_lib->ms.num_active_planes; ++j) {
8867 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[j] == k
8868 && mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[j] == true) {
8869 locals->WritebackDelay[k] =
8870 dml_max(
8871 locals->WritebackDelay[k],
8872 mode_lib->ms.state.writeback_latency_us
8873 + CalculateWriteBackDelay(
8874 mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[j],
8875 mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[j],
8876 mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[j],
8877 mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[j],
8878 mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[j],
8879 mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[j],
8880 mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[j],
8881 mode_lib->ms.cache_display_cfg.timing.HTotal[k]) / locals->Dispclk);
8882 }
8883 }
8884 }
8885 }
8886
8887 for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
8888 for (j = 0; j < mode_lib->ms.num_active_planes; ++j)
8889 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == j)
8890 locals->WritebackDelay[k] = locals->WritebackDelay[j];
8891
8892 locals->UrgentLatency = CalculateUrgentLatency(mode_lib->ms.state.urgent_latency_pixel_data_only_us,
8893 mode_lib->ms.state.urgent_latency_pixel_mixed_with_vm_data_us,
8894 mode_lib->ms.state.urgent_latency_vm_data_only_us,
8895 mode_lib->ms.soc.do_urgent_latency_adjustment,
8896 mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_component_us,
8897 mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_reference_mhz,
8898 mode_lib->ms.FabricClock);
8899
8900 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8901 CalculateUrgentBurstFactor(mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
8902 locals->swath_width_luma_ub[k],
8903 locals->swath_width_chroma_ub[k],
8904 locals->SwathHeightY[k],
8905 locals->SwathHeightC[k],
8906 mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
8907 locals->UrgentLatency,
8908 mode_lib->ms.ip.cursor_buffer_size,
8909 mode_lib->ms.cache_display_cfg.plane.CursorWidth[k],
8910 mode_lib->ms.cache_display_cfg.plane.CursorBPP[k],
8911 mode_lib->ms.cache_display_cfg.plane.VRatio[k],
8912 mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
8913 locals->BytePerPixelDETY[k],
8914 locals->BytePerPixelDETC[k],
8915 locals->DETBufferSizeY[k],
8916 locals->DETBufferSizeC[k],
8917
8918 /* output */
8919 &locals->UrgBurstFactorCursor[k],
8920 &locals->UrgBurstFactorLuma[k],
8921 &locals->UrgBurstFactorChroma[k],
8922 &locals->NoUrgentLatencyHiding[k]);
8923
8924 locals->cursor_bw[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 /
8925 ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k];
8926 }
8927
8928 s->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
8929 s->MaxVStartupAllPlanes = 0;
8930
8931 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8932 s->MaxVStartupLines[k] = CalculateMaxVStartup(k,
8933 mode_lib->ms.ip.ptoi_supported,
8934 mode_lib->ms.ip.vblank_nom_default_us,
8935 &mode_lib->ms.cache_display_cfg.timing,
8936 locals->WritebackDelay[k]);
8937
8938 #ifdef __DML_VBA_DEBUG__
8939 dml_print("DML::%s: k=%u MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
8940 dml_print("DML::%s: k=%u WritebackDelay = %f\n", __func__, k, locals->WritebackDelay[k]);
8941 #endif
8942 }
8943
8944 for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
8945 s->MaxVStartupAllPlanes = (dml_uint_t)(dml_max(s->MaxVStartupAllPlanes, s->MaxVStartupLines[k]));
8946
8947 s->ImmediateFlipRequirementFinal = false;
8948 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8949 s->ImmediateFlipRequirementFinal = s->ImmediateFlipRequirementFinal || (mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_required);
8950 }
8951 #ifdef __DML_VBA_DEBUG__
8952 dml_print("DML::%s: ImmediateFlipRequirementFinal = %u\n", __func__, s->ImmediateFlipRequirementFinal);
8953 #endif
8954
8955 // The prefetch scheduling should only be calculated once, as per the AllowForPStateChangeOrStutterInVBlank requirement.
8956 // If the AllowForPStateChangeOrStutterInVBlank requirement is not strict (i.e. only try those power saving features
8957 // if possible), then we will try to program for the best power saving features in order of difficulty (dram, fclk, stutter).
8958 s->iteration = 0;
8959 s->MaxTotalRDBandwidth = 0;
8960 s->AllPrefetchModeTested = false;
8961 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8962 CalculatePrefetchMode(mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k], &s->MinPrefetchMode[k], &s->MaxPrefetchMode[k]);
8963 s->NextPrefetchMode[k] = s->MinPrefetchMode[k];
8964 }
8965
8966 do {
8967 s->MaxTotalRDBandwidthNoUrgentBurst = 0.0;
8968 s->DestinationLineTimesForPrefetchLessThan2 = false;
8969 s->VRatioPrefetchMoreThanMax = false;
8970
8971 dml_print("DML::%s: Start one iteration: VStartupLines = %u\n", __func__, s->VStartupLines);
8972
8973 s->AllPrefetchModeTested = true;
8974 s->MaxTotalRDBandwidth = 0;
8975 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8976 locals->PrefetchMode[k] = s->NextPrefetchMode[k];
8977 TWait = CalculateTWait(
8978 locals->PrefetchMode[k],
8979 mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
8980 mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
8981 mode_lib->ms.cache_display_cfg.timing.DRRDisplay[k],
8982 mode_lib->ms.state.dram_clock_change_latency_us,
8983 mode_lib->ms.state.fclk_change_latency_us,
8984 locals->UrgentLatency,
8985 mode_lib->ms.state.sr_enter_plus_exit_time_us);
8986
8987 myPipe = &s->myPipe;
8988 myPipe->Dppclk = locals->Dppclk[k];
8989 myPipe->Dispclk = locals->Dispclk;
8990 myPipe->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
8991 myPipe->DCFClkDeepSleep = locals->DCFCLKDeepSleep;
8992 myPipe->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k];
8993 myPipe->ScalerEnabled = mode_lib->ms.cache_display_cfg.plane.ScalerEnabled[k];
8994 myPipe->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k];
8995 myPipe->BlockWidth256BytesY = locals->BlockWidth256BytesY[k];
8996 myPipe->BlockHeight256BytesY = locals->BlockHeight256BytesY[k];
8997 myPipe->BlockWidth256BytesC = locals->BlockWidth256BytesC[k];
8998 myPipe->BlockHeight256BytesC = locals->BlockHeight256BytesC[k];
8999 myPipe->InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k];
9000 myPipe->NumberOfCursors = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k];
9001 myPipe->VBlank = mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k];
9002 myPipe->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k];
9003 myPipe->HActive = mode_lib->ms.cache_display_cfg.timing.HActive[k];
9004 myPipe->DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k];
9005 myPipe->ODMMode = mode_lib->ms.cache_display_cfg.hw.ODMMode[k];
9006 myPipe->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k];
9007 myPipe->BytePerPixelY = locals->BytePerPixelY[k];
9008 myPipe->BytePerPixelC = locals->BytePerPixelC[k];
9009 myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
9010
9011 #ifdef __DML_VBA_DEBUG__
9012 dml_print("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k);
9013 dml_print("DML::%s: AllowForPStateChangeOrStutterInVBlank = %u\n", __func__, mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k]);
9014 dml_print("DML::%s: PrefetchMode[k] = %u (Min=%u Max=%u)\n", __func__, locals->PrefetchMode[k], s->MinPrefetchMode[k], s->MaxPrefetchMode[k]);
9015 #endif
9016
9017 CalculatePrefetchSchedule_params->EnhancedPrefetchScheduleAccelerationFinal = mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal;
9018 CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactor;
9019 CalculatePrefetchSchedule_params->myPipe = myPipe;
9020 CalculatePrefetchSchedule_params->DSCDelay = locals->DSCDelay[k];
9021 CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ms.ip.dppclk_delay_subtotal + mode_lib->ms.ip.dppclk_delay_cnvc_formatter;
9022 CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ms.ip.dppclk_delay_scl;
9023 CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ms.ip.dppclk_delay_scl_lb_only;
9024 CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ms.ip.dppclk_delay_cnvc_cursor;
9025 CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ms.ip.dispclk_delay_subtotal;
9026 CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (dml_uint_t)(locals->SwathWidthY[k] / mode_lib->ms.cache_display_cfg.plane.HRatio[k]);
9027 CalculatePrefetchSchedule_params->OutputFormat = mode_lib->ms.cache_display_cfg.output.OutputFormat[k];
9028 CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters;
9029 CalculatePrefetchSchedule_params->VStartup = (dml_uint_t)(dml_min(s->VStartupLines, s->MaxVStartupLines[k]));
9030 CalculatePrefetchSchedule_params->MaxVStartup = s->MaxVStartupLines[k];
9031 CalculatePrefetchSchedule_params->GPUVMPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
9032 CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
9033 CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
9034 CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
9035 CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
9036 CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k];
9037 CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
9038 CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k];
9039 CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes[k];
9040 CalculatePrefetchSchedule_params->UrgentLatency = locals->UrgentLatency;
9041 CalculatePrefetchSchedule_params->UrgentExtraLatency = locals->UrgentExtraLatency;
9042 CalculatePrefetchSchedule_params->TCalc = locals->TCalc;
9043 CalculatePrefetchSchedule_params->PDEAndMetaPTEBytesFrame = locals->PDEAndMetaPTEBytesFrame[k];
9044 CalculatePrefetchSchedule_params->MetaRowByte = locals->MetaRowByte[k];
9045 CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = locals->PixelPTEBytesPerRow[k];
9046 CalculatePrefetchSchedule_params->PrefetchSourceLinesY = locals->PrefetchSourceLinesY[k];
9047 CalculatePrefetchSchedule_params->VInitPreFillY = locals->VInitPreFillY[k];
9048 CalculatePrefetchSchedule_params->MaxNumSwathY = locals->MaxNumSwathY[k];
9049 CalculatePrefetchSchedule_params->PrefetchSourceLinesC = locals->PrefetchSourceLinesC[k];
9050 CalculatePrefetchSchedule_params->VInitPreFillC = locals->VInitPreFillC[k];
9051 CalculatePrefetchSchedule_params->MaxNumSwathC = locals->MaxNumSwathC[k];
9052 CalculatePrefetchSchedule_params->swath_width_luma_ub = locals->swath_width_luma_ub[k];
9053 CalculatePrefetchSchedule_params->swath_width_chroma_ub = locals->swath_width_chroma_ub[k];
9054 CalculatePrefetchSchedule_params->SwathHeightY = locals->SwathHeightY[k];
9055 CalculatePrefetchSchedule_params->SwathHeightC = locals->SwathHeightC[k];
9056 CalculatePrefetchSchedule_params->TWait = TWait;
9057 CalculatePrefetchSchedule_params->DSTXAfterScaler = &locals->DSTXAfterScaler[k];
9058 CalculatePrefetchSchedule_params->DSTYAfterScaler = &locals->DSTYAfterScaler[k];
9059 CalculatePrefetchSchedule_params->DestinationLinesForPrefetch = &locals->DestinationLinesForPrefetch[k];
9060 CalculatePrefetchSchedule_params->DestinationLinesToRequestVMInVBlank = &locals->DestinationLinesToRequestVMInVBlank[k];
9061 CalculatePrefetchSchedule_params->DestinationLinesToRequestRowInVBlank = &locals->DestinationLinesToRequestRowInVBlank[k];
9062 CalculatePrefetchSchedule_params->VRatioPrefetchY = &locals->VRatioPrefetchY[k];
9063 CalculatePrefetchSchedule_params->VRatioPrefetchC = &locals->VRatioPrefetchC[k];
9064 CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWLuma = &locals->RequiredPrefetchPixDataBWLuma[k];
9065 CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWChroma = &locals->RequiredPrefetchPixDataBWChroma[k];
9066 CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &locals->NotEnoughTimeForDynamicMetadata[k];
9067 CalculatePrefetchSchedule_params->Tno_bw = &locals->Tno_bw[k];
9068 CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &locals->prefetch_vmrow_bw[k];
9069 CalculatePrefetchSchedule_params->Tdmdl_vm = &locals->Tdmdl_vm[k];
9070 CalculatePrefetchSchedule_params->Tdmdl = &locals->Tdmdl[k];
9071 CalculatePrefetchSchedule_params->TSetup = &locals->TSetup[k];
9072 CalculatePrefetchSchedule_params->VUpdateOffsetPix = &locals->VUpdateOffsetPix[k];
9073 CalculatePrefetchSchedule_params->VUpdateWidthPix = &locals->VUpdateWidthPix[k];
9074 CalculatePrefetchSchedule_params->VReadyOffsetPix = &locals->VReadyOffsetPix[k];
9075
9076 locals->NoTimeToPrefetch[k] =
9077 CalculatePrefetchSchedule(&mode_lib->scratch,
9078 CalculatePrefetchSchedule_params);
9079
9080 #ifdef __DML_VBA_DEBUG__
9081 dml_print("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, locals->NoTimeToPrefetch[k]);
9082 #endif
9083 locals->VStartup[k] = (dml_uint_t)(dml_min(s->VStartupLines, s->MaxVStartupLines[k]));
9084 locals->VStartupMin[k] = locals->VStartup[k];
9085 }
9086
9087 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9088 CalculateUrgentBurstFactor(
9089 mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
9090 locals->swath_width_luma_ub[k],
9091 locals->swath_width_chroma_ub[k],
9092 locals->SwathHeightY[k],
9093 locals->SwathHeightC[k],
9094 mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
9095 locals->UrgentLatency,
9096 mode_lib->ms.ip.cursor_buffer_size,
9097 mode_lib->ms.cache_display_cfg.plane.CursorWidth[k],
9098 mode_lib->ms.cache_display_cfg.plane.CursorBPP[k],
9099 locals->VRatioPrefetchY[k],
9100 locals->VRatioPrefetchC[k],
9101 locals->BytePerPixelDETY[k],
9102 locals->BytePerPixelDETC[k],
9103 locals->DETBufferSizeY[k],
9104 locals->DETBufferSizeC[k],
9105 /* Output */
9106 &locals->UrgBurstFactorCursorPre[k],
9107 &locals->UrgBurstFactorLumaPre[k],
9108 &locals->UrgBurstFactorChromaPre[k],
9109 &locals->NoUrgentLatencyHidingPre[k]);
9110
9111 locals->cursor_bw_pre[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * locals->VRatioPrefetchY[k];
9112
9113 #ifdef __DML_VBA_DEBUG__
9114 dml_print("DML::%s: k=%0u DPPPerSurface=%u\n", __func__, k, mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k]);
9115 dml_print("DML::%s: k=%0u UrgBurstFactorLuma=%f\n", __func__, k, locals->UrgBurstFactorLuma[k]);
9116 dml_print("DML::%s: k=%0u UrgBurstFactorChroma=%f\n", __func__, k, locals->UrgBurstFactorChroma[k]);
9117 dml_print("DML::%s: k=%0u UrgBurstFactorLumaPre=%f\n", __func__, k, locals->UrgBurstFactorLumaPre[k]);
9118 dml_print("DML::%s: k=%0u UrgBurstFactorChromaPre=%f\n", __func__, k, locals->UrgBurstFactorChromaPre[k]);
9119
9120 dml_print("DML::%s: k=%0u VRatioPrefetchY=%f\n", __func__, k, locals->VRatioPrefetchY[k]);
9121 dml_print("DML::%s: k=%0u VRatioY=%f\n", __func__, k, mode_lib->ms.cache_display_cfg.plane.VRatio[k]);
9122
9123 dml_print("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, locals->prefetch_vmrow_bw[k]);
9124 dml_print("DML::%s: k=%0u ReadBandwidthSurfaceLuma=%f\n", __func__, k, locals->ReadBandwidthSurfaceLuma[k]);
9125 dml_print("DML::%s: k=%0u ReadBandwidthSurfaceChroma=%f\n", __func__, k, locals->ReadBandwidthSurfaceChroma[k]);
9126 dml_print("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, locals->cursor_bw[k]);
9127 dml_print("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, locals->meta_row_bw[k]);
9128 dml_print("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, locals->dpte_row_bw[k]);
9129 dml_print("DML::%s: k=%0u RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, locals->RequiredPrefetchPixDataBWLuma[k]);
9130 dml_print("DML::%s: k=%0u RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, locals->RequiredPrefetchPixDataBWChroma[k]);
9131 dml_print("DML::%s: k=%0u cursor_bw_pre=%f\n", __func__, k, locals->cursor_bw_pre[k]);
9132 dml_print("DML::%s: k=%0u MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, s->MaxTotalRDBandwidthNoUrgentBurst);
9133 #endif
9134 if (locals->DestinationLinesForPrefetch[k] < 2)
9135 s->DestinationLineTimesForPrefetchLessThan2 = true;
9136
9137 if (locals->VRatioPrefetchY[k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ ||
9138 locals->VRatioPrefetchC[k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ ||
9139 ((s->VStartupLines < s->MaxVStartupLines[k] || mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal == 0) &&
9140 (locals->VRatioPrefetchY[k] > __DML_MAX_VRATIO_PRE__ || locals->VRatioPrefetchC[k] > __DML_MAX_VRATIO_PRE__)))
9141 s->VRatioPrefetchMoreThanMax = true;
9142
9143 //dml_bool_t DestinationLinesToRequestVMInVBlankEqualOrMoreThan32 = false;
9144 //dml_bool_t DestinationLinesToRequestRowInVBlankEqualOrMoreThan16 = false;
9145 //if (locals->DestinationLinesToRequestVMInVBlank[k] >= 32) {
9146 // DestinationLinesToRequestVMInVBlankEqualOrMoreThan32 = true;
9147 //}
9148
9149 //if (locals->DestinationLinesToRequestRowInVBlank[k] >= 16) {
9150 // DestinationLinesToRequestRowInVBlankEqualOrMoreThan16 = true;
9151 //}
9152 }
9153
9154 locals->FractionOfUrgentBandwidth = s->MaxTotalRDBandwidthNoUrgentBurst / mode_lib->ms.ReturnBW;
9155
9156 #ifdef __DML_VBA_DEBUG__
9157 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, s->MaxTotalRDBandwidthNoUrgentBurst);
9158 dml_print("DML::%s: ReturnBW=%f \n", __func__, mode_lib->ms.ReturnBW);
9159 dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, locals->FractionOfUrgentBandwidth);
9160 #endif
9161
9162 CalculatePrefetchBandwithSupport(
9163 mode_lib->ms.num_active_planes,
9164 mode_lib->ms.ReturnBW,
9165 mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
9166 locals->NoUrgentLatencyHidingPre,
9167 locals->ReadBandwidthSurfaceLuma,
9168 locals->ReadBandwidthSurfaceChroma,
9169 locals->RequiredPrefetchPixDataBWLuma,
9170 locals->RequiredPrefetchPixDataBWChroma,
9171 locals->cursor_bw,
9172 locals->meta_row_bw,
9173 locals->dpte_row_bw,
9174 locals->cursor_bw_pre,
9175 locals->prefetch_vmrow_bw,
9176 mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
9177 locals->UrgBurstFactorLuma,
9178 locals->UrgBurstFactorChroma,
9179 locals->UrgBurstFactorCursor,
9180 locals->UrgBurstFactorLumaPre,
9181 locals->UrgBurstFactorChromaPre,
9182 locals->UrgBurstFactorCursorPre,
9183
9184 /* output */
9185 &s->MaxTotalRDBandwidth, // dml_float_t *PrefetchBandwidth
9186 &s->MaxTotalRDBandwidthNotIncludingMALLPrefetch, // dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch
9187 &s->dummy_single[0], // dml_float_t *FractionOfUrgentBandwidth
9188 &locals->PrefetchModeSupported);
9189
9190 for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
9191 s->dummy_unit_vector[k] = 1.0;
9192
9193 CalculatePrefetchBandwithSupport(mode_lib->ms.num_active_planes,
9194 mode_lib->ms.ReturnBW,
9195 mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
9196 locals->NoUrgentLatencyHidingPre,
9197 locals->ReadBandwidthSurfaceLuma,
9198 locals->ReadBandwidthSurfaceChroma,
9199 locals->RequiredPrefetchPixDataBWLuma,
9200 locals->RequiredPrefetchPixDataBWChroma,
9201 locals->cursor_bw,
9202 locals->meta_row_bw,
9203 locals->dpte_row_bw,
9204 locals->cursor_bw_pre,
9205 locals->prefetch_vmrow_bw,
9206 mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
9207 s->dummy_unit_vector,
9208 s->dummy_unit_vector,
9209 s->dummy_unit_vector,
9210 s->dummy_unit_vector,
9211 s->dummy_unit_vector,
9212 s->dummy_unit_vector,
9213
9214 /* output */
9215 &s->NonUrgentMaxTotalRDBandwidth, // dml_float_t *PrefetchBandwidth
9216 &s->NonUrgentMaxTotalRDBandwidthNotIncludingMALLPrefetch, // dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch
9217 &locals->FractionOfUrgentBandwidth,
9218 &s->dummy_boolean[0]); // dml_bool_t *PrefetchBandwidthSupport
9219
9220
9221
9222 if (s->VRatioPrefetchMoreThanMax != false || s->DestinationLineTimesForPrefetchLessThan2 != false) {
9223 dml_print("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax);
9224 dml_print("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2);
9225 locals->PrefetchModeSupported = false;
9226 }
9227
9228 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9229 if (locals->NoTimeToPrefetch[k] == true || locals->NotEnoughTimeForDynamicMetadata[k]) {
9230 dml_print("DML::%s: k=%u, NoTimeToPrefetch = %0d\n", __func__, k, locals->NoTimeToPrefetch[k]);
9231 dml_print("DML::%s: k=%u, NotEnoughTimeForDynamicMetadata=%u\n", __func__, k, locals->NotEnoughTimeForDynamicMetadata[k]);
9232 locals->PrefetchModeSupported = false;
9233 }
9234 }
9235
9236
9237 if (locals->PrefetchModeSupported == true && mode_lib->ms.support.ImmediateFlipSupport == true) {
9238 locals->BandwidthAvailableForImmediateFlip = CalculateBandwidthAvailableForImmediateFlip(
9239 mode_lib->ms.num_active_planes,
9240 mode_lib->ms.ReturnBW,
9241 locals->ReadBandwidthSurfaceLuma,
9242 locals->ReadBandwidthSurfaceChroma,
9243 locals->RequiredPrefetchPixDataBWLuma,
9244 locals->RequiredPrefetchPixDataBWChroma,
9245 locals->cursor_bw,
9246 locals->cursor_bw_pre,
9247 mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
9248 locals->UrgBurstFactorLuma,
9249 locals->UrgBurstFactorChroma,
9250 locals->UrgBurstFactorCursor,
9251 locals->UrgBurstFactorLumaPre,
9252 locals->UrgBurstFactorChromaPre,
9253 locals->UrgBurstFactorCursorPre);
9254
9255 locals->TotImmediateFlipBytes = 0;
9256 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9257 if (mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required) {
9258 locals->TotImmediateFlipBytes = locals->TotImmediateFlipBytes + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k] * (locals->PDEAndMetaPTEBytesFrame[k] + locals->MetaRowByte[k]);
9259 if (locals->use_one_row_for_frame_flip[k]) {
9260 locals->TotImmediateFlipBytes = locals->TotImmediateFlipBytes + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k] * (2 * locals->PixelPTEBytesPerRow[k]);
9261 } else {
9262 locals->TotImmediateFlipBytes = locals->TotImmediateFlipBytes + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k] * locals->PixelPTEBytesPerRow[k];
9263 }
9264 #ifdef __DML_VBA_DEBUG__
9265 dml_print("DML::%s: k = %u\n", __func__, k);
9266 dml_print("DML::%s: DPPPerSurface = %u\n", __func__, mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k]);
9267 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, locals->PDEAndMetaPTEBytesFrame[k]);
9268 dml_print("DML::%s: MetaRowByte = %u\n", __func__, locals->MetaRowByte[k]);
9269 dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, locals->PixelPTEBytesPerRow[k]);
9270 dml_print("DML::%s: TotImmediateFlipBytes = %u\n", __func__, locals->TotImmediateFlipBytes);
9271 #endif
9272 }
9273 }
9274 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9275 CalculateFlipSchedule(
9276 s->HostVMInefficiencyFactor,
9277 locals->UrgentExtraLatency,
9278 locals->UrgentLatency,
9279 mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels,
9280 mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
9281 mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels,
9282 mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
9283 mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
9284 locals->PDEAndMetaPTEBytesFrame[k],
9285 locals->MetaRowByte[k],
9286 locals->PixelPTEBytesPerRow[k],
9287 locals->BandwidthAvailableForImmediateFlip,
9288 locals->TotImmediateFlipBytes,
9289 mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
9290 mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
9291 mode_lib->ms.cache_display_cfg.plane.VRatio[k],
9292 mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
9293 locals->Tno_bw[k],
9294 mode_lib->ms.cache_display_cfg.surface.DCCEnable[k],
9295 locals->dpte_row_height[k],
9296 locals->meta_row_height[k],
9297 locals->dpte_row_height_chroma[k],
9298 locals->meta_row_height_chroma[k],
9299 locals->use_one_row_for_frame_flip[k],
9300
9301 /* Output */
9302 &locals->DestinationLinesToRequestVMInImmediateFlip[k],
9303 &locals->DestinationLinesToRequestRowInImmediateFlip[k],
9304 &locals->final_flip_bw[k],
9305 &locals->ImmediateFlipSupportedForPipe[k]);
9306 }
9307
9308 CalculateImmediateFlipBandwithSupport(mode_lib->ms.num_active_planes,
9309 mode_lib->ms.ReturnBW,
9310 mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
9311 mode_lib->ms.policy.ImmediateFlipRequirement,
9312 locals->final_flip_bw,
9313 locals->ReadBandwidthSurfaceLuma,
9314 locals->ReadBandwidthSurfaceChroma,
9315 locals->RequiredPrefetchPixDataBWLuma,
9316 locals->RequiredPrefetchPixDataBWChroma,
9317 locals->cursor_bw,
9318 locals->meta_row_bw,
9319 locals->dpte_row_bw,
9320 locals->cursor_bw_pre,
9321 locals->prefetch_vmrow_bw,
9322 mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
9323 locals->UrgBurstFactorLuma,
9324 locals->UrgBurstFactorChroma,
9325 locals->UrgBurstFactorCursor,
9326 locals->UrgBurstFactorLumaPre,
9327 locals->UrgBurstFactorChromaPre,
9328 locals->UrgBurstFactorCursorPre,
9329
9330 /* output */
9331 &locals->total_dcn_read_bw_with_flip, // dml_float_t *TotalBandwidth
9332 &locals->total_dcn_read_bw_with_flip_not_including_MALL_prefetch, // dml_float_t TotalBandwidthNotIncludingMALLPrefetch
9333 &s->dummy_single[0], // dml_float_t *FractionOfUrgentBandwidth
9334 &locals->ImmediateFlipSupported); // dml_bool_t *ImmediateFlipBandwidthSupport
9335
9336 CalculateImmediateFlipBandwithSupport(mode_lib->ms.num_active_planes,
9337 mode_lib->ms.ReturnBW,
9338 mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
9339 mode_lib->ms.policy.ImmediateFlipRequirement,
9340 locals->final_flip_bw,
9341 locals->ReadBandwidthSurfaceLuma,
9342 locals->ReadBandwidthSurfaceChroma,
9343 locals->RequiredPrefetchPixDataBWLuma,
9344 locals->RequiredPrefetchPixDataBWChroma,
9345 locals->cursor_bw,
9346 locals->meta_row_bw,
9347 locals->dpte_row_bw,
9348 locals->cursor_bw_pre,
9349 locals->prefetch_vmrow_bw,
9350 mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
9351 s->dummy_unit_vector,
9352 s->dummy_unit_vector,
9353 s->dummy_unit_vector,
9354 s->dummy_unit_vector,
9355 s->dummy_unit_vector,
9356 s->dummy_unit_vector,
9357
9358 /* output */
9359 &locals->non_urgent_total_dcn_read_bw_with_flip, // dml_float_t *TotalBandwidth
9360 &locals->non_urgent_total_dcn_read_bw_with_flip_not_including_MALL_prefetch, // dml_float_t TotalBandwidthNotIncludingMALLPrefetch
9361 &locals->FractionOfUrgentBandwidthImmediateFlip, // dml_float_t *FractionOfUrgentBandwidth
9362 &s->dummy_boolean[0]); // dml_bool_t *ImmediateFlipBandwidthSupport
9363
9364 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9365 if (mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required && locals->ImmediateFlipSupportedForPipe[k] == false) {
9366 locals->ImmediateFlipSupported = false;
9367 #ifdef __DML_VBA_DEBUG__
9368 dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k);
9369 #endif
9370 }
9371 }
9372 } else {
9373 locals->ImmediateFlipSupported = false;
9374 locals->total_dcn_read_bw_with_flip = s->MaxTotalRDBandwidth;
9375 locals->total_dcn_read_bw_with_flip_not_including_MALL_prefetch = s->MaxTotalRDBandwidthNotIncludingMALLPrefetch;
9376 locals->non_urgent_total_dcn_read_bw_with_flip = s->NonUrgentMaxTotalRDBandwidth;
9377 locals->non_urgent_total_dcn_read_bw_with_flip_not_including_MALL_prefetch = s->NonUrgentMaxTotalRDBandwidthNotIncludingMALLPrefetch;
9378 }
9379
9380 /* consider flip support is okay if the flip bw is ok or (when user doesn't require an iflip and there is no host vm) */
9381 locals->PrefetchAndImmediateFlipSupported = (locals->PrefetchModeSupported == true &&
9382 ((!mode_lib->ms.support.ImmediateFlipSupport && !mode_lib->ms.cache_display_cfg.plane.HostVMEnable && !s->ImmediateFlipRequirementFinal) ||
9383 locals->ImmediateFlipSupported)) ? true : false;
9384
9385 #ifdef __DML_VBA_DEBUG__
9386 dml_print("DML::%s: PrefetchModeSupported = %u\n", __func__, locals->PrefetchModeSupported);
9387 for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
9388 dml_print("DML::%s: ImmediateFlipRequirement[%u] = %u\n", __func__, k, mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_required);
9389 dml_print("DML::%s: HostVMEnable = %u\n", __func__, mode_lib->ms.cache_display_cfg.plane.HostVMEnable);
9390 dml_print("DML::%s: ImmediateFlipSupport = %u (from mode_support)\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
9391 dml_print("DML::%s: ImmediateFlipSupported = %u\n", __func__, locals->ImmediateFlipSupported);
9392 dml_print("DML::%s: PrefetchAndImmediateFlipSupported = %u\n", __func__, locals->PrefetchAndImmediateFlipSupported);
9393 #endif
9394 dml_print("DML::%s: Done one iteration: VStartupLines=%u, MaxVStartupAllPlanes=%u\n", __func__, s->VStartupLines, s->MaxVStartupAllPlanes);
9395
9396 s->VStartupLines = s->VStartupLines + 1;
9397
9398 if (s->VStartupLines > s->MaxVStartupAllPlanes) {
9399 s->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
9400
9401 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
9402 s->NextPrefetchMode[k] = s->NextPrefetchMode[k] + 1;
9403
9404 if (s->NextPrefetchMode[k] <= s->MaxPrefetchMode[k])
9405 s->AllPrefetchModeTested = false;
9406 dml_print("DML::%s: VStartupLines=%u, reaches max vstartup, try next prefetch mode=%u\n", __func__, s->VStartupLines-1, s->AllPrefetchModeTested);
9407 }
9408 } else {
9409 s->AllPrefetchModeTested = false;
9410 }
9411 s->iteration++;
9412 if (s->iteration > 2500) {
9413 dml_print("ERROR: DML::%s: Too many errors, exit now\n", __func__);
9414 ASSERT(0);
9415 }
9416 } while (!(locals->PrefetchAndImmediateFlipSupported || s->AllPrefetchModeTested));
9417
9418 if (locals->PrefetchAndImmediateFlipSupported) {
9419 dml_print("DML::%s: Good, Prefetch and flip scheduling solution found at VStartupLines=%u (MaxVStartupAllPlanes=%u)\n", __func__, s->VStartupLines-1, s->MaxVStartupAllPlanes);
9420 } else {
9421 dml_print("DML::%s: Bad, Prefetch and flip scheduling solution did NOT find solution! (MaxVStartupAllPlanes=%u)\n", __func__, s->MaxVStartupAllPlanes);
9422 }
9423
9424 //Watermarks and NB P-State/DRAM Clock Change Support
9425 {
9426 s->mmSOCParameters.UrgentLatency = locals->UrgentLatency;
9427 s->mmSOCParameters.ExtraLatency = locals->UrgentExtraLatency;
9428 s->mmSOCParameters.WritebackLatency = mode_lib->ms.state.writeback_latency_us;
9429 s->mmSOCParameters.DRAMClockChangeLatency = mode_lib->ms.state.dram_clock_change_latency_us;
9430 s->mmSOCParameters.FCLKChangeLatency = mode_lib->ms.state.fclk_change_latency_us;
9431 s->mmSOCParameters.SRExitTime = mode_lib->ms.state.sr_exit_time_us;
9432 s->mmSOCParameters.SREnterPlusExitTime = mode_lib->ms.state.sr_enter_plus_exit_time_us;
9433 s->mmSOCParameters.SRExitZ8Time = mode_lib->ms.state.sr_exit_z8_time_us;
9434 s->mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->ms.state.sr_enter_plus_exit_z8_time_us;
9435 s->mmSOCParameters.USRRetrainingLatency = mode_lib->ms.state.usr_retraining_latency_us;
9436 s->mmSOCParameters.SMNLatency = mode_lib->ms.soc.smn_latency_us;
9437
9438 CalculateWatermarks_params->USRRetrainingRequiredFinal = mode_lib->ms.policy.USRRetrainingRequiredFinal;
9439 CalculateWatermarks_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
9440 CalculateWatermarks_params->PrefetchMode = locals->PrefetchMode;
9441 CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
9442 CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ms.ip.max_line_buffer_lines;
9443 CalculateWatermarks_params->LineBufferSize = mode_lib->ms.ip.line_buffer_size_bits;
9444 CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ms.ip.writeback_interface_buffer_size_kbytes;
9445 CalculateWatermarks_params->DCFCLK = locals->Dcfclk;
9446 CalculateWatermarks_params->ReturnBW = mode_lib->ms.ReturnBW;
9447 CalculateWatermarks_params->SynchronizeTimingsFinal = mode_lib->ms.policy.SynchronizeTimingsFinal;
9448 CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal;
9449 CalculateWatermarks_params->DRRDisplay = mode_lib->ms.cache_display_cfg.timing.DRRDisplay;
9450 CalculateWatermarks_params->dpte_group_bytes = locals->dpte_group_bytes;
9451 CalculateWatermarks_params->meta_row_height = locals->meta_row_height;
9452 CalculateWatermarks_params->meta_row_height_chroma = locals->meta_row_height_chroma;
9453 CalculateWatermarks_params->mmSOCParameters = s->mmSOCParameters;
9454 CalculateWatermarks_params->WritebackChunkSize = mode_lib->ms.ip.writeback_chunk_size_kbytes;
9455 CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK;
9456 CalculateWatermarks_params->DCFClkDeepSleep = locals->DCFCLKDeepSleep;
9457 CalculateWatermarks_params->DETBufferSizeY = locals->DETBufferSizeY;
9458 CalculateWatermarks_params->DETBufferSizeC = locals->DETBufferSizeC;
9459 CalculateWatermarks_params->SwathHeightY = locals->SwathHeightY;
9460 CalculateWatermarks_params->SwathHeightC = locals->SwathHeightC;
9461 CalculateWatermarks_params->LBBitPerPixel = mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel;
9462 CalculateWatermarks_params->SwathWidthY = locals->SwathWidthY;
9463 CalculateWatermarks_params->SwathWidthC = locals->SwathWidthC;
9464 CalculateWatermarks_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
9465 CalculateWatermarks_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
9466 CalculateWatermarks_params->VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps;
9467 CalculateWatermarks_params->VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma;
9468 CalculateWatermarks_params->VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio;
9469 CalculateWatermarks_params->VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma;
9470 CalculateWatermarks_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal;
9471 CalculateWatermarks_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal;
9472 CalculateWatermarks_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive;
9473 CalculateWatermarks_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock;
9474 CalculateWatermarks_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
9475 CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface;
9476 CalculateWatermarks_params->BytePerPixelDETY = locals->BytePerPixelDETY;
9477 CalculateWatermarks_params->BytePerPixelDETC = locals->BytePerPixelDETC;
9478 CalculateWatermarks_params->DSTXAfterScaler = locals->DSTXAfterScaler;
9479 CalculateWatermarks_params->DSTYAfterScaler = locals->DSTYAfterScaler;
9480 CalculateWatermarks_params->WritebackEnable = mode_lib->ms.cache_display_cfg.writeback.WritebackEnable;
9481 CalculateWatermarks_params->WritebackPixelFormat = mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat;
9482 CalculateWatermarks_params->WritebackDestinationWidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth;
9483 CalculateWatermarks_params->WritebackDestinationHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight;
9484 CalculateWatermarks_params->WritebackSourceHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight;
9485 CalculateWatermarks_params->UnboundedRequestEnabled = locals->UnboundedRequestEnabled;
9486 CalculateWatermarks_params->CompressedBufferSizeInkByte = locals->CompressedBufferSizeInkByte;
9487
9488 // Output
9489 CalculateWatermarks_params->Watermark = &locals->Watermark; // Watermarks *Watermark
9490 CalculateWatermarks_params->DRAMClockChangeSupport = &locals->DRAMClockChangeSupport;
9491 CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = locals->MaxActiveDRAMClockChangeLatencySupported; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[]
9492 CalculateWatermarks_params->SubViewportLinesNeededInMALL = locals->SubViewportLinesNeededInMALL; // dml_uint_t SubViewportLinesNeededInMALL[]
9493 CalculateWatermarks_params->FCLKChangeSupport = &locals->FCLKChangeSupport;
9494 CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &locals->MaxActiveFCLKChangeLatencySupported; // dml_float_t *MaxActiveFCLKChangeLatencySupported
9495 CalculateWatermarks_params->USRRetrainingSupport = &locals->USRRetrainingSupport;
9496
9497 CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
9498 &mode_lib->scratch,
9499 CalculateWatermarks_params);
9500
9501 /* Copy the calculated watermarks to mp.Watermark as the getter functions are
9502 * implemented by the DML team to copy the calculated values from the mp.Watermark interface.
9503 * &mode_lib->mp.Watermark and &locals->Watermark are the same address, memcpy may lead to
9504 * unexpected behavior. memmove should be used.
9505 */
9506 memmove(&mode_lib->mp.Watermark, CalculateWatermarks_params->Watermark, sizeof(struct Watermarks));
9507
9508 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9509 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
9510 locals->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(0, locals->VStartupMin[k] * mode_lib->ms.cache_display_cfg.timing.HTotal[k] /
9511 mode_lib->ms.cache_display_cfg.timing.PixelClock[k] - locals->Watermark.WritebackDRAMClockChangeWatermark);
9512 locals->WritebackAllowFCLKChangeEndPosition[k] = dml_max(0, locals->VStartupMin[k] * mode_lib->ms.cache_display_cfg.timing.HTotal[k] /
9513 mode_lib->ms.cache_display_cfg.timing.PixelClock[k] - locals->Watermark.WritebackFCLKChangeWatermark);
9514 } else {
9515 locals->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
9516 locals->WritebackAllowFCLKChangeEndPosition[k] = 0;
9517 }
9518 }
9519 }
9520
9521 //Display Pipeline Delivery Time in Prefetch, Groups
9522 CalculatePixelDeliveryTimes(
9523 mode_lib->ms.num_active_planes,
9524 mode_lib->ms.cache_display_cfg.plane.VRatio,
9525 mode_lib->ms.cache_display_cfg.plane.VRatioChroma,
9526 locals->VRatioPrefetchY,
9527 locals->VRatioPrefetchC,
9528 locals->swath_width_luma_ub,
9529 locals->swath_width_chroma_ub,
9530 mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
9531 mode_lib->ms.cache_display_cfg.plane.HRatio,
9532 mode_lib->ms.cache_display_cfg.plane.HRatioChroma,
9533 mode_lib->ms.cache_display_cfg.timing.PixelClock,
9534 locals->PSCL_THROUGHPUT,
9535 locals->PSCL_THROUGHPUT_CHROMA,
9536 locals->Dppclk,
9537 locals->BytePerPixelC,
9538 mode_lib->ms.cache_display_cfg.plane.SourceScan,
9539 mode_lib->ms.cache_display_cfg.plane.NumberOfCursors,
9540 mode_lib->ms.cache_display_cfg.plane.CursorWidth,
9541 mode_lib->ms.cache_display_cfg.plane.CursorBPP,
9542 locals->BlockWidth256BytesY,
9543 locals->BlockHeight256BytesY,
9544 locals->BlockWidth256BytesC,
9545 locals->BlockHeight256BytesC,
9546
9547 /* Output */
9548 locals->DisplayPipeLineDeliveryTimeLuma,
9549 locals->DisplayPipeLineDeliveryTimeChroma,
9550 locals->DisplayPipeLineDeliveryTimeLumaPrefetch,
9551 locals->DisplayPipeLineDeliveryTimeChromaPrefetch,
9552 locals->DisplayPipeRequestDeliveryTimeLuma,
9553 locals->DisplayPipeRequestDeliveryTimeChroma,
9554 locals->DisplayPipeRequestDeliveryTimeLumaPrefetch,
9555 locals->DisplayPipeRequestDeliveryTimeChromaPrefetch,
9556 locals->CursorRequestDeliveryTime,
9557 locals->CursorRequestDeliveryTimePrefetch);
9558
9559 CalculateMetaAndPTETimes(
9560 locals->use_one_row_for_frame,
9561 mode_lib->ms.num_active_planes,
9562 mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
9563 mode_lib->ms.ip.meta_chunk_size_kbytes,
9564 mode_lib->ms.ip.min_meta_chunk_size_bytes,
9565 mode_lib->ms.cache_display_cfg.timing.HTotal,
9566 mode_lib->ms.cache_display_cfg.plane.VRatio,
9567 mode_lib->ms.cache_display_cfg.plane.VRatioChroma,
9568 locals->DestinationLinesToRequestRowInVBlank,
9569 locals->DestinationLinesToRequestRowInImmediateFlip,
9570 mode_lib->ms.cache_display_cfg.surface.DCCEnable,
9571 mode_lib->ms.cache_display_cfg.timing.PixelClock,
9572 locals->BytePerPixelY,
9573 locals->BytePerPixelC,
9574 mode_lib->ms.cache_display_cfg.plane.SourceScan,
9575 locals->dpte_row_height,
9576 locals->dpte_row_height_chroma,
9577 locals->meta_row_width,
9578 locals->meta_row_width_chroma,
9579 locals->meta_row_height,
9580 locals->meta_row_height_chroma,
9581 locals->meta_req_width,
9582 locals->meta_req_width_chroma,
9583 locals->meta_req_height,
9584 locals->meta_req_height_chroma,
9585 locals->dpte_group_bytes,
9586 locals->PTERequestSizeY,
9587 locals->PTERequestSizeC,
9588 locals->PixelPTEReqWidthY,
9589 locals->PixelPTEReqHeightY,
9590 locals->PixelPTEReqWidthC,
9591 locals->PixelPTEReqHeightC,
9592 locals->dpte_row_width_luma_ub,
9593 locals->dpte_row_width_chroma_ub,
9594
9595 /* Output */
9596 locals->DST_Y_PER_PTE_ROW_NOM_L,
9597 locals->DST_Y_PER_PTE_ROW_NOM_C,
9598 locals->DST_Y_PER_META_ROW_NOM_L,
9599 locals->DST_Y_PER_META_ROW_NOM_C,
9600 locals->TimePerMetaChunkNominal,
9601 locals->TimePerChromaMetaChunkNominal,
9602 locals->TimePerMetaChunkVBlank,
9603 locals->TimePerChromaMetaChunkVBlank,
9604 locals->TimePerMetaChunkFlip,
9605 locals->TimePerChromaMetaChunkFlip,
9606 locals->time_per_pte_group_nom_luma,
9607 locals->time_per_pte_group_vblank_luma,
9608 locals->time_per_pte_group_flip_luma,
9609 locals->time_per_pte_group_nom_chroma,
9610 locals->time_per_pte_group_vblank_chroma,
9611 locals->time_per_pte_group_flip_chroma);
9612
9613 CalculateVMGroupAndRequestTimes(
9614 mode_lib->ms.num_active_planes,
9615 mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
9616 mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels,
9617 mode_lib->ms.cache_display_cfg.timing.HTotal,
9618 locals->BytePerPixelC,
9619 locals->DestinationLinesToRequestVMInVBlank,
9620 locals->DestinationLinesToRequestVMInImmediateFlip,
9621 mode_lib->ms.cache_display_cfg.surface.DCCEnable,
9622 mode_lib->ms.cache_display_cfg.timing.PixelClock,
9623 locals->dpte_row_width_luma_ub,
9624 locals->dpte_row_width_chroma_ub,
9625 locals->vm_group_bytes,
9626 locals->dpde0_bytes_per_frame_ub_l,
9627 locals->dpde0_bytes_per_frame_ub_c,
9628 locals->meta_pte_bytes_per_frame_ub_l,
9629 locals->meta_pte_bytes_per_frame_ub_c,
9630
9631 /* Output */
9632 locals->TimePerVMGroupVBlank,
9633 locals->TimePerVMGroupFlip,
9634 locals->TimePerVMRequestVBlank,
9635 locals->TimePerVMRequestFlip);
9636
9637 // Min TTUVBlank
9638 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9639 if (locals->PrefetchMode[k] == 0) {
9640 locals->MinTTUVBlank[k] = dml_max4(
9641 locals->Watermark.DRAMClockChangeWatermark,
9642 locals->Watermark.FCLKChangeWatermark,
9643 locals->Watermark.StutterEnterPlusExitWatermark,
9644 locals->Watermark.UrgentWatermark);
9645 } else if (locals->PrefetchMode[k] == 1) {
9646 locals->MinTTUVBlank[k] = dml_max3(
9647 locals->Watermark.FCLKChangeWatermark,
9648 locals->Watermark.StutterEnterPlusExitWatermark,
9649 locals->Watermark.UrgentWatermark);
9650 } else if (locals->PrefetchMode[k] == 2) {
9651 locals->MinTTUVBlank[k] = dml_max(
9652 locals->Watermark.StutterEnterPlusExitWatermark,
9653 locals->Watermark.UrgentWatermark);
9654 } else {
9655 locals->MinTTUVBlank[k] = locals->Watermark.UrgentWatermark;
9656 }
9657 if (!mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k])
9658 locals->MinTTUVBlank[k] = locals->TCalc + locals->MinTTUVBlank[k];
9659 }
9660
9661 // DCC Configuration
9662 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9663 #ifdef __DML_VBA_DEBUG__
9664 dml_print("DML::%s: Calculate DCC configuration for surface k=%u\n", __func__, k);
9665 #endif
9666 CalculateDCCConfiguration(
9667 mode_lib->ms.cache_display_cfg.surface.DCCEnable[k],
9668 mode_lib->ms.policy.DCCProgrammingAssumesScanDirectionUnknownFinal,
9669 mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
9670 mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k],
9671 mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k],
9672 mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY[k],
9673 mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC[k],
9674 mode_lib->ms.NomDETInKByte,
9675 locals->BlockHeight256BytesY[k],
9676 locals->BlockHeight256BytesC[k],
9677 mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k],
9678 locals->BytePerPixelY[k],
9679 locals->BytePerPixelC[k],
9680 locals->BytePerPixelDETY[k],
9681 locals->BytePerPixelDETC[k],
9682 mode_lib->ms.cache_display_cfg.plane.SourceScan[k],
9683 /* Output */
9684 &locals->DCCYMaxUncompressedBlock[k],
9685 &locals->DCCCMaxUncompressedBlock[k],
9686 &locals->DCCYMaxCompressedBlock[k],
9687 &locals->DCCCMaxCompressedBlock[k],
9688 &locals->DCCYIndependentBlock[k],
9689 &locals->DCCCIndependentBlock[k]);
9690 }
9691
9692 // VStartup Adjustment
9693 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9694 s->Tvstartup_margin = (s->MaxVStartupLines[k] - locals->VStartupMin[k]) * mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
9695 #ifdef __DML_VBA_DEBUG__
9696 dml_print("DML::%s: k=%u, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, locals->MinTTUVBlank[k]);
9697 #endif
9698
9699 locals->MinTTUVBlank[k] = locals->MinTTUVBlank[k] + s->Tvstartup_margin;
9700
9701 #ifdef __DML_VBA_DEBUG__
9702 dml_print("DML::%s: k=%u, Tvstartup_margin = %f\n", __func__, k, s->Tvstartup_margin);
9703 dml_print("DML::%s: k=%u, MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
9704 dml_print("DML::%s: k=%u, MinTTUVBlank = %f\n", __func__, k, locals->MinTTUVBlank[k]);
9705 #endif
9706
9707 locals->Tdmdl[k] = locals->Tdmdl[k] + s->Tvstartup_margin;
9708 if (mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k] && mode_lib->ms.ip.dynamic_metadata_vm_enabled) {
9709 locals->Tdmdl_vm[k] = locals->Tdmdl_vm[k] + s->Tvstartup_margin;
9710 }
9711
9712 isInterlaceTiming = (mode_lib->ms.cache_display_cfg.timing.Interlace[k] && !mode_lib->ms.ip.ptoi_supported);
9713
9714 // The actual positioning of the vstartup
9715 locals->VStartup[k] = (isInterlaceTiming ? (2 * s->MaxVStartupLines[k]) : s->MaxVStartupLines[k]);
9716
9717 s->dlg_vblank_start = ((isInterlaceTiming ? dml_floor((mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]) / 2.0, 1.0) :
9718 mode_lib->ms.cache_display_cfg.timing.VTotal[k]) - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]);
9719 s->LSetup = dml_floor(4.0 * locals->TSetup[k] / ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), 1.0) / 4.0;
9720 s->blank_lines_remaining = (mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k]) - locals->VStartup[k];
9721
9722 if (s->blank_lines_remaining < 0) {
9723 dml_print("ERROR: Vstartup is larger than vblank!?\n");
9724 s->blank_lines_remaining = 0;
9725 ASSERT(0);
9726 }
9727 locals->MIN_DST_Y_NEXT_START[k] = s->dlg_vblank_start + s->blank_lines_remaining + s->LSetup;
9728
9729 // debug only
9730 s->old_MIN_DST_Y_NEXT_START = ((isInterlaceTiming ? dml_floor((mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]) / 2.0, 1.0) :
9731 mode_lib->ms.cache_display_cfg.timing.VTotal[k]) - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k])
9732 + dml_max(1.0, dml_ceil((dml_float_t) locals->WritebackDelay[k] / ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), 1.0))
9733 + dml_floor(4.0 * locals->TSetup[k] / ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), 1.0) / 4.0;
9734
9735 if (((locals->VUpdateOffsetPix[k] + locals->VUpdateWidthPix[k] + locals->VReadyOffsetPix[k]) / (double) mode_lib->ms.cache_display_cfg.timing.HTotal[k]) <=
9736 (isInterlaceTiming ?
9737 dml_floor((mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k] - locals->VStartup[k]) / 2.0, 1.0) :
9738 (int) (mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k] - locals->VStartup[k]))) {
9739 locals->VREADY_AT_OR_AFTER_VSYNC[k] = true;
9740 } else {
9741 locals->VREADY_AT_OR_AFTER_VSYNC[k] = false;
9742 }
9743 #ifdef __DML_VBA_DEBUG__
9744 dml_print("DML::%s: k=%u, VStartup = %u (max)\n", __func__, k, locals->VStartup[k]);
9745 dml_print("DML::%s: k=%u, VStartupMin = %u (max)\n", __func__, k, locals->VStartupMin[k]);
9746 dml_print("DML::%s: k=%u, VUpdateOffsetPix = %u\n", __func__, k, locals->VUpdateOffsetPix[k]);
9747 dml_print("DML::%s: k=%u, VUpdateWidthPix = %u\n", __func__, k, locals->VUpdateWidthPix[k]);
9748 dml_print("DML::%s: k=%u, VReadyOffsetPix = %u\n", __func__, k, locals->VReadyOffsetPix[k]);
9749 dml_print("DML::%s: k=%u, HTotal = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.HTotal[k]);
9750 dml_print("DML::%s: k=%u, VTotal = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.VTotal[k]);
9751 dml_print("DML::%s: k=%u, VActive = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.VActive[k]);
9752 dml_print("DML::%s: k=%u, VFrontPorch = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]);
9753 dml_print("DML::%s: k=%u, TSetup = %f\n", __func__, k, locals->TSetup[k]);
9754 dml_print("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f\n", __func__, k, locals->MIN_DST_Y_NEXT_START[k]);
9755 dml_print("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f (old)\n", __func__, k, s->old_MIN_DST_Y_NEXT_START);
9756 dml_print("DML::%s: k=%u, VREADY_AT_OR_AFTER_VSYNC = %u\n", __func__, k, locals->VREADY_AT_OR_AFTER_VSYNC[k]);
9757 #endif
9758 }
9759
9760 //Maximum Bandwidth Used
9761 s->TotalWRBandwidth = 0;
9762 s->WRBandwidth = 0;
9763 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9764 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true && mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k] == dml_444_32) {
9765 s->WRBandwidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k] /
9766 (mode_lib->ms.cache_display_cfg.timing.HTotal[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 4;
9767 } else if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
9768 s->WRBandwidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k] /
9769 (mode_lib->ms.cache_display_cfg.timing.HTotal[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 8;
9770 }
9771 s->TotalWRBandwidth = s->TotalWRBandwidth + s->WRBandwidth;
9772 }
9773
9774 locals->TotalDataReadBandwidth = 0;
9775 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9776 locals->TotalDataReadBandwidth = locals->TotalDataReadBandwidth + locals->ReadBandwidthSurfaceLuma[k] + locals->ReadBandwidthSurfaceChroma[k];
9777
9778 #ifdef __DML_VBA_DEBUG__
9779 dml_print("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, locals->TotalDataReadBandwidth);
9780 dml_print("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, locals->ReadBandwidthSurfaceLuma[k]);
9781 dml_print("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, locals->ReadBandwidthSurfaceChroma[k]);
9782 #endif
9783 }
9784
9785 locals->TotalDataReadBandwidthNotIncludingMALLPrefetch = 0;
9786 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9787 if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) {
9788 locals->TotalDataReadBandwidthNotIncludingMALLPrefetch = locals->TotalDataReadBandwidthNotIncludingMALLPrefetch
9789 + locals->ReadBandwidthSurfaceLuma[k] + locals->ReadBandwidthSurfaceChroma[k];
9790 }
9791 }
9792
9793 CalculateStutterEfficiency_params->CompressedBufferSizeInkByte = locals->CompressedBufferSizeInkByte;
9794 CalculateStutterEfficiency_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
9795 CalculateStutterEfficiency_params->UnboundedRequestEnabled = locals->UnboundedRequestEnabled;
9796 CalculateStutterEfficiency_params->MetaFIFOSizeInKEntries = mode_lib->ms.ip.meta_fifo_size_in_kentries;
9797 CalculateStutterEfficiency_params->ZeroSizeBufferEntries = mode_lib->ms.ip.zero_size_buffer_entries;
9798 CalculateStutterEfficiency_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
9799 CalculateStutterEfficiency_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
9800 CalculateStutterEfficiency_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes;
9801 CalculateStutterEfficiency_params->TotalDataReadBandwidth = locals->TotalDataReadBandwidth;
9802 CalculateStutterEfficiency_params->DCFCLK = locals->Dcfclk;
9803 CalculateStutterEfficiency_params->ReturnBW = mode_lib->ms.ReturnBW;
9804 CalculateStutterEfficiency_params->CompbufReservedSpace64B = locals->compbuf_reserved_space_64b;
9805 CalculateStutterEfficiency_params->CompbufReservedSpaceZs = locals->compbuf_reserved_space_zs;
9806 CalculateStutterEfficiency_params->SRExitTime = mode_lib->ms.state.sr_exit_time_us;
9807 CalculateStutterEfficiency_params->SRExitZ8Time = mode_lib->ms.state.sr_exit_z8_time_us;
9808 CalculateStutterEfficiency_params->SynchronizeTimingsFinal = mode_lib->ms.policy.SynchronizeTimingsFinal;
9809 CalculateStutterEfficiency_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
9810 CalculateStutterEfficiency_params->StutterEnterPlusExitWatermark = locals->Watermark.StutterEnterPlusExitWatermark;
9811 CalculateStutterEfficiency_params->Z8StutterEnterPlusExitWatermark = locals->Watermark.Z8StutterEnterPlusExitWatermark;
9812 CalculateStutterEfficiency_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
9813 CalculateStutterEfficiency_params->Interlace = mode_lib->ms.cache_display_cfg.timing.Interlace;
9814 CalculateStutterEfficiency_params->MinTTUVBlank = locals->MinTTUVBlank;
9815 CalculateStutterEfficiency_params->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface;
9816 CalculateStutterEfficiency_params->DETBufferSizeY = locals->DETBufferSizeY;
9817 CalculateStutterEfficiency_params->BytePerPixelY = locals->BytePerPixelY;
9818 CalculateStutterEfficiency_params->BytePerPixelDETY = locals->BytePerPixelDETY;
9819 CalculateStutterEfficiency_params->SwathWidthY = locals->SwathWidthY;
9820 CalculateStutterEfficiency_params->SwathHeightY = locals->SwathHeightY;
9821 CalculateStutterEfficiency_params->SwathHeightC = locals->SwathHeightC;
9822 CalculateStutterEfficiency_params->NetDCCRateLuma = mode_lib->ms.cache_display_cfg.surface.DCCRateLuma;
9823 CalculateStutterEfficiency_params->NetDCCRateChroma = mode_lib->ms.cache_display_cfg.surface.DCCRateChroma;
9824 CalculateStutterEfficiency_params->DCCFractionOfZeroSizeRequestsLuma = mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsLuma;
9825 CalculateStutterEfficiency_params->DCCFractionOfZeroSizeRequestsChroma = mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsChroma;
9826 CalculateStutterEfficiency_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal;
9827 CalculateStutterEfficiency_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal;
9828 CalculateStutterEfficiency_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock;
9829 CalculateStutterEfficiency_params->VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio;
9830 CalculateStutterEfficiency_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan;
9831 CalculateStutterEfficiency_params->BlockHeight256BytesY = locals->BlockHeight256BytesY;
9832 CalculateStutterEfficiency_params->BlockWidth256BytesY = locals->BlockWidth256BytesY;
9833 CalculateStutterEfficiency_params->BlockHeight256BytesC = locals->BlockHeight256BytesC;
9834 CalculateStutterEfficiency_params->BlockWidth256BytesC = locals->BlockWidth256BytesC;
9835 CalculateStutterEfficiency_params->DCCYMaxUncompressedBlock = locals->DCCYMaxUncompressedBlock;
9836 CalculateStutterEfficiency_params->DCCCMaxUncompressedBlock = locals->DCCCMaxUncompressedBlock;
9837 CalculateStutterEfficiency_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive;
9838 CalculateStutterEfficiency_params->DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable;
9839 CalculateStutterEfficiency_params->WritebackEnable = mode_lib->ms.cache_display_cfg.writeback.WritebackEnable;
9840 CalculateStutterEfficiency_params->ReadBandwidthSurfaceLuma = locals->ReadBandwidthSurfaceLuma;
9841 CalculateStutterEfficiency_params->ReadBandwidthSurfaceChroma = locals->ReadBandwidthSurfaceChroma;
9842 CalculateStutterEfficiency_params->meta_row_bw = locals->meta_row_bw;
9843 CalculateStutterEfficiency_params->dpte_row_bw = locals->dpte_row_bw;
9844 CalculateStutterEfficiency_params->StutterEfficiencyNotIncludingVBlank = &locals->StutterEfficiencyNotIncludingVBlank;
9845 CalculateStutterEfficiency_params->StutterEfficiency = &locals->StutterEfficiency;
9846 CalculateStutterEfficiency_params->NumberOfStutterBurstsPerFrame = &locals->NumberOfStutterBurstsPerFrame;
9847 CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &locals->Z8StutterEfficiencyNotIncludingVBlank;
9848 CalculateStutterEfficiency_params->Z8StutterEfficiency = &locals->Z8StutterEfficiency;
9849 CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &locals->Z8NumberOfStutterBurstsPerFrame;
9850 CalculateStutterEfficiency_params->StutterPeriod = &locals->StutterPeriod;
9851 CalculateStutterEfficiency_params->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = &locals->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE;
9852
9853 // Stutter Efficiency
9854 CalculateStutterEfficiency(&mode_lib->scratch,
9855 CalculateStutterEfficiency_params);
9856
9857 #ifdef __DML_VBA_ALLOW_DELTA__
9858 {
9859 dml_float_t dummy_single[2];
9860 dml_uint_t dummy_integer[1];
9861 dml_bool_t dummy_boolean[1];
9862
9863 // Calculate z8 stutter eff assuming 0 reserved space
9864 CalculateStutterEfficiency(
9865 locals->CompressedBufferSizeInkByte,
9866 mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
9867 locals->UnboundedRequestEnabled,
9868 mode_lib->ms.ip.meta_fifo_size_in_kentries,
9869 mode_lib->ms.ip.zero_size_buffer_entries,
9870 mode_lib->ms.ip.pixel_chunk_size_kbytes,
9871 mode_lib->ms.num_active_planes,
9872 mode_lib->ms.ip.rob_buffer_size_kbytes,
9873 locals->TotalDataReadBandwidth,
9874 locals->Dcfclk,
9875 mode_lib->ms.ReturnBW,
9876 0, //mode_lib->ms.ip.compbuf_reserved_space_64b,
9877 0, //mode_lib->ms.ip.compbuf_reserved_space_zs,
9878 mode_lib->ms.state.sr_exit_time_us,
9879 mode_lib->ms.state.sr_exit_z8_time_us,
9880 mode_lib->ms.policy.SynchronizeTimingsFinal,
9881 mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming,
9882 locals->Watermark.StutterEnterPlusExitWatermark,
9883 locals->Watermark.Z8StutterEnterPlusExitWatermark,
9884 mode_lib->ms.ip.ptoi_supported,
9885 mode_lib->ms.cache_display_cfg.timing.Interlace,
9886 locals->MinTTUVBlank,
9887 mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
9888 mode_lib->ms.DETBufferSizeY,
9889 locals->BytePerPixelY,
9890 locals->BytePerPixelDETY,
9891 locals->SwathWidthY,
9892 mode_lib->ms.SwathHeightY,
9893 mode_lib->ms.SwathHeightC,
9894 mode_lib->ms.cache_display_cfg.surface.DCCRateLuma,
9895 mode_lib->ms.cache_display_cfg.surface.DCCRateChroma,
9896 mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsLuma,
9897 mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsChroma,
9898 mode_lib->ms.cache_display_cfg.timing.HTotal,
9899 mode_lib->ms.cache_display_cfg.timing.VTotal,
9900 mode_lib->ms.cache_display_cfg.timing.PixelClock,
9901 mode_lib->ms.cache_display_cfg.plane.VRatio,
9902 mode_lib->ms.cache_display_cfg.plane.SourceScan,
9903 locals->BlockHeight256BytesY,
9904 locals->BlockWidth256BytesY,
9905 locals->BlockHeight256BytesC,
9906 locals->BlockWidth256BytesC,
9907 locals->DCCYMaxUncompressedBlock,
9908 locals->DCCCMaxUncompressedBlock,
9909 mode_lib->ms.cache_display_cfg.timing.VActive,
9910 mode_lib->ms.cache_display_cfg.surface.DCCEnable,
9911 mode_lib->ms.cache_display_cfg.writeback.WritebackEnable,
9912 locals->ReadBandwidthSurfaceLuma,
9913 locals->ReadBandwidthSurfaceChroma,
9914 locals->meta_row_bw,
9915 locals->dpte_row_bw,
9916
9917 /* Output */
9918 &dummy_single[0],
9919 &dummy_single[1],
9920 &dummy_integer[0],
9921 &locals->Z8StutterEfficiencyNotIncludingVBlankBestCase,
9922 &locals->Z8StutterEfficiencyBestCase,
9923 &locals->Z8NumberOfStutterBurstsPerFrameBestCase,
9924 &locals->StutterPeriodBestCase,
9925 &dummy_boolean[0]);
9926 }
9927 #else
9928 locals->Z8StutterEfficiencyNotIncludingVBlankBestCase = locals->Z8StutterEfficiencyNotIncludingVBlank;
9929 locals->Z8StutterEfficiencyBestCase = locals->Z8StutterEfficiency;
9930 locals->Z8NumberOfStutterBurstsPerFrameBestCase = locals->Z8NumberOfStutterBurstsPerFrame;
9931 locals->StutterPeriodBestCase = locals->StutterPeriod;
9932 #endif
9933
9934 #ifdef __DML_VBA_DEBUG__
9935 dml_print("DML::%s: --- END --- \n", __func__);
9936 #endif
9937 } // dml_core_mode_programming
9938
9939 /// Function: dml_core_get_row_heights
9940 /// @brief Get row height for DPTE and META with minimal input.
dml_core_get_row_heights(dml_uint_t * dpte_row_height,dml_uint_t * meta_row_height,const struct display_mode_lib_st * mode_lib,dml_bool_t is_plane1,enum dml_source_format_class SourcePixelFormat,enum dml_swizzle_mode SurfaceTiling,enum dml_rotation_angle ScanDirection,dml_uint_t pitch,dml_uint_t GPUVMMinPageSizeKBytes)9941 void dml_core_get_row_heights(
9942 dml_uint_t *dpte_row_height,
9943 dml_uint_t *meta_row_height,
9944 const struct display_mode_lib_st *mode_lib,
9945 dml_bool_t is_plane1,
9946 enum dml_source_format_class SourcePixelFormat,
9947 enum dml_swizzle_mode SurfaceTiling,
9948 enum dml_rotation_angle ScanDirection,
9949 dml_uint_t pitch,
9950 dml_uint_t GPUVMMinPageSizeKBytes)
9951 {
9952 dml_uint_t BytePerPixelY;
9953 dml_uint_t BytePerPixelC;
9954 dml_float_t BytePerPixelInDETY;
9955 dml_float_t BytePerPixelInDETC;
9956 dml_uint_t BlockHeight256BytesY;
9957 dml_uint_t BlockHeight256BytesC;
9958 dml_uint_t BlockWidth256BytesY;
9959 dml_uint_t BlockWidth256BytesC;
9960 dml_uint_t MacroTileWidthY;
9961 dml_uint_t MacroTileWidthC;
9962 dml_uint_t MacroTileHeightY;
9963 dml_uint_t MacroTileHeightC;
9964
9965 dml_uint_t BytePerPixel;
9966 dml_uint_t BlockHeight256Bytes;
9967 dml_uint_t BlockWidth256Bytes;
9968 dml_uint_t MacroTileWidth;
9969 dml_uint_t MacroTileHeight;
9970 dml_uint_t PTEBufferSizeInRequests;
9971
9972 dml_uint_t dummy_integer[16];
9973
9974 CalculateBytePerPixelAndBlockSizes(
9975 SourcePixelFormat,
9976 SurfaceTiling,
9977
9978 /* Output */
9979 &BytePerPixelY,
9980 &BytePerPixelC,
9981 &BytePerPixelInDETY,
9982 &BytePerPixelInDETC,
9983 &BlockHeight256BytesY,
9984 &BlockHeight256BytesC,
9985 &BlockWidth256BytesY,
9986 &BlockWidth256BytesC,
9987 &MacroTileHeightY,
9988 &MacroTileHeightC,
9989 &MacroTileWidthY,
9990 &MacroTileWidthC);
9991
9992 BytePerPixel = is_plane1 ? BytePerPixelC : BytePerPixelY;
9993 BlockHeight256Bytes = is_plane1 ? BlockHeight256BytesC : BlockHeight256BytesY;
9994 BlockWidth256Bytes = is_plane1 ? BlockWidth256BytesC : BlockWidth256BytesY;
9995 MacroTileWidth = is_plane1 ? MacroTileWidthC : MacroTileWidthY;
9996 MacroTileHeight = is_plane1 ? MacroTileHeightC : MacroTileHeightY;
9997 PTEBufferSizeInRequests = is_plane1 ? mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma : mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
9998 #ifdef __DML_RQ_DLG_CALC_DEBUG__
9999 dml_print("DML_DLG: %s: is_plane1 = %u\n", __func__, is_plane1);
10000 dml_print("DML_DLG: %s: BytePerPixel = %u\n", __func__, BytePerPixel);
10001 dml_print("DML_DLG: %s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes);
10002 dml_print("DML_DLG: %s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes);
10003 dml_print("DML_DLG: %s: MacroTileWidth = %u\n", __func__, MacroTileWidth);
10004 dml_print("DML_DLG: %s: MacroTileHeight = %u\n", __func__, MacroTileHeight);
10005 dml_print("DML_DLG: %s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests);
10006 dml_print("DML_DLG: %s: dpte_buffer_size_in_pte_reqs_luma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma);
10007 dml_print("DML_DLG: %s: dpte_buffer_size_in_pte_reqs_chroma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma);
10008 dml_print("DML_DLG: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes);
10009 #endif
10010
10011 // just supply with enough parameters to calculate meta and dte
10012 CalculateVMAndRowBytes(
10013 0, // dml_bool_t ViewportStationary,
10014 1, // dml_bool_t DCCEnable,
10015 1, // dml_uint_t NumberOfDPPs,
10016 BlockHeight256Bytes,
10017 BlockWidth256Bytes,
10018 SourcePixelFormat,
10019 SurfaceTiling,
10020 BytePerPixel,
10021 ScanDirection,
10022 0, // dml_uint_t SwathWidth,
10023 0, // dml_uint_t ViewportHeight, (Note: DML calculates one_row_for_frame height regardless, would need test input if that height is useful)
10024 0, // dml_uint_t ViewportXStart,
10025 0, // dml_uint_t ViewportYStart,
10026 1, // dml_bool_t GPUVMEnable,
10027 4, // dml_uint_t GPUVMMaxPageTableLevels,
10028 GPUVMMinPageSizeKBytes,
10029 PTEBufferSizeInRequests,
10030 pitch,
10031 0, // dml_uint_t DCCMetaPitch,
10032 MacroTileWidth,
10033 MacroTileHeight,
10034
10035 // /* Output */
10036 &dummy_integer[0], // dml_uint_t *MetaRowByte,
10037 &dummy_integer[1], // dml_uint_t *PixelPTEBytesPerRow,
10038 &dummy_integer[2], // dml_uint_t *PixelPTEBytesPerRowStorage,
10039 &dummy_integer[3], // dml_uint_t *dpte_row_width_ub,
10040 dpte_row_height,
10041 &dummy_integer[4], // dml_uint_t *dpte_row_height_linear
10042 &dummy_integer[5], // dml_uint_t *PixelPTEBytesPerRow_one_row_per_frame,
10043 &dummy_integer[6], // dml_uint_t *dpte_row_width_ub_one_row_per_frame,
10044 &dummy_integer[7], // dml_uint_t *dpte_row_height_one_row_per_frame,
10045 &dummy_integer[8], // dml_uint_t *MetaRequestWidth,
10046 &dummy_integer[9], // dml_uint_t *MetaRequestHeight,
10047 &dummy_integer[10], // dml_uint_t *meta_row_width,
10048 meta_row_height,
10049 &dummy_integer[11], // dml_uint_t *PixelPTEReqWidth,
10050 &dummy_integer[12], // dml_uint_t *PixelPTEReqHeight,
10051 &dummy_integer[13], // dml_uint_t *PTERequestSize,
10052 &dummy_integer[14], // dml_uint_t *DPDE0BytesFrame,
10053 &dummy_integer[15]); // dml_uint_t *MetaPTEBytesFrame)
10054
10055 #ifdef __DML_RQ_DLG_CALC_DEBUG__
10056 dml_print("DML_DLG: %s: dpte_row_height = %u\n", __func__, *dpte_row_height);
10057 dml_print("DML_DLG: %s: meta_row_height = %u\n", __func__, *meta_row_height);
10058 #endif
10059 }
10060
/// @brief Return a by-value copy of the SoC bounding box for one power state.
///        An out-of-range index is reported and asserted on, exactly as before, but is no
///        longer used to index the state table: the highest populated state is returned
///        instead, or an all-zero bounding box when the table reports no states.
/// @param states State table; num_states entries of state_array are treated as valid.
///               NOTE(review): assumes num_states never exceeds the capacity of state_array -- confirm.
/// @param state_idx Requested power state index.
/// @return Copy of state_array[state_idx] for a valid index; otherwise the fallback described above.
static struct soc_state_bounding_box_st dml_get_soc_state_bounding_box(
	const struct soc_states_st *states,
	dml_uint_t state_idx)
{
	const dml_uint_t num_states = (dml_uint_t)states->num_states;

	dml_print("DML::%s: state_idx=%u (num_states=%u)\n", __func__, state_idx, states->num_states);

	if (state_idx >= num_states) {
		dml_print("DML::%s: ERROR: Invalid state_idx=%u! num_states=%u\n", __func__, state_idx, states->num_states);
		ASSERT(0);

		/*
		 * Other code in this file assigns a recovery value next to ASSERT(0),
		 * i.e. execution may continue past the assert. Do not read the table
		 * with the rejected index in that case.
		 */
		if (num_states == 0) {
			struct soc_state_bounding_box_st empty_state = {0};

			return empty_state;
		}
		state_idx = num_states - 1;
	}

	return states->state_array[state_idx];
}
10073
10074 /// @brief Copy the parameters to a calculation struct, it actually only need when the DML needs to have
10075 /// the intelligence to re-calculate when any of display cfg, bbox, or policy changes since last calculated.
10076 ///
cache_ip_soc_cfg(struct display_mode_lib_st * mode_lib,dml_uint_t state_idx)10077 static void cache_ip_soc_cfg(struct display_mode_lib_st *mode_lib,
10078 dml_uint_t state_idx)
10079 {
10080 mode_lib->ms.state_idx = state_idx;
10081 mode_lib->ms.max_state_idx = mode_lib->states.num_states - 1;
10082 mode_lib->ms.soc = mode_lib->soc;
10083 mode_lib->ms.ip = mode_lib->ip;
10084 mode_lib->ms.policy = mode_lib->policy;
10085 mode_lib->ms.state = dml_get_soc_state_bounding_box(&mode_lib->states, state_idx);
10086 mode_lib->ms.max_state = dml_get_soc_state_bounding_box(&mode_lib->states, mode_lib->states.num_states - 1);
10087 }
10088
cache_display_cfg(struct display_mode_lib_st * mode_lib,const struct dml_display_cfg_st * display_cfg)10089 static void cache_display_cfg(struct display_mode_lib_st *mode_lib,
10090 const struct dml_display_cfg_st *display_cfg)
10091 {
10092 mode_lib->ms.cache_display_cfg = *display_cfg;
10093 }
10094
fetch_socbb_params(struct display_mode_lib_st * mode_lib)10095 static void fetch_socbb_params(struct display_mode_lib_st *mode_lib)
10096 {
10097 struct soc_state_bounding_box_st *state = &mode_lib->ms.state;
10098
10099 // Default values, SOCCLK, DRAMSpeed, and FabricClock will be reassigned to the same state value in mode_check step
10100 // If UseMinimumRequiredDCFCLK is used, the DCFCLK will be the min dcflk for the mode support
10101 mode_lib->ms.SOCCLK = (dml_float_t)state->socclk_mhz;
10102 mode_lib->ms.DRAMSpeed = (dml_float_t)state->dram_speed_mts;
10103 mode_lib->ms.FabricClock = (dml_float_t)state->fabricclk_mhz;
10104 mode_lib->ms.DCFCLK = (dml_float_t)state->dcfclk_mhz;
10105 }
10106
10107 /// @brief Use display_cfg directly for mode_support calculation
10108 /// Calculated values and informational output are stored in mode_lib.vba data struct
10109 /// The display configuration is described with pipes struct and num_pipes
10110 /// This function is used when physical resource mapping is not finalized (for example,
10111 /// don't know how many pipes to represent a surface)
10112 /// @param mode_lib Contains the bounding box and policy setting.
10113 /// @param state_idx Power state index
10114 /// @param display_cfg Display configurations. A display
dml_mode_support(struct display_mode_lib_st * mode_lib,dml_uint_t state_idx,const struct dml_display_cfg_st * display_cfg)10115 dml_bool_t dml_mode_support(
10116 struct display_mode_lib_st *mode_lib,
10117 dml_uint_t state_idx,
10118 const struct dml_display_cfg_st *display_cfg)
10119 {
10120 dml_bool_t is_mode_support;
10121
10122 dml_print("DML::%s: ------------- START ----------\n", __func__);
10123 cache_ip_soc_cfg(mode_lib, state_idx);
10124 cache_display_cfg(mode_lib, display_cfg);
10125
10126 fetch_socbb_params(mode_lib);
10127
10128 dml_print("DML::%s: state_idx = %u\n", __func__, state_idx);
10129
10130 is_mode_support = dml_core_mode_support(mode_lib);
10131
10132 dml_print("DML::%s: is_mode_support = %u\n", __func__, is_mode_support);
10133 dml_print("DML::%s: ------------- DONE ----------\n", __func__);
10134 return is_mode_support;
10135 }
10136
10137 /// @Brief A function to calculate the programming values for DCN DCHUB (Assume mode is supported)
10138 /// The output will be stored in the mode_lib.mp (mode_program_st) data struct and those can be accessed via the getter functions
10139 /// Calculated values include: watermarks, dlg, rq reg, different clock frequency
10140 /// This function returns 1 when there is no error.
10141 /// Note: In this function, it is assumed that DCFCLK, SOCCLK freq are the state values, and mode_program will just use the DML calculated DPPCLK and DISPCLK
10142 /// @param mode_lib mode_lib data struct that house all the input/output/bbox and calculation values.
10143 /// @param state_idx Power state idx chosen
10144 /// @param display_cfg Display Configuration
10145 /// @param call_standalone Calling mode_programming without calling mode support. Some of the "support" struct member will be pre-calculated before doing mode programming
10146 /// TODO: Add clk_cfg input, could be useful for standalone mode
dml_mode_programming(struct display_mode_lib_st * mode_lib,dml_uint_t state_idx,const struct dml_display_cfg_st * display_cfg,bool call_standalone)10147 dml_bool_t dml_mode_programming(
10148 struct display_mode_lib_st *mode_lib,
10149 dml_uint_t state_idx,
10150 const struct dml_display_cfg_st *display_cfg,
10151 bool call_standalone)
10152 {
10153 struct dml_clk_cfg_st clk_cfg;
10154 memset(&clk_cfg, 0, sizeof(clk_cfg));
10155
10156 clk_cfg.dcfclk_option = dml_use_required_freq;
10157 clk_cfg.dispclk_option = dml_use_required_freq;
10158 for (dml_uint_t k = 0; k < __DML_NUM_PLANES__; ++k)
10159 clk_cfg.dppclk_option[k] = dml_use_required_freq;
10160
10161 dml_print("DML::%s: ------------- START ----------\n", __func__);
10162 dml_print("DML::%s: state_idx = %u\n", __func__, state_idx);
10163 dml_print("DML::%s: call_standalone = %u\n", __func__, call_standalone);
10164
10165 cache_ip_soc_cfg(mode_lib, state_idx);
10166 cache_display_cfg(mode_lib, display_cfg);
10167
10168 fetch_socbb_params(mode_lib);
10169 if (call_standalone) {
10170 mode_lib->ms.support.ImmediateFlipSupport = 1; // assume mode support say immediate flip ok at max state/combine
10171 dml_core_mode_support_partial(mode_lib);
10172 }
10173
10174 dml_core_mode_programming(mode_lib, &clk_cfg);
10175
10176 dml_print("DML::%s: ------------- DONE ----------\n", __func__);
10177 dml_print("DML::%s: PrefetchAndImmediateFlipSupported = %0d\n", __func__, mode_lib->mp.PrefetchAndImmediateFlipSupported);
10178 return mode_lib->mp.PrefetchAndImmediateFlipSupported;
10179 }
10180
/// @brief Find the lowest power state in [start_state_idx, end_state_idx] at which the
///        display configuration passes dml_mode_support().
/// @param lowest_state_idx Output. Set to end_state_idx on entry and overwritten with the
///        first supporting state index when one is found.
/// @param mode_lib Library instance handed to dml_mode_support() for every candidate state.
/// @param display_cfg Display configuration to evaluate.
/// @param start_state_idx First state index to try (0-based).
/// @param end_state_idx Last state index to try (0-based, inclusive); must not be smaller
///        than start_state_idx and must be smaller than mode_lib->states.num_states.
/// @return 1 when a supporting state was found, 0 otherwise. An invalid index range is
///         asserted and reported as 0 without evaluating any state, so no state index
///         outside the table is ever passed on.
static dml_uint_t mode_support_pwr_states(
	dml_uint_t *lowest_state_idx,
	struct display_mode_lib_st *mode_lib,
	const struct dml_display_cfg_st *display_cfg,
	dml_uint_t start_state_idx,
	dml_uint_t end_state_idx)
{
	dml_uint_t state_idx;

	*lowest_state_idx = end_state_idx;

	if (end_state_idx < start_state_idx) {
		ASSERT(0);
		return 0;
	}

	if (end_state_idx >= mode_lib->states.num_states) { // idx is 0-based
		// NOTE(review): ASSERT is presumably non-fatal in some builds, so bail out
		// explicitly instead of walking past the end of the state table below.
		ASSERT(0);
		return 0;
	}

	for (state_idx = start_state_idx; state_idx <= end_state_idx; state_idx++) {
		if (dml_mode_support(mode_lib, state_idx, display_cfg)) {
			dml_print("DML::%s: Mode is supported at power state_idx = %u\n", __func__, state_idx);
			*lowest_state_idx = state_idx;
			return 1;
		}
	}

	return 0;
}
10209
dml_mode_support_ex(struct dml_mode_support_ex_params_st * in_out_params)10210 dml_uint_t dml_mode_support_ex(struct dml_mode_support_ex_params_st *in_out_params)
10211 {
10212 dml_uint_t result;
10213
10214 result = mode_support_pwr_states(&in_out_params->out_lowest_state_idx,
10215 in_out_params->mode_lib,
10216 in_out_params->in_display_cfg,
10217 in_out_params->in_start_state_idx,
10218 in_out_params->mode_lib->states.num_states - 1);
10219
10220 if (result)
10221 *in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support;
10222
10223 return result;
10224 }
10225
dml_get_is_phantom_pipe(struct display_mode_lib_st * mode_lib,dml_uint_t pipe_idx)10226 dml_bool_t dml_get_is_phantom_pipe(struct display_mode_lib_st *mode_lib, dml_uint_t pipe_idx)
10227 {
10228 dml_uint_t plane_idx = mode_lib->mp.pipe_plane[pipe_idx];
10229 dml_print("DML::%s: pipe_idx=%d UseMALLForPStateChange=%0d\n", __func__, pipe_idx, mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[plane_idx]);
10230 return (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[plane_idx] == dml_use_mall_pstate_change_phantom_pipe);
10231 }
10232
10233
/// @brief Generate a per-surface getter `type dml_get_<variable>(mode_lib, surface_idx)`.
///        The generated function translates surface_idx into a plane index through
///        mode_lib->mp.pipe_plane[] and returns internal_var[plane_idx] cast to `type`.
///        internal_var is an array expression that may refer to the `mode_lib` parameter
///        of the generated function.
#define dml_get_per_surface_var_func(variable, type, internal_var) \
type dml_get_##variable(struct display_mode_lib_st *mode_lib, dml_uint_t surface_idx) \
{ \
	const dml_uint_t plane_idx = mode_lib->mp.pipe_plane[surface_idx]; \
	return (type)((internal_var)[plane_idx]); \
}
10240
/// @brief Generate a getter `type dml_get_<var>(mode_lib)` that returns internal_var
///        cast to `type`. internal_var is an expression that may refer to the `mode_lib`
///        parameter of the generated function.
#define dml_get_var_func(var, type, internal_var) \
type dml_get_##var(struct display_mode_lib_st *mode_lib) \
{ \
	return (type)(internal_var); \
}
10245
// Getters that take only mode_lib. Each line expands, through dml_get_var_func, to
//   <type> dml_get_<name>(struct display_mode_lib_st *mode_lib)
// returning the listed field cast to <type>.
// Most fields are read from mode_lib->mp; return_bw, return_dram_bw and the
// *_chunk_size_* getters read from mode_lib->ms instead.
dml_get_var_func(wm_urgent, dml_float_t, mode_lib->mp.Watermark.UrgentWatermark);
dml_get_var_func(wm_stutter_exit, dml_float_t, mode_lib->mp.Watermark.StutterExitWatermark);
dml_get_var_func(wm_stutter_enter_exit, dml_float_t, mode_lib->mp.Watermark.StutterEnterPlusExitWatermark);
// NOTE(review): wm_memory_trip returns mp.UrgentLatency, the same field as
// dml_get_urgent_latency further down, not a member of mp.Watermark - confirm this is intended.
dml_get_var_func(wm_memory_trip, dml_float_t, mode_lib->mp.UrgentLatency);
dml_get_var_func(wm_fclk_change, dml_float_t, mode_lib->mp.Watermark.FCLKChangeWatermark);
dml_get_var_func(wm_usr_retraining, dml_float_t, mode_lib->mp.Watermark.USRRetrainingWatermark);
dml_get_var_func(wm_dram_clock_change, dml_float_t, mode_lib->mp.Watermark.DRAMClockChangeWatermark);
dml_get_var_func(wm_z8_stutter_enter_exit, dml_float_t, mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark);
dml_get_var_func(wm_z8_stutter, dml_float_t, mode_lib->mp.Watermark.Z8StutterExitWatermark);
dml_get_var_func(fraction_of_urgent_bandwidth, dml_float_t, mode_lib->mp.FractionOfUrgentBandwidth);
dml_get_var_func(fraction_of_urgent_bandwidth_imm_flip, dml_float_t, mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip);
dml_get_var_func(urgent_latency, dml_float_t, mode_lib->mp.UrgentLatency);
dml_get_var_func(clk_dcf_deepsleep, dml_float_t, mode_lib->mp.DCFCLKDeepSleep);
dml_get_var_func(wm_writeback_dram_clock_change, dml_float_t, mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark);
dml_get_var_func(wm_writeback_urgent, dml_float_t, mode_lib->mp.Watermark.WritebackUrgentWatermark);
dml_get_var_func(stutter_efficiency, dml_float_t, mode_lib->mp.StutterEfficiency);
dml_get_var_func(stutter_efficiency_no_vblank, dml_float_t, mode_lib->mp.StutterEfficiencyNotIncludingVBlank);
dml_get_var_func(stutter_efficiency_z8, dml_float_t, mode_lib->mp.Z8StutterEfficiency);
dml_get_var_func(stutter_num_bursts_z8, dml_float_t, mode_lib->mp.Z8NumberOfStutterBurstsPerFrame);
dml_get_var_func(stutter_period, dml_float_t, mode_lib->mp.StutterPeriod);
dml_get_var_func(stutter_efficiency_z8_bestcase, dml_float_t, mode_lib->mp.Z8StutterEfficiencyBestCase);
dml_get_var_func(stutter_num_bursts_z8_bestcase, dml_float_t, mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase);
dml_get_var_func(stutter_period_bestcase, dml_float_t, mode_lib->mp.StutterPeriodBestCase);
dml_get_var_func(urgent_extra_latency, dml_float_t, mode_lib->mp.UrgentExtraLatency);
dml_get_var_func(fclk_change_latency, dml_float_t, mode_lib->mp.MaxActiveFCLKChangeLatencySupported);
dml_get_var_func(dispclk_calculated, dml_float_t, mode_lib->mp.Dispclk_calculated);
dml_get_var_func(total_data_read_bw, dml_float_t, mode_lib->mp.TotalDataReadBandwidth);
dml_get_var_func(return_bw, dml_float_t, mode_lib->ms.ReturnBW);
dml_get_var_func(return_dram_bw, dml_float_t, mode_lib->ms.ReturnDRAMBW);
dml_get_var_func(tcalc, dml_float_t, mode_lib->mp.TCalc);
dml_get_var_func(comp_buffer_size_kbytes, dml_uint_t, mode_lib->mp.CompressedBufferSizeInkByte);
dml_get_var_func(pixel_chunk_size_in_kbyte, dml_uint_t, mode_lib->ms.ip.pixel_chunk_size_kbytes);
dml_get_var_func(alpha_pixel_chunk_size_in_kbyte, dml_uint_t, mode_lib->ms.ip.alpha_pixel_chunk_size_kbytes);
dml_get_var_func(meta_chunk_size_in_kbyte, dml_uint_t, mode_lib->ms.ip.meta_chunk_size_kbytes);
dml_get_var_func(min_pixel_chunk_size_in_byte, dml_uint_t, mode_lib->ms.ip.min_pixel_chunk_size_bytes);
dml_get_var_func(min_meta_chunk_size_in_byte, dml_uint_t, mode_lib->ms.ip.min_meta_chunk_size_bytes);
dml_get_var_func(total_immediate_flip_bytes, dml_uint_t, mode_lib->mp.TotImmediateFlipBytes);
10283
// Per-surface getters. Each line expands, through dml_get_per_surface_var_func, to
//   <type> dml_get_<name>(struct display_mode_lib_st *mode_lib, dml_uint_t surface_idx)
// which maps surface_idx to a plane index via mode_lib->mp.pipe_plane[] and returns
// that element of the listed array cast to <type>.
// Most arrays live in mode_lib->mp; swath_height_l/c read from mode_lib->ms.
// NOTE(review): several getters are named refcyc_*_in_us; the unit of the underlying
// fields is not visible here - confirm before relying on the name.
dml_get_per_surface_var_func(dsc_delay, dml_uint_t, mode_lib->mp.DSCDelay); // this is the dsc latency
dml_get_per_surface_var_func(dppclk_calculated, dml_float_t, mode_lib->mp.Dppclk_calculated);
dml_get_per_surface_var_func(dscclk_calculated, dml_float_t, mode_lib->mp.DSCCLK_calculated);
dml_get_per_surface_var_func(min_ttu_vblank_in_us, dml_float_t, mode_lib->mp.MinTTUVBlank);
dml_get_per_surface_var_func(vratio_prefetch_l, dml_float_t, mode_lib->mp.VRatioPrefetchY);
dml_get_per_surface_var_func(vratio_prefetch_c, dml_float_t, mode_lib->mp.VRatioPrefetchC);
dml_get_per_surface_var_func(dst_x_after_scaler, dml_uint_t, mode_lib->mp.DSTXAfterScaler);
dml_get_per_surface_var_func(dst_y_after_scaler, dml_uint_t, mode_lib->mp.DSTYAfterScaler);
dml_get_per_surface_var_func(dst_y_per_vm_vblank, dml_float_t, mode_lib->mp.DestinationLinesToRequestVMInVBlank);
dml_get_per_surface_var_func(dst_y_per_row_vblank, dml_float_t, mode_lib->mp.DestinationLinesToRequestRowInVBlank);
dml_get_per_surface_var_func(dst_y_prefetch, dml_float_t, mode_lib->mp.DestinationLinesForPrefetch);
dml_get_per_surface_var_func(dst_y_per_vm_flip, dml_float_t, mode_lib->mp.DestinationLinesToRequestVMInImmediateFlip);
dml_get_per_surface_var_func(dst_y_per_row_flip, dml_float_t, mode_lib->mp.DestinationLinesToRequestRowInImmediateFlip);
dml_get_per_surface_var_func(dst_y_per_pte_row_nom_l, dml_float_t, mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L);
dml_get_per_surface_var_func(dst_y_per_pte_row_nom_c, dml_float_t, mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C);
dml_get_per_surface_var_func(dst_y_per_meta_row_nom_l, dml_float_t, mode_lib->mp.DST_Y_PER_META_ROW_NOM_L);
dml_get_per_surface_var_func(dst_y_per_meta_row_nom_c, dml_float_t, mode_lib->mp.DST_Y_PER_META_ROW_NOM_C);
dml_get_per_surface_var_func(refcyc_per_vm_group_vblank_in_us, dml_float_t, mode_lib->mp.TimePerVMGroupVBlank);
dml_get_per_surface_var_func(refcyc_per_vm_group_flip_in_us, dml_float_t, mode_lib->mp.TimePerVMGroupFlip);
dml_get_per_surface_var_func(refcyc_per_vm_req_vblank_in_us, dml_float_t, mode_lib->mp.TimePerVMRequestVBlank);
dml_get_per_surface_var_func(refcyc_per_vm_req_flip_in_us, dml_float_t, mode_lib->mp.TimePerVMRequestFlip);
dml_get_per_surface_var_func(refcyc_per_vm_dmdata_in_us, dml_float_t, mode_lib->mp.Tdmdl_vm);
dml_get_per_surface_var_func(dmdata_dl_delta_in_us, dml_float_t, mode_lib->mp.Tdmdl);
dml_get_per_surface_var_func(refcyc_per_line_delivery_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeLuma);
dml_get_per_surface_var_func(refcyc_per_line_delivery_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeChroma);
dml_get_per_surface_var_func(refcyc_per_line_delivery_pre_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch);
dml_get_per_surface_var_func(refcyc_per_line_delivery_pre_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch);
dml_get_per_surface_var_func(refcyc_per_req_delivery_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma);
dml_get_per_surface_var_func(refcyc_per_req_delivery_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma);
dml_get_per_surface_var_func(refcyc_per_req_delivery_pre_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch);
dml_get_per_surface_var_func(refcyc_per_req_delivery_pre_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch);
dml_get_per_surface_var_func(refcyc_per_cursor_req_delivery_in_us, dml_float_t, mode_lib->mp.CursorRequestDeliveryTime);
dml_get_per_surface_var_func(refcyc_per_cursor_req_delivery_pre_in_us, dml_float_t, mode_lib->mp.CursorRequestDeliveryTimePrefetch);
dml_get_per_surface_var_func(refcyc_per_meta_chunk_nom_l_in_us, dml_float_t, mode_lib->mp.TimePerMetaChunkNominal);
dml_get_per_surface_var_func(refcyc_per_meta_chunk_nom_c_in_us, dml_float_t, mode_lib->mp.TimePerChromaMetaChunkNominal);
dml_get_per_surface_var_func(refcyc_per_meta_chunk_vblank_l_in_us, dml_float_t, mode_lib->mp.TimePerMetaChunkVBlank);
dml_get_per_surface_var_func(refcyc_per_meta_chunk_vblank_c_in_us, dml_float_t, mode_lib->mp.TimePerChromaMetaChunkVBlank);
dml_get_per_surface_var_func(refcyc_per_meta_chunk_flip_l_in_us, dml_float_t, mode_lib->mp.TimePerMetaChunkFlip);
dml_get_per_surface_var_func(refcyc_per_meta_chunk_flip_c_in_us, dml_float_t, mode_lib->mp.TimePerChromaMetaChunkFlip);
dml_get_per_surface_var_func(refcyc_per_pte_group_nom_l_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_nom_luma);
dml_get_per_surface_var_func(refcyc_per_pte_group_nom_c_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_nom_chroma);
dml_get_per_surface_var_func(refcyc_per_pte_group_vblank_l_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_vblank_luma);
dml_get_per_surface_var_func(refcyc_per_pte_group_vblank_c_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_vblank_chroma);
dml_get_per_surface_var_func(refcyc_per_pte_group_flip_l_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_flip_luma);
dml_get_per_surface_var_func(refcyc_per_pte_group_flip_c_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_flip_chroma);
dml_get_per_surface_var_func(dpte_group_size_in_bytes, dml_uint_t, mode_lib->mp.dpte_group_byt
es);
dml_get_per_surface_var_func(vm_group_size_in_bytes, dml_uint_t, mode_lib->mp.vm_group_bytes);
dml_get_per_surface_var_func(swath_height_l, dml_uint_t, mode_lib->ms.SwathHeightY);
dml_get_per_surface_var_func(swath_height_c, dml_uint_t, mode_lib->ms.SwathHeightC);
dml_get_per_surface_var_func(dpte_row_height_l, dml_uint_t, mode_lib->mp.dpte_row_height);
dml_get_per_surface_var_func(dpte_row_height_c, dml_uint_t, mode_lib->mp.dpte_row_height_chroma);
dml_get_per_surface_var_func(dpte_row_height_linear_l, dml_uint_t, mode_lib->mp.dpte_row_height_linear);
dml_get_per_surface_var_func(dpte_row_height_linear_c, dml_uint_t, mode_lib->mp.dpte_row_height_linear_chroma);
dml_get_per_surface_var_func(meta_row_height_l, dml_uint_t, mode_lib->mp.meta_row_height);
dml_get_per_surface_var_func(meta_row_height_c, dml_uint_t, mode_lib->mp.meta_row_height_chroma);
10339
// More per-surface getters. Each line expands, through dml_get_per_surface_var_func, to
//   <type> dml_get_<name>(struct display_mode_lib_st *mode_lib, dml_uint_t surface_idx)
// which maps surface_idx to a plane index via mode_lib->mp.pipe_plane[] and returns
// that element of the listed array cast to <type>.
// All arrays live in mode_lib->mp except the det_* getters, which read from mode_lib->ms.
dml_get_per_surface_var_func(vstartup_calculated, dml_uint_t, mode_lib->mp.VStartup);
dml_get_per_surface_var_func(vupdate_offset, dml_uint_t, mode_lib->mp.VUpdateOffsetPix);
dml_get_per_surface_var_func(vupdate_width, dml_uint_t, mode_lib->mp.VUpdateWidthPix);
dml_get_per_surface_var_func(vready_offset, dml_uint_t, mode_lib->mp.VReadyOffsetPix);
dml_get_per_surface_var_func(vready_at_or_after_vsync, dml_uint_t, mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC);
dml_get_per_surface_var_func(min_dst_y_next_start, dml_uint_t, mode_lib->mp.MIN_DST_Y_NEXT_START);
dml_get_per_surface_var_func(det_stored_buffer_size_l_bytes, dml_uint_t, mode_lib->ms.DETBufferSizeY);
dml_get_per_surface_var_func(det_stored_buffer_size_c_bytes, dml_uint_t, mode_lib->ms.DETBufferSizeC);
dml_get_per_surface_var_func(use_mall_for_static_screen, dml_uint_t, mode_lib->mp.UsesMALLForStaticScreen);
dml_get_per_surface_var_func(surface_size_for_mall, dml_uint_t, mode_lib->mp.SurfaceSizeInTheMALL);
dml_get_per_surface_var_func(dcc_max_uncompressed_block_l, dml_uint_t, mode_lib->mp.DCCYMaxUncompressedBlock);
dml_get_per_surface_var_func(dcc_max_compressed_block_l, dml_uint_t, mode_lib->mp.DCCYMaxCompressedBlock);
dml_get_per_surface_var_func(dcc_independent_block_l, dml_uint_t, mode_lib->mp.DCCYIndependentBlock);
dml_get_per_surface_var_func(dcc_max_uncompressed_block_c, dml_uint_t, mode_lib->mp.DCCCMaxUncompressedBlock);
dml_get_per_surface_var_func(dcc_max_compressed_block_c, dml_uint_t, mode_lib->mp.DCCCMaxCompressedBlock);
dml_get_per_surface_var_func(dcc_independent_block_c, dml_uint_t, mode_lib->mp.DCCCIndependentBlock);
dml_get_per_surface_var_func(max_active_dram_clock_change_latency_supported, dml_uint_t, mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported);
dml_get_per_surface_var_func(pte_buffer_mode, dml_uint_t, mode_lib->mp.PTE_BUFFER_MODE);
dml_get_per_surface_var_func(bigk_fragment_size, dml_uint_t, mode_lib->mp.BIGK_FRAGMENT_SIZE);
dml_get_per_surface_var_func(dpte_bytes_per_row, dml_uint_t, mode_lib->mp.PixelPTEBytesPerRow);
dml_get_per_surface_var_func(meta_bytes_per_row, dml_uint_t, mode_lib->mp.MetaRowByte);
dml_get_per_surface_var_func(det_buffer_size_kbytes, dml_uint_t, mode_lib->ms.DETBufferSizeInKByte);
10362
10363