1 /*
2 * Copyright 2022 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25 #include "display_mode_vba_util_32.h"
26 #include "../dml_inline_defs.h"
27 #include "display_mode_vba_32.h"
28 #include "../display_mode_lib.h"
29
30 #define DCN32_MAX_FMT_420_BUFFER_WIDTH 4096
31
dml32_dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)32 unsigned int dml32_dscceComputeDelay(
33 unsigned int bpc,
34 double BPP,
35 unsigned int sliceWidth,
36 unsigned int numSlices,
37 enum output_format_class pixelFormat,
38 enum output_encoder_class Output)
39 {
40 // valid bpc = source bits per component in the set of {8, 10, 12}
41 // valid bpp = increments of 1/16 of a bit
42 // min = 6/7/8 in N420/N422/444, respectively
43 // max = such that compression is 1:1
44 //valid sliceWidth = number of pixels per slice line,
45 // must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
46 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
47 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
48
49 // fixed value
50 unsigned int rcModelSize = 8192;
51
52 // N422/N420 operate at 2 pixels per clock
53 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
54 Delay, pixels;
55
56 if (pixelFormat == dm_420)
57 pixelsPerClock = 2;
58 else if (pixelFormat == dm_n422)
59 pixelsPerClock = 2;
60 // #all other modes operate at 1 pixel per clock
61 else
62 pixelsPerClock = 1;
63
64 //initial transmit delay as per PPS
65 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
66
67 //compute ssm delay
68 if (bpc == 8)
69 D = 81;
70 else if (bpc == 10)
71 D = 89;
72 else
73 D = 113;
74
75 //divide by pixel per cycle to compute slice width as seen by DSC
76 w = sliceWidth / pixelsPerClock;
77
78 //422 mode has an additional cycle of delay
79 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
80 s = 0;
81 else
82 s = 1;
83
84 //main calculation for the dscce
85 ix = initalXmitDelay + 45;
86 wx = (w + 2) / 3;
87 p = 3 * wx - w;
88 l0 = ix / w;
89 a = ix + p * l0;
90 ax = (a + 2) / 3 + D + 6 + 1;
91 L = (ax + wx - 1) / wx;
92 if ((ix % w) == 0 && p != 0)
93 lstall = 1;
94 else
95 lstall = 0;
96 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
97
98 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
99 pixels = Delay * 3 * pixelsPerClock;
100
101 #ifdef __DML_VBA_DEBUG__
102 dml_print("DML::%s: bpc: %d\n", __func__, bpc);
103 dml_print("DML::%s: BPP: %f\n", __func__, BPP);
104 dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
105 dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
106 dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
107 dml_print("DML::%s: Output: %d\n", __func__, Output);
108 dml_print("DML::%s: pixels: %d\n", __func__, pixels);
109 #endif
110
111 return pixels;
112 }
113
dml32_dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)114 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
115 {
116 unsigned int Delay = 0;
117
118 if (pixelFormat == dm_420) {
119 // sfr
120 Delay = Delay + 2;
121 // dsccif
122 Delay = Delay + 0;
123 // dscc - input deserializer
124 Delay = Delay + 3;
125 // dscc gets pixels every other cycle
126 Delay = Delay + 2;
127 // dscc - input cdc fifo
128 Delay = Delay + 12;
129 // dscc gets pixels every other cycle
130 Delay = Delay + 13;
131 // dscc - cdc uncertainty
132 Delay = Delay + 2;
133 // dscc - output cdc fifo
134 Delay = Delay + 7;
135 // dscc gets pixels every other cycle
136 Delay = Delay + 3;
137 // dscc - cdc uncertainty
138 Delay = Delay + 2;
139 // dscc - output serializer
140 Delay = Delay + 1;
141 // sft
142 Delay = Delay + 1;
143 } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) {
144 // sfr
145 Delay = Delay + 2;
146 // dsccif
147 Delay = Delay + 1;
148 // dscc - input deserializer
149 Delay = Delay + 5;
150 // dscc - input cdc fifo
151 Delay = Delay + 25;
152 // dscc - cdc uncertainty
153 Delay = Delay + 2;
154 // dscc - output cdc fifo
155 Delay = Delay + 10;
156 // dscc - cdc uncertainty
157 Delay = Delay + 2;
158 // dscc - output serializer
159 Delay = Delay + 1;
160 // sft
161 Delay = Delay + 1;
162 } else {
163 // sfr
164 Delay = Delay + 2;
165 // dsccif
166 Delay = Delay + 0;
167 // dscc - input deserializer
168 Delay = Delay + 3;
169 // dscc - input cdc fifo
170 Delay = Delay + 12;
171 // dscc - cdc uncertainty
172 Delay = Delay + 2;
173 // dscc - output cdc fifo
174 Delay = Delay + 7;
175 // dscc - output serializer
176 Delay = Delay + 1;
177 // dscc - cdc uncertainty
178 Delay = Delay + 2;
179 // sft
180 Delay = Delay + 1;
181 }
182
183 return Delay;
184 }
185
186
IsVertical(enum dm_rotation_angle Scan)187 bool IsVertical(enum dm_rotation_angle Scan)
188 {
189 bool is_vert = false;
190
191 if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m)
192 is_vert = true;
193 else
194 is_vert = false;
195 return is_vert;
196 }
197
dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(double HRatio,double HRatioChroma,double VRatio,double VRatioChroma,double MaxDCHUBToPSCLThroughput,double MaxPSCLToLBThroughput,double PixelClock,enum source_format_class SourcePixelFormat,unsigned int HTaps,unsigned int HTapsChroma,unsigned int VTaps,unsigned int VTapsChroma,double * PSCL_THROUGHPUT,double * PSCL_THROUGHPUT_CHROMA,double * DPPCLKUsingSingleDPP)198 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
199 double HRatio,
200 double HRatioChroma,
201 double VRatio,
202 double VRatioChroma,
203 double MaxDCHUBToPSCLThroughput,
204 double MaxPSCLToLBThroughput,
205 double PixelClock,
206 enum source_format_class SourcePixelFormat,
207 unsigned int HTaps,
208 unsigned int HTapsChroma,
209 unsigned int VTaps,
210 unsigned int VTapsChroma,
211
212 /* output */
213 double *PSCL_THROUGHPUT,
214 double *PSCL_THROUGHPUT_CHROMA,
215 double *DPPCLKUsingSingleDPP)
216 {
217 double DPPCLKUsingSingleDPPLuma;
218 double DPPCLKUsingSingleDPPChroma;
219
220 if (HRatio > 1) {
221 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
222 dml_ceil((double) HTaps / 6.0, 1.0));
223 } else {
224 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
225 }
226
227 DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio /
228 *PSCL_THROUGHPUT, 1);
229
230 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
231 DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
232
233 if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
234 SourcePixelFormat != dm_rgbe_alpha)) {
235 *PSCL_THROUGHPUT_CHROMA = 0;
236 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
237 } else {
238 if (HRatioChroma > 1) {
239 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
240 HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
241 } else {
242 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
243 }
244 DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
245 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
246 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
247 DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
248 *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
249 }
250 }
251
dml32_CalculateBytePerPixelAndBlockSizes(enum source_format_class SourcePixelFormat,enum dm_swizzle_mode SurfaceTiling,unsigned int * BytePerPixelY,unsigned int * BytePerPixelC,double * BytePerPixelDETY,double * BytePerPixelDETC,unsigned int * BlockHeight256BytesY,unsigned int * BlockHeight256BytesC,unsigned int * BlockWidth256BytesY,unsigned int * BlockWidth256BytesC,unsigned int * MacroTileHeightY,unsigned int * MacroTileHeightC,unsigned int * MacroTileWidthY,unsigned int * MacroTileWidthC)252 void dml32_CalculateBytePerPixelAndBlockSizes(
253 enum source_format_class SourcePixelFormat,
254 enum dm_swizzle_mode SurfaceTiling,
255
256 /* Output */
257 unsigned int *BytePerPixelY,
258 unsigned int *BytePerPixelC,
259 double *BytePerPixelDETY,
260 double *BytePerPixelDETC,
261 unsigned int *BlockHeight256BytesY,
262 unsigned int *BlockHeight256BytesC,
263 unsigned int *BlockWidth256BytesY,
264 unsigned int *BlockWidth256BytesC,
265 unsigned int *MacroTileHeightY,
266 unsigned int *MacroTileHeightC,
267 unsigned int *MacroTileWidthY,
268 unsigned int *MacroTileWidthC)
269 {
270 if (SourcePixelFormat == dm_444_64) {
271 *BytePerPixelDETY = 8;
272 *BytePerPixelDETC = 0;
273 *BytePerPixelY = 8;
274 *BytePerPixelC = 0;
275 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
276 *BytePerPixelDETY = 4;
277 *BytePerPixelDETC = 0;
278 *BytePerPixelY = 4;
279 *BytePerPixelC = 0;
280 } else if (SourcePixelFormat == dm_444_16) {
281 *BytePerPixelDETY = 2;
282 *BytePerPixelDETC = 0;
283 *BytePerPixelY = 2;
284 *BytePerPixelC = 0;
285 } else if (SourcePixelFormat == dm_444_8) {
286 *BytePerPixelDETY = 1;
287 *BytePerPixelDETC = 0;
288 *BytePerPixelY = 1;
289 *BytePerPixelC = 0;
290 } else if (SourcePixelFormat == dm_rgbe_alpha) {
291 *BytePerPixelDETY = 4;
292 *BytePerPixelDETC = 1;
293 *BytePerPixelY = 4;
294 *BytePerPixelC = 1;
295 } else if (SourcePixelFormat == dm_420_8) {
296 *BytePerPixelDETY = 1;
297 *BytePerPixelDETC = 2;
298 *BytePerPixelY = 1;
299 *BytePerPixelC = 2;
300 } else if (SourcePixelFormat == dm_420_12) {
301 *BytePerPixelDETY = 2;
302 *BytePerPixelDETC = 4;
303 *BytePerPixelY = 2;
304 *BytePerPixelC = 4;
305 } else {
306 *BytePerPixelDETY = 4.0 / 3;
307 *BytePerPixelDETC = 8.0 / 3;
308 *BytePerPixelY = 2;
309 *BytePerPixelC = 4;
310 }
311 #ifdef __DML_VBA_DEBUG__
312 dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
313 dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
314 dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
315 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, *BytePerPixelY);
316 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, *BytePerPixelC);
317 #endif
318 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
319 || SourcePixelFormat == dm_444_16
320 || SourcePixelFormat == dm_444_8
321 || SourcePixelFormat == dm_mono_16
322 || SourcePixelFormat == dm_mono_8
323 || SourcePixelFormat == dm_rgbe)) {
324 if (SurfaceTiling == dm_sw_linear)
325 *BlockHeight256BytesY = 1;
326 else if (SourcePixelFormat == dm_444_64)
327 *BlockHeight256BytesY = 4;
328 else if (SourcePixelFormat == dm_444_8)
329 *BlockHeight256BytesY = 16;
330 else
331 *BlockHeight256BytesY = 8;
332
333 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
334 *BlockHeight256BytesC = 0;
335 *BlockWidth256BytesC = 0;
336 } else {
337 if (SurfaceTiling == dm_sw_linear) {
338 *BlockHeight256BytesY = 1;
339 *BlockHeight256BytesC = 1;
340 } else if (SourcePixelFormat == dm_rgbe_alpha) {
341 *BlockHeight256BytesY = 8;
342 *BlockHeight256BytesC = 16;
343 } else if (SourcePixelFormat == dm_420_8) {
344 *BlockHeight256BytesY = 16;
345 *BlockHeight256BytesC = 8;
346 } else {
347 *BlockHeight256BytesY = 8;
348 *BlockHeight256BytesC = 8;
349 }
350 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
351 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
352 }
353 #ifdef __DML_VBA_DEBUG__
354 dml_print("DML::%s: BlockWidth256BytesY = %d\n", __func__, *BlockWidth256BytesY);
355 dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
356 dml_print("DML::%s: BlockWidth256BytesC = %d\n", __func__, *BlockWidth256BytesC);
357 dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
358 #endif
359
360 if (SurfaceTiling == dm_sw_linear) {
361 *MacroTileHeightY = *BlockHeight256BytesY;
362 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
363 *MacroTileHeightC = *BlockHeight256BytesC;
364 if (*MacroTileHeightC == 0)
365 *MacroTileWidthC = 0;
366 else
367 *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
368 } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
369 SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
370 *MacroTileHeightY = 16 * *BlockHeight256BytesY;
371 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
372 *MacroTileHeightC = 16 * *BlockHeight256BytesC;
373 if (*MacroTileHeightC == 0)
374 *MacroTileWidthC = 0;
375 else
376 *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
377 } else {
378 *MacroTileHeightY = 32 * *BlockHeight256BytesY;
379 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
380 *MacroTileHeightC = 32 * *BlockHeight256BytesC;
381 if (*MacroTileHeightC == 0)
382 *MacroTileWidthC = 0;
383 else
384 *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
385 }
386
387 #ifdef __DML_VBA_DEBUG__
388 dml_print("DML::%s: MacroTileWidthY = %d\n", __func__, *MacroTileWidthY);
389 dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
390 dml_print("DML::%s: MacroTileWidthC = %d\n", __func__, *MacroTileWidthC);
391 dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
392 #endif
393 } // CalculateBytePerPixelAndBlockSizes
394
dml32_CalculateSwathAndDETConfiguration(unsigned int DETSizeOverride[],enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],unsigned int ConfigReturnBufferSizeInKByte,unsigned int MaxTotalDETInKByte,unsigned int MinCompressedBufferSizeInKByte,double ForceSingleDPP,unsigned int NumberOfActiveSurfaces,unsigned int nomDETInKByte,enum unbounded_requesting_policy UseUnboundedRequestingFinal,bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,unsigned int PixelChunkSizeKBytes,unsigned int ROBSizeKBytes,unsigned int CompressedBufferSegmentSizeInkByteFinal,enum output_encoder_class Output[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double MaximumSwathWidthLuma[],double MaximumSwathWidthChroma[],enum dm_rotation_angle SourceRotation[],bool ViewportStationary[],enum source_format_class SourcePixelFormat[],enum dm_swizzle_mode SurfaceTiling[],unsigned int ViewportWidth[],unsigned int ViewportHeight[],unsigned int ViewportXStart[],unsigned int ViewportYStart[],unsigned int ViewportXStartC[],unsigned int ViewportYStartC[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],enum odm_combine_mode ODMMode[],unsigned int BlendingAndTiming[],unsigned int BytePerPixY[],unsigned int BytePerPixC[],double BytePerPixDETY[],double BytePerPixDETC[],unsigned int HActive[],double HRatio[],double HRatioChroma[],unsigned int DPPPerSurface[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[],double SwathWidth[],double SwathWidthChroma[],unsigned int SwathHeightY[],unsigned int SwathHeightC[],unsigned int DETBufferSizeInKByte[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],bool * UnboundedRequestEnabled,unsigned int * CompressedBufferSizeInkByte,unsigned int * CompBufReservedSpaceKBytes,bool * 
CompBufReservedSpaceNeedAdjustment,bool ViewportSizeSupportPerSurface[],bool * ViewportSizeSupport)395 void dml32_CalculateSwathAndDETConfiguration(
396 unsigned int DETSizeOverride[],
397 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
398 unsigned int ConfigReturnBufferSizeInKByte,
399 unsigned int MaxTotalDETInKByte,
400 unsigned int MinCompressedBufferSizeInKByte,
401 double ForceSingleDPP,
402 unsigned int NumberOfActiveSurfaces,
403 unsigned int nomDETInKByte,
404 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
405 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
406 unsigned int PixelChunkSizeKBytes,
407 unsigned int ROBSizeKBytes,
408 unsigned int CompressedBufferSegmentSizeInkByteFinal,
409 enum output_encoder_class Output[],
410 double ReadBandwidthLuma[],
411 double ReadBandwidthChroma[],
412 double MaximumSwathWidthLuma[],
413 double MaximumSwathWidthChroma[],
414 enum dm_rotation_angle SourceRotation[],
415 bool ViewportStationary[],
416 enum source_format_class SourcePixelFormat[],
417 enum dm_swizzle_mode SurfaceTiling[],
418 unsigned int ViewportWidth[],
419 unsigned int ViewportHeight[],
420 unsigned int ViewportXStart[],
421 unsigned int ViewportYStart[],
422 unsigned int ViewportXStartC[],
423 unsigned int ViewportYStartC[],
424 unsigned int SurfaceWidthY[],
425 unsigned int SurfaceWidthC[],
426 unsigned int SurfaceHeightY[],
427 unsigned int SurfaceHeightC[],
428 unsigned int Read256BytesBlockHeightY[],
429 unsigned int Read256BytesBlockHeightC[],
430 unsigned int Read256BytesBlockWidthY[],
431 unsigned int Read256BytesBlockWidthC[],
432 enum odm_combine_mode ODMMode[],
433 unsigned int BlendingAndTiming[],
434 unsigned int BytePerPixY[],
435 unsigned int BytePerPixC[],
436 double BytePerPixDETY[],
437 double BytePerPixDETC[],
438 unsigned int HActive[],
439 double HRatio[],
440 double HRatioChroma[],
441 unsigned int DPPPerSurface[],
442
443 /* Output */
444 unsigned int swath_width_luma_ub[],
445 unsigned int swath_width_chroma_ub[],
446 double SwathWidth[],
447 double SwathWidthChroma[],
448 unsigned int SwathHeightY[],
449 unsigned int SwathHeightC[],
450 unsigned int DETBufferSizeInKByte[],
451 unsigned int DETBufferSizeY[],
452 unsigned int DETBufferSizeC[],
453 bool *UnboundedRequestEnabled,
454 unsigned int *CompressedBufferSizeInkByte,
455 unsigned int *CompBufReservedSpaceKBytes,
456 bool *CompBufReservedSpaceNeedAdjustment,
457 bool ViewportSizeSupportPerSurface[],
458 bool *ViewportSizeSupport)
459 {
460 unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
461 unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
462 unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
463 unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
464 unsigned int RoundedUpSwathSizeBytesY;
465 unsigned int RoundedUpSwathSizeBytesC;
466 double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
467 double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
468 unsigned int k;
469 unsigned int TotalActiveDPP = 0;
470 bool NoChromaSurfaces = true;
471 unsigned int DETBufferSizeInKByteForSwathCalculation;
472
473 #ifdef __DML_VBA_DEBUG__
474 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
475 dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes);
476 dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes);
477 #endif
478 dml32_CalculateSwathWidth(ForceSingleDPP,
479 NumberOfActiveSurfaces,
480 SourcePixelFormat,
481 SourceRotation,
482 ViewportStationary,
483 ViewportWidth,
484 ViewportHeight,
485 ViewportXStart,
486 ViewportYStart,
487 ViewportXStartC,
488 ViewportYStartC,
489 SurfaceWidthY,
490 SurfaceWidthC,
491 SurfaceHeightY,
492 SurfaceHeightC,
493 ODMMode,
494 BytePerPixY,
495 BytePerPixC,
496 Read256BytesBlockHeightY,
497 Read256BytesBlockHeightC,
498 Read256BytesBlockWidthY,
499 Read256BytesBlockWidthC,
500 BlendingAndTiming,
501 HActive,
502 HRatio,
503 DPPPerSurface,
504
505 /* Output */
506 SwathWidthdoubleDPP,
507 SwathWidthdoubleDPPChroma,
508 SwathWidth,
509 SwathWidthChroma,
510 MaximumSwathHeightY,
511 MaximumSwathHeightC,
512 swath_width_luma_ub,
513 swath_width_chroma_ub);
514
515 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
516 RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
517 RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
518 #ifdef __DML_VBA_DEBUG__
519 dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
520 dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
521 dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
522 dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]);
523 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
524 RoundedUpMaxSwathSizeBytesY[k]);
525 dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
526 dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
527 dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]);
528 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
529 RoundedUpMaxSwathSizeBytesC[k]);
530 #endif
531
532 if (SourcePixelFormat[k] == dm_420_10) {
533 RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256);
534 RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256);
535 }
536 }
537
538 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
539 TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
540 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
541 SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
542 NoChromaSurfaces = false;
543 }
544 }
545
546 // By default, just set the reserved space to 2 pixel chunks size
547 *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2;
548
549 // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
550 // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
551 // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
552 *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512);
553
554 if (*CompBufReservedSpaceNeedAdjustment == 1) {
555 *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512;
556 }
557
558 #ifdef __DML_VBA_DEBUG__
559 dml_print("DML::%s: CompBufReservedSpaceKBytes = %d\n", __func__, *CompBufReservedSpaceKBytes);
560 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment);
561 #endif
562
563 *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
564
565 dml32_CalculateDETBufferSize(DETSizeOverride,
566 UseMALLForPStateChange,
567 ForceSingleDPP,
568 NumberOfActiveSurfaces,
569 *UnboundedRequestEnabled,
570 nomDETInKByte,
571 MaxTotalDETInKByte,
572 ConfigReturnBufferSizeInKByte,
573 MinCompressedBufferSizeInKByte,
574 CompressedBufferSegmentSizeInkByteFinal,
575 SourcePixelFormat,
576 ReadBandwidthLuma,
577 ReadBandwidthChroma,
578 RoundedUpMaxSwathSizeBytesY,
579 RoundedUpMaxSwathSizeBytesC,
580 DPPPerSurface,
581
582 /* Output */
583 DETBufferSizeInKByte, // per hubp pipe
584 CompressedBufferSizeInkByte);
585
586 #ifdef __DML_VBA_DEBUG__
587 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
588 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
589 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
590 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
591 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
592 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
593 #endif
594
595 *ViewportSizeSupport = true;
596 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
597
598 DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
599 dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
600 #ifdef __DML_VBA_DEBUG__
601 dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
602 DETBufferSizeInKByteForSwathCalculation);
603 #endif
604
605 if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
606 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
607 SwathHeightY[k] = MaximumSwathHeightY[k];
608 SwathHeightC[k] = MaximumSwathHeightC[k];
609 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
610 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
611 } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
612 RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
613 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
614 SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
615 SwathHeightC[k] = MaximumSwathHeightC[k];
616 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
617 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
618 } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
619 RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
620 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
621 SwathHeightY[k] = MaximumSwathHeightY[k];
622 SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
623 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
624 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
625 } else {
626 SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
627 SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
628 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
629 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
630 }
631
632 if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
633 DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
634 || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
635 SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
636 *ViewportSizeSupport = false;
637 ViewportSizeSupportPerSurface[k] = false;
638 } else {
639 ViewportSizeSupportPerSurface[k] = true;
640 }
641
642 if (SwathHeightC[k] == 0) {
643 #ifdef __DML_VBA_DEBUG__
644 dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
645 #endif
646 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
647 DETBufferSizeC[k] = 0;
648 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
649 #ifdef __DML_VBA_DEBUG__
650 dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
651 #endif
652 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
653 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
654 } else {
655 #ifdef __DML_VBA_DEBUG__
656 dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
657 #endif
658 DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
659 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
660 }
661
662 #ifdef __DML_VBA_DEBUG__
663 dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
664 dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
665 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
666 k, RoundedUpMaxSwathSizeBytesY[k]);
667 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
668 k, RoundedUpMaxSwathSizeBytesC[k]);
669 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY);
670 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC);
671 dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
672 dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
673 dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
674 dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
675 ViewportSizeSupportPerSurface[k]);
676 #endif
677
678 }
679 } // CalculateSwathAndDETConfiguration
680
dml32_CalculateSwathWidth(bool ForceSingleDPP,unsigned int NumberOfActiveSurfaces,enum source_format_class SourcePixelFormat[],enum dm_rotation_angle SourceRotation[],bool ViewportStationary[],unsigned int ViewportWidth[],unsigned int ViewportHeight[],unsigned int ViewportXStart[],unsigned int ViewportYStart[],unsigned int ViewportXStartC[],unsigned int ViewportYStartC[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],enum odm_combine_mode ODMMode[],unsigned int BytePerPixY[],unsigned int BytePerPixC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],unsigned int BlendingAndTiming[],unsigned int HActive[],double HRatio[],unsigned int DPPPerSurface[],double SwathWidthdoubleDPPY[],double SwathWidthdoubleDPPC[],double SwathWidthY[],double SwathWidthC[],unsigned int MaximumSwathHeightY[],unsigned int MaximumSwathHeightC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[])681 void dml32_CalculateSwathWidth(
682 bool ForceSingleDPP,
683 unsigned int NumberOfActiveSurfaces,
684 enum source_format_class SourcePixelFormat[],
685 enum dm_rotation_angle SourceRotation[],
686 bool ViewportStationary[],
687 unsigned int ViewportWidth[],
688 unsigned int ViewportHeight[],
689 unsigned int ViewportXStart[],
690 unsigned int ViewportYStart[],
691 unsigned int ViewportXStartC[],
692 unsigned int ViewportYStartC[],
693 unsigned int SurfaceWidthY[],
694 unsigned int SurfaceWidthC[],
695 unsigned int SurfaceHeightY[],
696 unsigned int SurfaceHeightC[],
697 enum odm_combine_mode ODMMode[],
698 unsigned int BytePerPixY[],
699 unsigned int BytePerPixC[],
700 unsigned int Read256BytesBlockHeightY[],
701 unsigned int Read256BytesBlockHeightC[],
702 unsigned int Read256BytesBlockWidthY[],
703 unsigned int Read256BytesBlockWidthC[],
704 unsigned int BlendingAndTiming[],
705 unsigned int HActive[],
706 double HRatio[],
707 unsigned int DPPPerSurface[],
708
709 /* Output */
710 double SwathWidthdoubleDPPY[],
711 double SwathWidthdoubleDPPC[],
712 double SwathWidthY[], // per-pipe
713 double SwathWidthC[], // per-pipe
714 unsigned int MaximumSwathHeightY[],
715 unsigned int MaximumSwathHeightC[],
716 unsigned int swath_width_luma_ub[], // per-pipe
717 unsigned int swath_width_chroma_ub[]) // per-pipe
718 {
719 unsigned int k, j;
720 enum odm_combine_mode MainSurfaceODMMode;
721
722 unsigned int surface_width_ub_l;
723 unsigned int surface_height_ub_l;
724 unsigned int surface_width_ub_c = 0;
725 unsigned int surface_height_ub_c = 0;
726
727 #ifdef __DML_VBA_DEBUG__
728 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
729 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
730 #endif
731
732 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
733 if (!IsVertical(SourceRotation[k]))
734 SwathWidthdoubleDPPY[k] = ViewportWidth[k];
735 else
736 SwathWidthdoubleDPPY[k] = ViewportHeight[k];
737
738 #ifdef __DML_VBA_DEBUG__
739 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
740 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
741 #endif
742
743 MainSurfaceODMMode = ODMMode[k];
744 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
745 if (BlendingAndTiming[k] == j)
746 MainSurfaceODMMode = ODMMode[j];
747 }
748
749 if (ForceSingleDPP) {
750 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
751 } else {
752 if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
753 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
754 dml_round(HActive[k] / 4.0 * HRatio[k]));
755 } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
756 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
757 dml_round(HActive[k] / 2.0 * HRatio[k]));
758 } else if (DPPPerSurface[k] == 2) {
759 SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
760 } else {
761 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
762 }
763 }
764
765 #ifdef __DML_VBA_DEBUG__
766 dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
767 dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
768 dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
769 dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]);
770 dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]);
771 #endif
772
773 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
774 SourcePixelFormat[k] == dm_420_12) {
775 SwathWidthC[k] = SwathWidthY[k] / 2;
776 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
777 } else {
778 SwathWidthC[k] = SwathWidthY[k];
779 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
780 }
781
782 if (ForceSingleDPP == true) {
783 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
784 SwathWidthC[k] = SwathWidthdoubleDPPC[k];
785 }
786
787 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
788 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
789
790 if (!IsVertical(SourceRotation[k])) {
791 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
792 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
793 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
794 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
795 dml_floor(ViewportXStart[k] +
796 SwathWidthY[k] +
797 Read256BytesBlockWidthY[k] - 1,
798 Read256BytesBlockWidthY[k]) -
799 dml_floor(ViewportXStart[k],
800 Read256BytesBlockWidthY[k]));
801 } else {
802 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
803 dml_ceil(SwathWidthY[k] - 1,
804 Read256BytesBlockWidthY[k]) +
805 Read256BytesBlockWidthY[k]);
806 }
807 if (BytePerPixC[k] > 0) {
808 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
809 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
810 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
811 dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
812 Read256BytesBlockWidthC[k] - 1,
813 Read256BytesBlockWidthC[k]) -
814 dml_floor(ViewportXStartC[k],
815 Read256BytesBlockWidthC[k]));
816 } else {
817 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
818 dml_ceil(SwathWidthC[k] - 1,
819 Read256BytesBlockWidthC[k]) +
820 Read256BytesBlockWidthC[k]);
821 }
822 } else {
823 swath_width_chroma_ub[k] = 0;
824 }
825 } else {
826 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
827 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
828
829 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
830 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] +
831 SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
832 Read256BytesBlockHeightY[k]) -
833 dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
834 } else {
835 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1,
836 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
837 }
838 if (BytePerPixC[k] > 0) {
839 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
840 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
841 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
842 dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
843 Read256BytesBlockHeightC[k] - 1,
844 Read256BytesBlockHeightC[k]) -
845 dml_floor(ViewportYStartC[k],
846 Read256BytesBlockHeightC[k]));
847 } else {
848 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
849 dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
850 Read256BytesBlockHeightC[k]);
851 }
852 } else {
853 swath_width_chroma_ub[k] = 0;
854 }
855 }
856
857 #ifdef __DML_VBA_DEBUG__
858 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
859 dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
860 dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
861 dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
862 dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
863 dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
864 dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
865 dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
866 dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
867 dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
868 dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
869 dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
870 dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
871 dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
872 #endif
873
874 }
875 } // CalculateSwathWidth
876
dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,unsigned int TotalNumberOfActiveDPP,bool NoChroma,enum output_encoder_class Output,enum dm_swizzle_mode SurfaceTiling,bool CompBufReservedSpaceNeedAdjustment,bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)877 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
878 unsigned int TotalNumberOfActiveDPP,
879 bool NoChroma,
880 enum output_encoder_class Output,
881 enum dm_swizzle_mode SurfaceTiling,
882 bool CompBufReservedSpaceNeedAdjustment,
883 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
884 {
885 bool ret_val = false;
886
887 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable &&
888 TotalNumberOfActiveDPP == 1 && NoChroma);
889 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
890 ret_val = false;
891
892 if (SurfaceTiling == dm_sw_linear)
893 ret_val = false;
894
895 if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
896 ret_val = false;
897
898 #ifdef __DML_VBA_DEBUG__
899 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, CompBufReservedSpaceNeedAdjustment);
900 dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = %d\n", __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
901 dml_print("DML::%s: ret_val = %d\n", __func__, ret_val);
902 #endif
903
904 return (ret_val);
905 }
906
dml32_CalculateDETBufferSize(unsigned int DETSizeOverride[],enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],bool ForceSingleDPP,unsigned int NumberOfActiveSurfaces,bool UnboundedRequestEnabled,unsigned int nomDETInKByte,unsigned int MaxTotalDETInKByte,unsigned int ConfigReturnBufferSizeInKByte,unsigned int MinCompressedBufferSizeInKByte,unsigned int CompressedBufferSegmentSizeInkByteFinal,enum source_format_class SourcePixelFormat[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int RoundedUpMaxSwathSizeBytesY[],unsigned int RoundedUpMaxSwathSizeBytesC[],unsigned int DPPPerSurface[],unsigned int DETBufferSizeInKByte[],unsigned int * CompressedBufferSizeInkByte)907 void dml32_CalculateDETBufferSize(
908 unsigned int DETSizeOverride[],
909 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
910 bool ForceSingleDPP,
911 unsigned int NumberOfActiveSurfaces,
912 bool UnboundedRequestEnabled,
913 unsigned int nomDETInKByte,
914 unsigned int MaxTotalDETInKByte,
915 unsigned int ConfigReturnBufferSizeInKByte,
916 unsigned int MinCompressedBufferSizeInKByte,
917 unsigned int CompressedBufferSegmentSizeInkByteFinal,
918 enum source_format_class SourcePixelFormat[],
919 double ReadBandwidthLuma[],
920 double ReadBandwidthChroma[],
921 unsigned int RoundedUpMaxSwathSizeBytesY[],
922 unsigned int RoundedUpMaxSwathSizeBytesC[],
923 unsigned int DPPPerSurface[],
924 /* Output */
925 unsigned int DETBufferSizeInKByte[],
926 unsigned int *CompressedBufferSizeInkByte)
927 {
928 unsigned int DETBufferSizePoolInKByte;
929 unsigned int NextDETBufferPieceInKByte;
930 bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
931 bool NextPotentialSurfaceToAssignDETPieceFound;
932 unsigned int NextSurfaceToAssignDETPiece;
933 double TotalBandwidth;
934 double BandwidthOfSurfacesNotAssignedDETPiece;
935 unsigned int max_minDET;
936 unsigned int minDET;
937 unsigned int minDET_pipe;
938 unsigned int j, k;
939
940 #ifdef __DML_VBA_DEBUG__
941 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
942 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
943 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
944 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
945 dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
946 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
947 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
948 dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
949 CompressedBufferSegmentSizeInkByteFinal);
950 #endif
951
952 // Note: Will use default det size if that fits 2 swaths
953 if (UnboundedRequestEnabled) {
954 if (DETSizeOverride[0] > 0) {
955 DETBufferSizeInKByte[0] = DETSizeOverride[0];
956 } else {
957 DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
958 ((double) RoundedUpMaxSwathSizeBytesY[0] +
959 (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
960 }
961 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
962 } else {
963 DETBufferSizePoolInKByte = MaxTotalDETInKByte;
964 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
965 DETBufferSizeInKByte[k] = nomDETInKByte;
966 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
967 SourcePixelFormat[k] == dm_420_12) {
968 max_minDET = nomDETInKByte - 64;
969 } else {
970 max_minDET = nomDETInKByte;
971 }
972 minDET = 128;
973 minDET_pipe = 0;
974
975 // add DET resource until can hold 2 full swaths
976 while (minDET <= max_minDET && minDET_pipe == 0) {
977 if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
978 (double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
979 minDET_pipe = minDET;
980 minDET = minDET + 64;
981 }
982
983 #ifdef __DML_VBA_DEBUG__
984 dml_print("DML::%s: k=%0d minDET = %d\n", __func__, k, minDET);
985 dml_print("DML::%s: k=%0d max_minDET = %d\n", __func__, k, max_minDET);
986 dml_print("DML::%s: k=%0d minDET_pipe = %d\n", __func__, k, minDET_pipe);
987 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
988 RoundedUpMaxSwathSizeBytesY[k]);
989 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
990 RoundedUpMaxSwathSizeBytesC[k]);
991 #endif
992
993 if (minDET_pipe == 0) {
994 minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
995 (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
996 #ifdef __DML_VBA_DEBUG__
997 dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
998 __func__, k, minDET_pipe);
999 #endif
1000 }
1001
1002 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1003 DETBufferSizeInKByte[k] = 0;
1004 } else if (DETSizeOverride[k] > 0) {
1005 DETBufferSizeInKByte[k] = DETSizeOverride[k];
1006 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1007 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
1008 } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
1009 DETBufferSizeInKByte[k] = minDET_pipe;
1010 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1011 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
1012 }
1013
1014 #ifdef __DML_VBA_DEBUG__
1015 dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
1016 dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
1017 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1018 dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
1019 #endif
1020 }
1021
1022 TotalBandwidth = 0;
1023 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1024 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
1025 TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
1026 }
1027 #ifdef __DML_VBA_DEBUG__
1028 dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
1029 for (uint k = 0; k < NumberOfActiveSurfaces; ++k)
1030 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1031 dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
1032 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
1033 #endif
1034 BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
1035 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1036
1037 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1038 DETPieceAssignedToThisSurfaceAlready[k] = true;
1039 } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
1040 (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
1041 ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
1042 DETPieceAssignedToThisSurfaceAlready[k] = true;
1043 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1044 ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1045 } else {
1046 DETPieceAssignedToThisSurfaceAlready[k] = false;
1047 }
1048 #ifdef __DML_VBA_DEBUG__
1049 dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
1050 DETPieceAssignedToThisSurfaceAlready[k]);
1051 dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
1052 BandwidthOfSurfacesNotAssignedDETPiece);
1053 #endif
1054 }
1055
1056 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
1057 NextPotentialSurfaceToAssignDETPieceFound = false;
1058 NextSurfaceToAssignDETPiece = 0;
1059
1060 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1061 #ifdef __DML_VBA_DEBUG__
1062 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
1063 ReadBandwidthLuma[k]);
1064 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
1065 ReadBandwidthChroma[k]);
1066 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
1067 ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1068 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
1069 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1070 dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
1071 NextSurfaceToAssignDETPiece);
1072 #endif
1073 if (!DETPieceAssignedToThisSurfaceAlready[k] &&
1074 (!NextPotentialSurfaceToAssignDETPieceFound ||
1075 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
1076 ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1077 ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
1078 NextSurfaceToAssignDETPiece = k;
1079 NextPotentialSurfaceToAssignDETPieceFound = true;
1080 }
1081 #ifdef __DML_VBA_DEBUG__
1082 dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
1083 __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1084 dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
1085 __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1086 #endif
1087 }
1088
1089 if (NextPotentialSurfaceToAssignDETPieceFound) {
1090 // Note: To show the banker's rounding behavior in VBA and also the fact
1091 // that the DET buffer size varies due to precision issue
1092 //
1093 //double tmp1 = ((double) DETBufferSizePoolInKByte *
1094 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1095 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1096 // BandwidthOfSurfacesNotAssignedDETPiece /
1097 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1098 //double tmp2 = dml_round((double) DETBufferSizePoolInKByte *
1099 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1100 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1101 //BandwidthOfSurfacesNotAssignedDETPiece /
1102 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1103 //
1104 //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
1105 //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
1106
1107 NextDETBufferPieceInKByte = dml_min(
1108 dml_round((double) DETBufferSizePoolInKByte *
1109 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1110 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1111 BandwidthOfSurfacesNotAssignedDETPiece /
1112 ((ForceSingleDPP ? 1 :
1113 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
1114 (ForceSingleDPP ? 1 :
1115 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
1116 dml_floor((double) DETBufferSizePoolInKByte,
1117 (ForceSingleDPP ? 1 :
1118 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1119
1120 // Above calculation can assign the entire DET buffer allocation to a single pipe.
1121 // We should limit the per-pipe DET size to the nominal / max per pipe.
1122 if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1123 if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
1124 nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1125 NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) -
1126 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
1127 } else {
1128 // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1129 // already has the max per-pipe value
1130 NextDETBufferPieceInKByte = 0;
1131 }
1132 }
1133
1134 #ifdef __DML_VBA_DEBUG__
1135 dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
1136 DETBufferSizePoolInKByte);
1137 dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
1138 NextSurfaceToAssignDETPiece);
1139 dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
1140 NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1141 dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
1142 NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1143 dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
1144 __func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
1145 dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
1146 NextDETBufferPieceInKByte);
1147 dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
1148 __func__, j, NextSurfaceToAssignDETPiece,
1149 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1150 #endif
1151
1152 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
1153 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1154 + NextDETBufferPieceInKByte
1155 / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
1156 #ifdef __DML_VBA_DEBUG__
1157 dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1158 #endif
1159
1160 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
1161 DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
1162 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1163 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1164 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1165 }
1166 }
1167 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
1168 }
1169 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
1170
1171 #ifdef __DML_VBA_DEBUG__
1172 dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1173 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
1174 for (uint k = 0; k < NumberOfActiveSurfaces; ++k) {
1175 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
1176 __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1177 }
1178 #endif
1179 } // CalculateDETBufferSize
1180
dml32_CalculateODMMode(unsigned int MaximumPixelsPerLinePerDSCUnit,unsigned int HActive,enum output_format_class OutFormat,enum output_encoder_class Output,enum odm_combine_policy ODMUse,double StateDispclk,double MaxDispclk,bool DSCEnable,unsigned int TotalNumberOfActiveDPP,unsigned int MaxNumDPP,double PixelClock,double DISPCLKDPPCLKDSCCLKDownSpreading,double DISPCLKRampingMargin,double DISPCLKDPPCLKVCOSpeed,unsigned int NumberOfDSCSlices,bool * TotalAvailablePipesSupport,unsigned int * NumberOfDPP,enum odm_combine_mode * ODMMode,double * RequiredDISPCLKPerSurface)1181 void dml32_CalculateODMMode(
1182 unsigned int MaximumPixelsPerLinePerDSCUnit,
1183 unsigned int HActive,
1184 enum output_format_class OutFormat,
1185 enum output_encoder_class Output,
1186 enum odm_combine_policy ODMUse,
1187 double StateDispclk,
1188 double MaxDispclk,
1189 bool DSCEnable,
1190 unsigned int TotalNumberOfActiveDPP,
1191 unsigned int MaxNumDPP,
1192 double PixelClock,
1193 double DISPCLKDPPCLKDSCCLKDownSpreading,
1194 double DISPCLKRampingMargin,
1195 double DISPCLKDPPCLKVCOSpeed,
1196 unsigned int NumberOfDSCSlices,
1197
1198 /* Output */
1199 bool *TotalAvailablePipesSupport,
1200 unsigned int *NumberOfDPP,
1201 enum odm_combine_mode *ODMMode,
1202 double *RequiredDISPCLKPerSurface)
1203 {
1204
1205 double SurfaceRequiredDISPCLKWithoutODMCombine;
1206 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1207 double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1208
1209 SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
1210 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1211 MaxDispclk);
1212 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
1213 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1214 MaxDispclk);
1215 SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
1216 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1217 MaxDispclk);
1218 *TotalAvailablePipesSupport = true;
1219 *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
1220
1221 if (ODMUse == dm_odm_combine_policy_none)
1222 *ODMMode = dm_odm_combine_mode_disabled;
1223
1224 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
1225 *NumberOfDPP = 0;
1226
1227 // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
1228 // (ODMUse == "" || ODMUse == "CombineAsNeeded")
1229
1230 if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
1231 ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
1232 (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit))
1233 || NumberOfDSCSlices > 8)))) {
1234 if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
1235 *ODMMode = dm_odm_combine_mode_4to1;
1236 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1237 *NumberOfDPP = 4;
1238 } else {
1239 *TotalAvailablePipesSupport = false;
1240 }
1241 } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
1242 (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
1243 SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
1244 (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit))
1245 || (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) {
1246 if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
1247 *ODMMode = dm_odm_combine_mode_2to1;
1248 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1249 *NumberOfDPP = 2;
1250 } else {
1251 *TotalAvailablePipesSupport = false;
1252 }
1253 } else {
1254 if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
1255 *NumberOfDPP = 1;
1256 else
1257 *TotalAvailablePipesSupport = false;
1258 }
1259 if (OutFormat == dm_420 && HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH &&
1260 ODMUse != dm_odm_combine_policy_4to1) {
1261 if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 4) {
1262 *ODMMode = dm_odm_combine_mode_disabled;
1263 *NumberOfDPP = 0;
1264 *TotalAvailablePipesSupport = false;
1265 } else if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 2 ||
1266 *ODMMode == dm_odm_combine_mode_4to1) {
1267 *ODMMode = dm_odm_combine_mode_4to1;
1268 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1269 *NumberOfDPP = 4;
1270 } else {
1271 *ODMMode = dm_odm_combine_mode_2to1;
1272 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1273 *NumberOfDPP = 2;
1274 }
1275 }
1276 if (Output == dm_hdmi && OutFormat == dm_420 &&
1277 HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH) {
1278 *ODMMode = dm_odm_combine_mode_disabled;
1279 *NumberOfDPP = 0;
1280 *TotalAvailablePipesSupport = false;
1281 }
1282 }
1283
dml32_CalculateRequiredDispclk(enum odm_combine_mode ODMMode,double PixelClock,double DISPCLKDPPCLKDSCCLKDownSpreading,double DISPCLKRampingMargin,double DISPCLKDPPCLKVCOSpeed,double MaxDispclk)1284 double dml32_CalculateRequiredDispclk(
1285 enum odm_combine_mode ODMMode,
1286 double PixelClock,
1287 double DISPCLKDPPCLKDSCCLKDownSpreading,
1288 double DISPCLKRampingMargin,
1289 double DISPCLKDPPCLKVCOSpeed,
1290 double MaxDispclk)
1291 {
1292 double RequiredDispclk = 0.;
1293 double PixelClockAfterODM;
1294 double DISPCLKWithRampingRoundedToDFSGranularity;
1295 double DISPCLKWithoutRampingRoundedToDFSGranularity;
1296 double MaxDispclkRoundedDownToDFSGranularity;
1297
1298 if (ODMMode == dm_odm_combine_mode_4to1)
1299 PixelClockAfterODM = PixelClock / 4;
1300 else if (ODMMode == dm_odm_combine_mode_2to1)
1301 PixelClockAfterODM = PixelClock / 2;
1302 else
1303 PixelClockAfterODM = PixelClock;
1304
1305
1306 DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1307 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1308 * (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);
1309
1310 DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1311 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed);
1312
1313 MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
1314
1315 if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1316 RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
1317 else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1318 RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
1319 else
1320 RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
1321
1322 return RequiredDispclk;
1323 }
1324
/*
 * dml32_RoundToDFSGranularity - snap a clock to a value of the form
 * (4 * VCOSpeed) / N, where N is a whole number.
 *
 * Returns 0.0 for a non-positive Clock. With round_up set, the divider is
 * taken from dml_floor() (giving a result at or above Clock); otherwise it is
 * taken from dml_ceil() (giving a result at or below Clock).
 */
double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
{
	double divider;

	if (Clock <= 0.0)
		return 0.0;

	/* A smaller divider yields a higher clock, hence floor for rounding up. */
	divider = round_up ? dml_floor(VCOSpeed * 4.0 / Clock, 1.0)
			   : dml_ceil(VCOSpeed * 4.0 / Clock, 1.0);

	return VCOSpeed * 4.0 / divider;
}
1335
dml32_CalculateOutputLink(double PHYCLKPerState,double PHYCLKD18PerState,double PHYCLKD32PerState,double Downspreading,bool IsMainSurfaceUsingTheIndicatedTiming,enum output_encoder_class Output,enum output_format_class OutputFormat,unsigned int HTotal,unsigned int HActive,double PixelClockBackEnd,double ForcedOutputLinkBPP,unsigned int DSCInputBitPerComponent,unsigned int NumberOfDSCSlices,double AudioSampleRate,unsigned int AudioSampleLayout,enum odm_combine_mode ODMModeNoDSC,enum odm_combine_mode ODMModeDSC,bool DSCEnable,unsigned int OutputLinkDPLanes,enum dm_output_link_dp_rate OutputLinkDPRate,bool * RequiresDSC,double * RequiresFEC,double * OutBpp,enum dm_output_type * OutputType,enum dm_output_rate * OutputRate,unsigned int * RequiredSlots)1336 void dml32_CalculateOutputLink(
1337 double PHYCLKPerState,
1338 double PHYCLKD18PerState,
1339 double PHYCLKD32PerState,
1340 double Downspreading,
1341 bool IsMainSurfaceUsingTheIndicatedTiming,
1342 enum output_encoder_class Output,
1343 enum output_format_class OutputFormat,
1344 unsigned int HTotal,
1345 unsigned int HActive,
1346 double PixelClockBackEnd,
1347 double ForcedOutputLinkBPP,
1348 unsigned int DSCInputBitPerComponent,
1349 unsigned int NumberOfDSCSlices,
1350 double AudioSampleRate,
1351 unsigned int AudioSampleLayout,
1352 enum odm_combine_mode ODMModeNoDSC,
1353 enum odm_combine_mode ODMModeDSC,
1354 bool DSCEnable,
1355 unsigned int OutputLinkDPLanes,
1356 enum dm_output_link_dp_rate OutputLinkDPRate,
1357
1358 /* Output */
1359 bool *RequiresDSC,
1360 double *RequiresFEC,
1361 double *OutBpp,
1362 enum dm_output_type *OutputType,
1363 enum dm_output_rate *OutputRate,
1364 unsigned int *RequiredSlots)
1365 {
1366 bool LinkDSCEnable;
1367 unsigned int dummy;
1368 *RequiresDSC = false;
1369 *RequiresFEC = false;
1370 *OutBpp = 0;
1371 *OutputType = dm_output_type_unknown;
1372 *OutputRate = dm_output_rate_unknown;
1373
1374 if (IsMainSurfaceUsingTheIndicatedTiming) {
1375 if (Output == dm_hdmi) {
1376 *RequiresDSC = false;
1377 *RequiresFEC = false;
1378 *OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
1379 PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
1380 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1381 ODMModeNoDSC, ODMModeDSC, &dummy);
1382 //OutputTypeAndRate = "HDMI";
1383 *OutputType = dm_output_type_hdmi;
1384
1385 } else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
1386 if (DSCEnable == true) {
1387 *RequiresDSC = true;
1388 LinkDSCEnable = true;
1389 if (Output == dm_dp || Output == dm_dp2p0)
1390 *RequiresFEC = true;
1391 else
1392 *RequiresFEC = false;
1393 } else {
1394 *RequiresDSC = false;
1395 LinkDSCEnable = false;
1396 if (Output == dm_dp2p0)
1397 *RequiresFEC = true;
1398 else
1399 *RequiresFEC = false;
1400 }
1401 if (Output == dm_dp2p0) {
1402 *OutBpp = 0;
1403 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
1404 PHYCLKD32PerState >= 10000.0 / 32) {
1405 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1406 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1407 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1408 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1409 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1410 if (*OutBpp == 0 && PHYCLKD32PerState < 13500.0 / 32 && DSCEnable == true &&
1411 ForcedOutputLinkBPP == 0) {
1412 *RequiresDSC = true;
1413 LinkDSCEnable = true;
1414 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1415 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1416 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1417 OutputFormat, DSCInputBitPerComponent,
1418 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1419 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1420 }
1421 //OutputTypeAndRate = Output & " UHBR10";
1422 *OutputType = dm_output_type_dp2p0;
1423 *OutputRate = dm_output_rate_dp_rate_uhbr10;
1424 }
1425 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
1426 *OutBpp == 0 && PHYCLKD32PerState >= 13500.0 / 32) {
1427 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1428 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1429 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1430 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1431 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1432
1433 if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true &&
1434 ForcedOutputLinkBPP == 0) {
1435 *RequiresDSC = true;
1436 LinkDSCEnable = true;
1437 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1438 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1439 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1440 OutputFormat, DSCInputBitPerComponent,
1441 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1442 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1443 }
1444 //OutputTypeAndRate = Output & " UHBR13p5";
1445 *OutputType = dm_output_type_dp2p0;
1446 *OutputRate = dm_output_rate_dp_rate_uhbr13p5;
1447 }
1448 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
1449 *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
1450 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1451 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1452 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1453 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1454 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1455 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1456 *RequiresDSC = true;
1457 LinkDSCEnable = true;
1458 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1459 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1460 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1461 OutputFormat, DSCInputBitPerComponent,
1462 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1463 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1464 }
1465 //OutputTypeAndRate = Output & " UHBR20";
1466 *OutputType = dm_output_type_dp2p0;
1467 *OutputRate = dm_output_rate_dp_rate_uhbr20;
1468 }
1469 } else {
1470 *OutBpp = 0;
1471 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
1472 PHYCLKPerState >= 270) {
1473 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1474 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1475 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1476 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1477 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1478 if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
1479 ForcedOutputLinkBPP == 0) {
1480 *RequiresDSC = true;
1481 LinkDSCEnable = true;
1482 if (Output == dm_dp)
1483 *RequiresFEC = true;
1484 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1485 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1486 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1487 OutputFormat, DSCInputBitPerComponent,
1488 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1489 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1490 }
1491 //OutputTypeAndRate = Output & " HBR";
1492 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1493 *OutputRate = dm_output_rate_dp_rate_hbr;
1494 }
1495 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
1496 *OutBpp == 0 && PHYCLKPerState >= 540) {
1497 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1498 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1499 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1500 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1501 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1502
1503 if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
1504 ForcedOutputLinkBPP == 0) {
1505 *RequiresDSC = true;
1506 LinkDSCEnable = true;
1507 if (Output == dm_dp)
1508 *RequiresFEC = true;
1509
1510 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1511 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1512 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1513 OutputFormat, DSCInputBitPerComponent,
1514 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1515 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1516 }
1517 //OutputTypeAndRate = Output & " HBR2";
1518 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1519 *OutputRate = dm_output_rate_dp_rate_hbr2;
1520 }
1521 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
1522 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1523 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1524 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1525 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
1526 AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
1527 RequiredSlots);
1528
1529 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1530 *RequiresDSC = true;
1531 LinkDSCEnable = true;
1532 if (Output == dm_dp)
1533 *RequiresFEC = true;
1534
1535 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1536 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1537 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1538 OutputFormat, DSCInputBitPerComponent,
1539 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1540 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1541 }
1542 //OutputTypeAndRate = Output & " HBR3";
1543 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1544 *OutputRate = dm_output_rate_dp_rate_hbr3;
1545 }
1546 }
1547 }
1548 }
1549 }
1550
/*
 * Compute the per-surface DPP clocks and the global DPP clock.
 *
 * For every active surface the single-DPP clock is divided by the number of
 * DPPs on that surface and scaled up by the down-spreading percentage. The
 * largest of those values, rounded by dml32_RoundToDFSGranularity(), becomes
 * *GlobalDPPCLK. Each per-surface clock is then rounded up to a multiple of
 * 1/255 of the global clock.
 *
 * @NumberOfActiveSurfaces: number of valid entries in the arrays
 * @DISPCLKDPPCLKDSCCLKDownSpreading: down-spreading, in percent
 * @DISPCLKDPPCLKVCOSpeed: forwarded to dml32_RoundToDFSGranularity()
 * @DPPCLKUsingSingleDPP: per-surface clock when a single DPP is used
 * @DPPPerSurface: per-surface DPP count (used as a divisor)
 * @GlobalDPPCLK: output, rounded maximum of the per-surface clocks
 * @Dppclk: output, per-surface clocks
 */
void dml32_CalculateDPPCLK(
		unsigned int NumberOfActiveSurfaces,
		double DISPCLKDPPCLKDSCCLKDownSpreading,
		double DISPCLKDPPCLKVCOSpeed,
		double DPPCLKUsingSingleDPP[],
		unsigned int DPPPerSurface[],

		/* output */
		double *GlobalDPPCLK,
		double Dppclk[])
{
	const double DownSpreadScale = 1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100;
	double HighestDppclk = 0;
	double GlobalClk;
	double GlobalClkStep;
	unsigned int i;

	/* Pass 1: raw per-surface clocks and their maximum. */
	for (i = 0; i < NumberOfActiveSurfaces; i++) {
		Dppclk[i] = DPPCLKUsingSingleDPP[i] / DPPPerSurface[i] * DownSpreadScale;
		HighestDppclk = dml_max(HighestDppclk, Dppclk[i]);
	}

	GlobalClk = dml32_RoundToDFSGranularity(HighestDppclk, 1, DISPCLKDPPCLKVCOSpeed);
	*GlobalDPPCLK = GlobalClk;

	/* Pass 2: round every surface clock up to a 1/255 step of the global clock. */
	GlobalClkStep = GlobalClk / 255;
	for (i = 0; i < NumberOfActiveSurfaces; i++)
		Dppclk[i] = GlobalClkStep * dml_ceil(Dppclk[i] * 255.0 / GlobalClk, 1.0);
}
1572
dml32_TruncToValidBPP(double LinkBitRate,unsigned int Lanes,unsigned int HTotal,unsigned int HActive,double PixelClock,double DesiredBPP,bool DSCEnable,enum output_encoder_class Output,enum output_format_class Format,unsigned int DSCInputBitPerComponent,unsigned int DSCSlices,unsigned int AudioRate,unsigned int AudioLayout,enum odm_combine_mode ODMModeNoDSC,enum odm_combine_mode ODMModeDSC,unsigned int * RequiredSlots)1573 double dml32_TruncToValidBPP(
1574 double LinkBitRate,
1575 unsigned int Lanes,
1576 unsigned int HTotal,
1577 unsigned int HActive,
1578 double PixelClock,
1579 double DesiredBPP,
1580 bool DSCEnable,
1581 enum output_encoder_class Output,
1582 enum output_format_class Format,
1583 unsigned int DSCInputBitPerComponent,
1584 unsigned int DSCSlices,
1585 unsigned int AudioRate,
1586 unsigned int AudioLayout,
1587 enum odm_combine_mode ODMModeNoDSC,
1588 enum odm_combine_mode ODMModeDSC,
1589 /* Output */
1590 unsigned int *RequiredSlots)
1591 {
1592 double MaxLinkBPP;
1593 unsigned int MinDSCBPP;
1594 double MaxDSCBPP;
1595 unsigned int NonDSCBPP0;
1596 unsigned int NonDSCBPP1;
1597 unsigned int NonDSCBPP2;
1598 unsigned int NonDSCBPP3 = BPP_INVALID;
1599
1600 if (Format == dm_420) {
1601 NonDSCBPP0 = 12;
1602 NonDSCBPP1 = 15;
1603 NonDSCBPP2 = 18;
1604 MinDSCBPP = 6;
1605 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
1606 } else if (Format == dm_444) {
1607 NonDSCBPP3 = 18;
1608 NonDSCBPP0 = 24;
1609 NonDSCBPP1 = 30;
1610 NonDSCBPP2 = 36;
1611 MinDSCBPP = 8;
1612 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
1613 } else {
1614 if (Output == dm_hdmi) {
1615 NonDSCBPP0 = 24;
1616 NonDSCBPP1 = 24;
1617 NonDSCBPP2 = 24;
1618 } else {
1619 NonDSCBPP0 = 16;
1620 NonDSCBPP1 = 20;
1621 NonDSCBPP2 = 24;
1622 }
1623 if (Format == dm_n422) {
1624 MinDSCBPP = 7;
1625 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
1626 } else {
1627 MinDSCBPP = 8;
1628 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
1629 }
1630 }
1631 if (Output == dm_dp2p0) {
1632 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
1633 } else if (DSCEnable && Output == dm_dp) {
1634 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
1635 } else {
1636 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
1637 }
1638
1639 if (DSCEnable) {
1640 if (ODMModeDSC == dm_odm_combine_mode_4to1)
1641 MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1642 else if (ODMModeDSC == dm_odm_combine_mode_2to1)
1643 MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1644 else if (ODMModeDSC == dm_odm_split_mode_1to2)
1645 MaxLinkBPP = 2 * MaxLinkBPP;
1646 } else {
1647 if (ODMModeNoDSC == dm_odm_combine_mode_4to1)
1648 MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1649 else if (ODMModeNoDSC == dm_odm_combine_mode_2to1)
1650 MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1651 else if (ODMModeNoDSC == dm_odm_split_mode_1to2)
1652 MaxLinkBPP = 2 * MaxLinkBPP;
1653 }
1654
1655 *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
1656
1657 if (DesiredBPP == 0) {
1658 if (DSCEnable) {
1659 if (MaxLinkBPP < MinDSCBPP)
1660 return BPP_INVALID;
1661 else if (MaxLinkBPP >= MaxDSCBPP)
1662 return MaxDSCBPP;
1663 else
1664 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
1665 } else {
1666 if (MaxLinkBPP >= NonDSCBPP2)
1667 return NonDSCBPP2;
1668 else if (MaxLinkBPP >= NonDSCBPP1)
1669 return NonDSCBPP1;
1670 else if (MaxLinkBPP >= NonDSCBPP0)
1671 return 16.0;
1672 else if ((Output == dm_dp2p0 || Output == dm_dp) && NonDSCBPP3 != BPP_INVALID && MaxLinkBPP >= NonDSCBPP3)
1673 return NonDSCBPP3; // Special case to allow 6bpc RGB for DP connections.
1674 else
1675 return BPP_INVALID;
1676 }
1677 } else {
1678 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
1679 DesiredBPP <= NonDSCBPP0)) ||
1680 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
1681 return BPP_INVALID;
1682 else
1683 return DesiredBPP;
1684 }
1685 } // TruncToValidBPP
1686
dml32_RequiredDTBCLK(bool DSCEnable,double PixelClock,enum output_format_class OutputFormat,double OutputBpp,unsigned int DSCSlices,unsigned int HTotal,unsigned int HActive,unsigned int AudioRate,unsigned int AudioLayout)1687 double dml32_RequiredDTBCLK(
1688 bool DSCEnable,
1689 double PixelClock,
1690 enum output_format_class OutputFormat,
1691 double OutputBpp,
1692 unsigned int DSCSlices,
1693 unsigned int HTotal,
1694 unsigned int HActive,
1695 unsigned int AudioRate,
1696 unsigned int AudioLayout)
1697 {
1698 double PixelWordRate;
1699 double HCActive;
1700 double HCBlank;
1701 double AverageTribyteRate;
1702 double HActiveTribyteRate;
1703
1704 if (DSCEnable != true)
1705 return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
1706
1707 PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2);
1708 HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp *
1709 dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
1710 HCBlank = 64 + 32 *
1711 dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
1712 AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
1713 HActiveTribyteRate = PixelWordRate * HCActive / HActive;
1714 return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
1715 }
1716
dml32_DSCDelayRequirement(bool DSCEnabled,enum odm_combine_mode ODMMode,unsigned int DSCInputBitPerComponent,double OutputBpp,unsigned int HActive,unsigned int HTotal,unsigned int NumberOfDSCSlices,enum output_format_class OutputFormat,enum output_encoder_class Output,double PixelClock,double PixelClockBackEnd,double dsc_delay_factor_wa)1717 unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
1718 enum odm_combine_mode ODMMode,
1719 unsigned int DSCInputBitPerComponent,
1720 double OutputBpp,
1721 unsigned int HActive,
1722 unsigned int HTotal,
1723 unsigned int NumberOfDSCSlices,
1724 enum output_format_class OutputFormat,
1725 enum output_encoder_class Output,
1726 double PixelClock,
1727 double PixelClockBackEnd,
1728 double dsc_delay_factor_wa)
1729 {
1730 unsigned int DSCDelayRequirement_val;
1731
1732 if (DSCEnabled == true && OutputBpp != 0) {
1733 if (ODMMode == dm_odm_combine_mode_4to1) {
1734 DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1735 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4,
1736 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1737 } else if (ODMMode == dm_odm_combine_mode_2to1) {
1738 DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1739 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2,
1740 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1741 } else {
1742 DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1743 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices,
1744 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output);
1745 }
1746
1747 DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
1748 dml_ceil((double)DSCDelayRequirement_val / HActive, 1);
1749
1750 DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;
1751
1752 } else {
1753 DSCDelayRequirement_val = 0;
1754 }
1755
1756 #ifdef __DML_VBA_DEBUG__
1757 dml_print("DML::%s: DSCEnabled = %d\n", __func__, DSCEnabled);
1758 dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp);
1759 dml_print("DML::%s: HActive = %d\n", __func__, HActive);
1760 dml_print("DML::%s: OutputFormat = %d\n", __func__, OutputFormat);
1761 dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
1762 dml_print("DML::%s: NumberOfDSCSlices = %d\n", __func__, NumberOfDSCSlices);
1763 dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val);
1764 #endif
1765
1766 return dml_ceil(DSCDelayRequirement_val * dsc_delay_factor_wa, 1);
1767 }
1768
dml32_CalculateSurfaceSizeInMall(unsigned int NumberOfActiveSurfaces,unsigned int MALLAllocatedForDCN,enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],bool DCCEnable[],bool ViewportStationary[],unsigned int ViewportXStartY[],unsigned int ViewportYStartY[],unsigned int ViewportXStartC[],unsigned int ViewportYStartC[],unsigned int ViewportWidthY[],unsigned int ViewportHeightY[],unsigned int BytesPerPixelY[],unsigned int ViewportWidthC[],unsigned int ViewportHeightC[],unsigned int BytesPerPixelC[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int ReadBlockWidthY[],unsigned int ReadBlockWidthC[],unsigned int ReadBlockHeightY[],unsigned int ReadBlockHeightC[],unsigned int DCCMetaPitchY[],unsigned int DCCMetaPitchC[],unsigned int SurfaceSizeInMALL[],bool * ExceededMALLSize)1769 void dml32_CalculateSurfaceSizeInMall(
1770 unsigned int NumberOfActiveSurfaces,
1771 unsigned int MALLAllocatedForDCN,
1772 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1773 enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],
1774 bool DCCEnable[],
1775 bool ViewportStationary[],
1776 unsigned int ViewportXStartY[],
1777 unsigned int ViewportYStartY[],
1778 unsigned int ViewportXStartC[],
1779 unsigned int ViewportYStartC[],
1780 unsigned int ViewportWidthY[],
1781 unsigned int ViewportHeightY[],
1782 unsigned int BytesPerPixelY[],
1783 unsigned int ViewportWidthC[],
1784 unsigned int ViewportHeightC[],
1785 unsigned int BytesPerPixelC[],
1786 unsigned int SurfaceWidthY[],
1787 unsigned int SurfaceWidthC[],
1788 unsigned int SurfaceHeightY[],
1789 unsigned int SurfaceHeightC[],
1790 unsigned int Read256BytesBlockWidthY[],
1791 unsigned int Read256BytesBlockWidthC[],
1792 unsigned int Read256BytesBlockHeightY[],
1793 unsigned int Read256BytesBlockHeightC[],
1794 unsigned int ReadBlockWidthY[],
1795 unsigned int ReadBlockWidthC[],
1796 unsigned int ReadBlockHeightY[],
1797 unsigned int ReadBlockHeightC[],
1798 unsigned int DCCMetaPitchY[],
1799 unsigned int DCCMetaPitchC[],
1800
1801 /* Output */
1802 unsigned int SurfaceSizeInMALL[],
1803 bool *ExceededMALLSize)
1804 {
1805 unsigned int k;
1806 unsigned int TotalSurfaceSizeInMALLForSS = 0;
1807 unsigned int TotalSurfaceSizeInMALLForSubVP = 0;
1808 unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024;
1809
1810 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1811 if (ViewportStationary[k]) {
1812 SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]),
1813 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
1814 ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k],
1815 ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k],
1816 ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1817 ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
1818 dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k];
1819
1820 if (ReadBlockWidthC[k] > 0) {
1821 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1822 dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]),
1823 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] +
1824 ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
1825 dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
1826 dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]),
1827 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1828 ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
1829 dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) *
1830 BytesPerPixelC[k];
1831 }
1832 if (DCCEnable[k] == true) {
1833 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1834 (dml_min(dml_ceil(DCCMetaPitchY[k], 8 * Read256BytesBlockWidthY[k]),
1835 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *
1836 Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
1837 - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
1838 * dml_min(dml_ceil(SurfaceHeightY[k], 8 *
1839 Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1840 ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 *
1841 Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 *
1842 Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256) + (64 * 1024);
1843 if (Read256BytesBlockWidthC[k] > 0) {
1844 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1845 dml_min(dml_ceil(DCCMetaPitchC[k], 8 *
1846 Read256BytesBlockWidthC[k]),
1847 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
1848 * Read256BytesBlockWidthC[k] - 1, 8 *
1849 Read256BytesBlockWidthC[k]) -
1850 dml_floor(ViewportXStartC[k], 8 *
1851 Read256BytesBlockWidthC[k])) *
1852 dml_min(dml_ceil(SurfaceHeightC[k], 8 *
1853 Read256BytesBlockHeightC[k]),
1854 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1855 8 * Read256BytesBlockHeightC[k] - 1, 8 *
1856 Read256BytesBlockHeightC[k]) -
1857 dml_floor(ViewportYStartC[k], 8 *
1858 Read256BytesBlockHeightC[k])) *
1859 BytesPerPixelC[k] / 256;
1860 }
1861 }
1862 } else {
1863 SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] +
1864 ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
1865 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] +
1866 ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) *
1867 BytesPerPixelY[k];
1868 if (ReadBlockWidthC[k] > 0) {
1869 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1870 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] +
1871 ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
1872 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] +
1873 ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) *
1874 BytesPerPixelC[k];
1875 }
1876 if (DCCEnable[k] == true) {
1877 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1878 (dml_ceil(dml_min(DCCMetaPitchY[k], ViewportWidthY[k] + 8 *
1879 Read256BytesBlockWidthY[k] - 1), 8 *
1880 Read256BytesBlockWidthY[k]) *
1881 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
1882 Read256BytesBlockHeightY[k] - 1), 8 *
1883 Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256) + (64 * 1024);
1884
1885 if (Read256BytesBlockWidthC[k] > 0) {
1886 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1887 dml_ceil(dml_min(DCCMetaPitchC[k], ViewportWidthC[k] + 8 *
1888 Read256BytesBlockWidthC[k] - 1), 8 *
1889 Read256BytesBlockWidthC[k]) *
1890 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 *
1891 Read256BytesBlockHeightC[k] - 1), 8 *
1892 Read256BytesBlockHeightC[k]) *
1893 BytesPerPixelC[k] / 256;
1894 }
1895 }
1896 }
1897 }
1898
1899 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1900 /* SS and Subvp counted separate as they are never used at the same time */
1901 if (UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe)
1902 TotalSurfaceSizeInMALLForSubVP = TotalSurfaceSizeInMALLForSubVP + SurfaceSizeInMALL[k];
1903 else if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
1904 TotalSurfaceSizeInMALLForSS = TotalSurfaceSizeInMALLForSS + SurfaceSizeInMALL[k];
1905 }
1906 *ExceededMALLSize = (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) ||
1907 (TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes);
1908 } // CalculateSurfaceSizeInMall
1909
dml32_CalculateVMRowAndSwath(unsigned int NumberOfActiveSurfaces,DmlPipe myPipe[],unsigned int SurfaceSizeInMALL[],unsigned int PTEBufferSizeInRequestsLuma,unsigned int PTEBufferSizeInRequestsChroma,unsigned int DCCMetaBufferSizeBytes,enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],unsigned int MALLAllocatedForDCN,double SwathWidthY[],double SwathWidthC[],bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,unsigned int GPUVMMaxPageTableLevels,unsigned int GPUVMMinPageSizeKBytes[],unsigned int HostVMMinPageSize,bool PTEBufferSizeNotExceeded[],bool DCCMetaBufferSizeNotExceeded[],unsigned int dpte_row_width_luma_ub[],unsigned int dpte_row_width_chroma_ub[],unsigned int dpte_row_height_luma[],unsigned int dpte_row_height_chroma[],unsigned int dpte_row_height_linear_luma[],unsigned int dpte_row_height_linear_chroma[],unsigned int meta_req_width[],unsigned int meta_req_width_chroma[],unsigned int meta_req_height[],unsigned int meta_req_height_chroma[],unsigned int meta_row_width[],unsigned int meta_row_width_chroma[],unsigned int meta_row_height[],unsigned int meta_row_height_chroma[],unsigned int vm_group_bytes[],unsigned int dpte_group_bytes[],unsigned int PixelPTEReqWidthY[],unsigned int PixelPTEReqHeightY[],unsigned int PTERequestSizeY[],unsigned int PixelPTEReqWidthC[],unsigned int PixelPTEReqHeightC[],unsigned int PTERequestSizeC[],unsigned int dpde0_bytes_per_frame_ub_l[],unsigned int meta_pte_bytes_per_frame_ub_l[],unsigned int dpde0_bytes_per_frame_ub_c[],unsigned int meta_pte_bytes_per_frame_ub_c[],double PrefetchSourceLinesY[],double PrefetchSourceLinesC[],double VInitPreFillY[],double VInitPreFillC[],unsigned int MaxNumSwathY[],unsigned int MaxNumSwathC[],double meta_row_bw[],double dpte_row_bw[],double PixelPTEBytesPerRow[],double PDEAndMetaPTEBytesFrame[],double MetaRowByte[],bool use_one_row_for_frame[],bool use_one_row_for_frame_flip[],bool 
UsesMALLForStaticScreen[],bool PTE_BUFFER_MODE[],unsigned int BIGK_FRAGMENT_SIZE[])1910 void dml32_CalculateVMRowAndSwath(
1911 unsigned int NumberOfActiveSurfaces,
1912 DmlPipe myPipe[],
1913 unsigned int SurfaceSizeInMALL[],
1914 unsigned int PTEBufferSizeInRequestsLuma,
1915 unsigned int PTEBufferSizeInRequestsChroma,
1916 unsigned int DCCMetaBufferSizeBytes,
1917 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1918 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
1919 unsigned int MALLAllocatedForDCN,
1920 double SwathWidthY[],
1921 double SwathWidthC[],
1922 bool GPUVMEnable,
1923 bool HostVMEnable,
1924 unsigned int HostVMMaxNonCachedPageTableLevels,
1925 unsigned int GPUVMMaxPageTableLevels,
1926 unsigned int GPUVMMinPageSizeKBytes[],
1927 unsigned int HostVMMinPageSize,
1928
1929 /* Output */
1930 bool PTEBufferSizeNotExceeded[],
1931 bool DCCMetaBufferSizeNotExceeded[],
1932 unsigned int dpte_row_width_luma_ub[],
1933 unsigned int dpte_row_width_chroma_ub[],
1934 unsigned int dpte_row_height_luma[],
1935 unsigned int dpte_row_height_chroma[],
1936 unsigned int dpte_row_height_linear_luma[], // VBA_DELTA
1937 unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA
1938 unsigned int meta_req_width[],
1939 unsigned int meta_req_width_chroma[],
1940 unsigned int meta_req_height[],
1941 unsigned int meta_req_height_chroma[],
1942 unsigned int meta_row_width[],
1943 unsigned int meta_row_width_chroma[],
1944 unsigned int meta_row_height[],
1945 unsigned int meta_row_height_chroma[],
1946 unsigned int vm_group_bytes[],
1947 unsigned int dpte_group_bytes[],
1948 unsigned int PixelPTEReqWidthY[],
1949 unsigned int PixelPTEReqHeightY[],
1950 unsigned int PTERequestSizeY[],
1951 unsigned int PixelPTEReqWidthC[],
1952 unsigned int PixelPTEReqHeightC[],
1953 unsigned int PTERequestSizeC[],
1954 unsigned int dpde0_bytes_per_frame_ub_l[],
1955 unsigned int meta_pte_bytes_per_frame_ub_l[],
1956 unsigned int dpde0_bytes_per_frame_ub_c[],
1957 unsigned int meta_pte_bytes_per_frame_ub_c[],
1958 double PrefetchSourceLinesY[],
1959 double PrefetchSourceLinesC[],
1960 double VInitPreFillY[],
1961 double VInitPreFillC[],
1962 unsigned int MaxNumSwathY[],
1963 unsigned int MaxNumSwathC[],
1964 double meta_row_bw[],
1965 double dpte_row_bw[],
1966 double PixelPTEBytesPerRow[],
1967 double PDEAndMetaPTEBytesFrame[],
1968 double MetaRowByte[],
1969 bool use_one_row_for_frame[],
1970 bool use_one_row_for_frame_flip[],
1971 bool UsesMALLForStaticScreen[],
1972 bool PTE_BUFFER_MODE[],
1973 unsigned int BIGK_FRAGMENT_SIZE[])
1974 {
1975 unsigned int k;
1976 unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
1977 unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
1978 unsigned int PDEAndMetaPTEBytesFrameY;
1979 unsigned int PDEAndMetaPTEBytesFrameC;
1980 unsigned int MetaRowByteY[DC__NUM_DPP__MAX] = {0};
1981 unsigned int MetaRowByteC[DC__NUM_DPP__MAX] = {0};
1982 unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
1983 unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
1984 unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
1985 unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
1986 unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1987 unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
1988 unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1989 unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
1990 bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
1991
1992 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1993 if (HostVMEnable == true) {
1994 vm_group_bytes[k] = 512;
1995 dpte_group_bytes[k] = 512;
1996 } else if (GPUVMEnable == true) {
1997 vm_group_bytes[k] = 2048;
1998 if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
1999 dpte_group_bytes[k] = 512;
2000 else
2001 dpte_group_bytes[k] = 2048;
2002 } else {
2003 vm_group_bytes[k] = 0;
2004 dpte_group_bytes[k] = 0;
2005 }
2006
2007 if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
2008 myPipe[k].SourcePixelFormat == dm_420_12 ||
2009 myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
2010 if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
2011 !IsVertical(myPipe[k].SourceRotation)) {
2012 PTEBufferSizeInRequestsForLuma[k] =
2013 (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
2014 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
2015 } else {
2016 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
2017 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
2018 }
2019
2020 PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
2021 myPipe[k].ViewportStationary,
2022 myPipe[k].DCCEnable,
2023 myPipe[k].DPPPerSurface,
2024 myPipe[k].BlockHeight256BytesC,
2025 myPipe[k].BlockWidth256BytesC,
2026 myPipe[k].SourcePixelFormat,
2027 myPipe[k].SurfaceTiling,
2028 myPipe[k].BytePerPixelC,
2029 myPipe[k].SourceRotation,
2030 SwathWidthC[k],
2031 myPipe[k].ViewportHeightChroma,
2032 myPipe[k].ViewportXStartC,
2033 myPipe[k].ViewportYStartC,
2034 GPUVMEnable,
2035 HostVMEnable,
2036 HostVMMaxNonCachedPageTableLevels,
2037 GPUVMMaxPageTableLevels,
2038 GPUVMMinPageSizeKBytes[k],
2039 HostVMMinPageSize,
2040 PTEBufferSizeInRequestsForChroma[k],
2041 myPipe[k].PitchC,
2042 myPipe[k].DCCMetaPitchC,
2043 myPipe[k].BlockWidthC,
2044 myPipe[k].BlockHeightC,
2045
2046 /* Output */
2047 &MetaRowByteC[k],
2048 &PixelPTEBytesPerRowC[k],
2049 &dpte_row_width_chroma_ub[k],
2050 &dpte_row_height_chroma[k],
2051 &dpte_row_height_linear_chroma[k],
2052 &PixelPTEBytesPerRowC_one_row_per_frame[k],
2053 &dpte_row_width_chroma_ub_one_row_per_frame[k],
2054 &dpte_row_height_chroma_one_row_per_frame[k],
2055 &meta_req_width_chroma[k],
2056 &meta_req_height_chroma[k],
2057 &meta_row_width_chroma[k],
2058 &meta_row_height_chroma[k],
2059 &PixelPTEReqWidthC[k],
2060 &PixelPTEReqHeightC[k],
2061 &PTERequestSizeC[k],
2062 &dpde0_bytes_per_frame_ub_c[k],
2063 &meta_pte_bytes_per_frame_ub_c[k]);
2064
2065 PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
2066 myPipe[k].VRatioChroma,
2067 myPipe[k].VTapsChroma,
2068 myPipe[k].InterlaceEnable,
2069 myPipe[k].ProgressiveToInterlaceUnitInOPP,
2070 myPipe[k].SwathHeightC,
2071 myPipe[k].SourceRotation,
2072 myPipe[k].ViewportStationary,
2073 SwathWidthC[k],
2074 myPipe[k].ViewportHeightChroma,
2075 myPipe[k].ViewportXStartC,
2076 myPipe[k].ViewportYStartC,
2077
2078 /* Output */
2079 &VInitPreFillC[k],
2080 &MaxNumSwathC[k]);
2081 } else {
2082 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
2083 PTEBufferSizeInRequestsForChroma[k] = 0;
2084 PixelPTEBytesPerRowC[k] = 0;
2085 PDEAndMetaPTEBytesFrameC = 0;
2086 MetaRowByteC[k] = 0;
2087 MaxNumSwathC[k] = 0;
2088 PrefetchSourceLinesC[k] = 0;
2089 dpte_row_height_chroma_one_row_per_frame[k] = 0;
2090 dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
2091 PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
2092 }
2093
2094 PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
2095 myPipe[k].ViewportStationary,
2096 myPipe[k].DCCEnable,
2097 myPipe[k].DPPPerSurface,
2098 myPipe[k].BlockHeight256BytesY,
2099 myPipe[k].BlockWidth256BytesY,
2100 myPipe[k].SourcePixelFormat,
2101 myPipe[k].SurfaceTiling,
2102 myPipe[k].BytePerPixelY,
2103 myPipe[k].SourceRotation,
2104 SwathWidthY[k],
2105 myPipe[k].ViewportHeight,
2106 myPipe[k].ViewportXStart,
2107 myPipe[k].ViewportYStart,
2108 GPUVMEnable,
2109 HostVMEnable,
2110 HostVMMaxNonCachedPageTableLevels,
2111 GPUVMMaxPageTableLevels,
2112 GPUVMMinPageSizeKBytes[k],
2113 HostVMMinPageSize,
2114 PTEBufferSizeInRequestsForLuma[k],
2115 myPipe[k].PitchY,
2116 myPipe[k].DCCMetaPitchY,
2117 myPipe[k].BlockWidthY,
2118 myPipe[k].BlockHeightY,
2119
2120 /* Output */
2121 &MetaRowByteY[k],
2122 &PixelPTEBytesPerRowY[k],
2123 &dpte_row_width_luma_ub[k],
2124 &dpte_row_height_luma[k],
2125 &dpte_row_height_linear_luma[k],
2126 &PixelPTEBytesPerRowY_one_row_per_frame[k],
2127 &dpte_row_width_luma_ub_one_row_per_frame[k],
2128 &dpte_row_height_luma_one_row_per_frame[k],
2129 &meta_req_width[k],
2130 &meta_req_height[k],
2131 &meta_row_width[k],
2132 &meta_row_height[k],
2133 &PixelPTEReqWidthY[k],
2134 &PixelPTEReqHeightY[k],
2135 &PTERequestSizeY[k],
2136 &dpde0_bytes_per_frame_ub_l[k],
2137 &meta_pte_bytes_per_frame_ub_l[k]);
2138
2139 PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
2140 myPipe[k].VRatio,
2141 myPipe[k].VTaps,
2142 myPipe[k].InterlaceEnable,
2143 myPipe[k].ProgressiveToInterlaceUnitInOPP,
2144 myPipe[k].SwathHeightY,
2145 myPipe[k].SourceRotation,
2146 myPipe[k].ViewportStationary,
2147 SwathWidthY[k],
2148 myPipe[k].ViewportHeight,
2149 myPipe[k].ViewportXStart,
2150 myPipe[k].ViewportYStart,
2151
2152 /* Output */
2153 &VInitPreFillY[k],
2154 &MaxNumSwathY[k]);
2155
2156 PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2157 MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];
2158
2159 if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
2160 PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) {
2161 PTEBufferSizeNotExceeded[k] = true;
2162 } else {
2163 PTEBufferSizeNotExceeded[k] = false;
2164 }
2165
2166 one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
2167 PTEBufferSizeInRequestsForLuma[k] &&
2168 PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
2169 }
2170
2171 dml32_CalculateMALLUseForStaticScreen(
2172 NumberOfActiveSurfaces,
2173 MALLAllocatedForDCN,
2174 UseMALLForStaticScreen, // mode
2175 SurfaceSizeInMALL,
2176 one_row_per_frame_fits_in_buffer,
2177 /* Output */
2178 UsesMALLForStaticScreen); // boolen
2179
2180 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2181 PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2182 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2183 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2184 (GPUVMMinPageSizeKBytes[k] > 64);
2185 BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
2186 }
2187
2188 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2189 #ifdef __DML_VBA_DEBUG__
2190 dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n", __func__, k, SurfaceSizeInMALL[k]);
2191 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]);
2192 #endif
2193 use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2194 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2195 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2196 (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));
2197
2198 use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
2199 !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
2200
2201 if (use_one_row_for_frame[k]) {
2202 dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
2203 dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
2204 PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
2205 dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
2206 dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
2207 PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
2208 PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
2209 }
2210
2211 if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
2212 DCCMetaBufferSizeNotExceeded[k] = true;
2213 else
2214 DCCMetaBufferSizeNotExceeded[k] = false;
2215
2216 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
2217 if (use_one_row_for_frame[k])
2218 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
2219
2220 dml32_CalculateRowBandwidth(
2221 GPUVMEnable,
2222 myPipe[k].SourcePixelFormat,
2223 myPipe[k].VRatio,
2224 myPipe[k].VRatioChroma,
2225 myPipe[k].DCCEnable,
2226 myPipe[k].HTotal / myPipe[k].PixelClock,
2227 MetaRowByteY[k], MetaRowByteC[k],
2228 meta_row_height[k],
2229 meta_row_height_chroma[k],
2230 PixelPTEBytesPerRowY[k],
2231 PixelPTEBytesPerRowC[k],
2232 dpte_row_height_luma[k],
2233 dpte_row_height_chroma[k],
2234
2235 /* Output */
2236 &meta_row_bw[k],
2237 &dpte_row_bw[k]);
2238 #ifdef __DML_VBA_DEBUG__
2239 dml_print("DML::%s: k=%d, use_one_row_for_frame = %d\n", __func__, k, use_one_row_for_frame[k]);
2240 dml_print("DML::%s: k=%d, use_one_row_for_frame_flip = %d\n",
2241 __func__, k, use_one_row_for_frame_flip[k]);
2242 dml_print("DML::%s: k=%d, UseMALLForPStateChange = %d\n",
2243 __func__, k, UseMALLForPStateChange[k]);
2244 dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]);
2245 dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n",
2246 __func__, k, dpte_row_width_luma_ub[k]);
2247 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]);
2248 dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n",
2249 __func__, k, dpte_row_height_chroma[k]);
2250 dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n",
2251 __func__, k, dpte_row_width_chroma_ub[k]);
2252 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]);
2253 dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]);
2254 dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n",
2255 __func__, k, PTEBufferSizeNotExceeded[k]);
2256 dml_print("DML::%s: k=%d, PTE_BUFFER_MODE = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
2257 dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
2258 #endif
2259 }
2260 } // CalculateVMRowAndSwath
2261
dml32_CalculateVMAndRowBytes(bool ViewportStationary,bool DCCEnable,unsigned int NumberOfDPPs,unsigned int BlockHeight256Bytes,unsigned int BlockWidth256Bytes,enum source_format_class SourcePixelFormat,unsigned int SurfaceTiling,unsigned int BytePerPixel,enum dm_rotation_angle SourceRotation,double SwathWidth,unsigned int ViewportHeight,unsigned int ViewportXStart,unsigned int ViewportYStart,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,unsigned int GPUVMMaxPageTableLevels,unsigned int GPUVMMinPageSizeKBytes,unsigned int HostVMMinPageSize,unsigned int PTEBufferSizeInRequests,unsigned int Pitch,unsigned int DCCMetaPitch,unsigned int MacroTileWidth,unsigned int MacroTileHeight,unsigned int * MetaRowByte,unsigned int * PixelPTEBytesPerRow,unsigned int * dpte_row_width_ub,unsigned int * dpte_row_height,unsigned int * dpte_row_height_linear,unsigned int * PixelPTEBytesPerRow_one_row_per_frame,unsigned int * dpte_row_width_ub_one_row_per_frame,unsigned int * dpte_row_height_one_row_per_frame,unsigned int * MetaRequestWidth,unsigned int * MetaRequestHeight,unsigned int * meta_row_width,unsigned int * meta_row_height,unsigned int * PixelPTEReqWidth,unsigned int * PixelPTEReqHeight,unsigned int * PTERequestSize,unsigned int * DPDE0BytesFrame,unsigned int * MetaPTEBytesFrame)2262 unsigned int dml32_CalculateVMAndRowBytes(
2263 bool ViewportStationary,
2264 bool DCCEnable,
2265 unsigned int NumberOfDPPs,
2266 unsigned int BlockHeight256Bytes,
2267 unsigned int BlockWidth256Bytes,
2268 enum source_format_class SourcePixelFormat,
2269 unsigned int SurfaceTiling,
2270 unsigned int BytePerPixel,
2271 enum dm_rotation_angle SourceRotation,
2272 double SwathWidth,
2273 unsigned int ViewportHeight,
2274 unsigned int ViewportXStart,
2275 unsigned int ViewportYStart,
2276 bool GPUVMEnable,
2277 bool HostVMEnable,
2278 unsigned int HostVMMaxNonCachedPageTableLevels,
2279 unsigned int GPUVMMaxPageTableLevels,
2280 unsigned int GPUVMMinPageSizeKBytes,
2281 unsigned int HostVMMinPageSize,
2282 unsigned int PTEBufferSizeInRequests,
2283 unsigned int Pitch,
2284 unsigned int DCCMetaPitch,
2285 unsigned int MacroTileWidth,
2286 unsigned int MacroTileHeight,
2287
2288 /* Output */
2289 unsigned int *MetaRowByte,
2290 unsigned int *PixelPTEBytesPerRow,
2291 unsigned int *dpte_row_width_ub,
2292 unsigned int *dpte_row_height,
2293 unsigned int *dpte_row_height_linear,
2294 unsigned int *PixelPTEBytesPerRow_one_row_per_frame,
2295 unsigned int *dpte_row_width_ub_one_row_per_frame,
2296 unsigned int *dpte_row_height_one_row_per_frame,
2297 unsigned int *MetaRequestWidth,
2298 unsigned int *MetaRequestHeight,
2299 unsigned int *meta_row_width,
2300 unsigned int *meta_row_height,
2301 unsigned int *PixelPTEReqWidth,
2302 unsigned int *PixelPTEReqHeight,
2303 unsigned int *PTERequestSize,
2304 unsigned int *DPDE0BytesFrame,
2305 unsigned int *MetaPTEBytesFrame)
2306 {
2307 unsigned int MPDEBytesFrame;
2308 unsigned int DCCMetaSurfaceBytes;
2309 unsigned int ExtraDPDEBytesFrame;
2310 unsigned int PDEAndMetaPTEBytesFrame;
2311 unsigned int HostVMDynamicLevels = 0;
2312 unsigned int MacroTileSizeBytes;
2313 unsigned int vp_height_meta_ub;
2314 unsigned int vp_height_dpte_ub;
2315 unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
2316
2317 if (GPUVMEnable == true && HostVMEnable == true) {
2318 if (HostVMMinPageSize < 2048)
2319 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
2320 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
2321 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
2322 else
2323 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
2324 }
2325
2326 *MetaRequestHeight = 8 * BlockHeight256Bytes;
2327 *MetaRequestWidth = 8 * BlockWidth256Bytes;
2328 if (SurfaceTiling == dm_sw_linear) {
2329 *meta_row_height = 32;
2330 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
2331 - dml_floor(ViewportXStart, *MetaRequestWidth);
2332 } else if (!IsVertical(SourceRotation)) {
2333 *meta_row_height = *MetaRequestHeight;
2334 if (ViewportStationary && NumberOfDPPs == 1) {
2335 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
2336 *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
2337 } else {
2338 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
2339 }
2340 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
2341 } else {
2342 *meta_row_height = *MetaRequestWidth;
2343 if (ViewportStationary && NumberOfDPPs == 1) {
2344 *meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
2345 *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
2346 } else {
2347 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
2348 }
2349 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
2350 }
2351
2352 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2353 vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
2354 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
2355 } else if (!IsVertical(SourceRotation)) {
2356 vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2357 } else {
2358 vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2359 }
2360
2361 DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
2362
2363 if (GPUVMEnable == true) {
2364 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
2365 (8 * 4.0 * 1024), 1) + 1) * 64;
2366 MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
2367 } else {
2368 *MetaPTEBytesFrame = 0;
2369 MPDEBytesFrame = 0;
2370 }
2371
2372 if (DCCEnable != true) {
2373 *MetaPTEBytesFrame = 0;
2374 MPDEBytesFrame = 0;
2375 *MetaRowByte = 0;
2376 }
2377
2378 MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
2379
2380 if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
2381 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2382 vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
2383 MacroTileHeight - 1, MacroTileHeight) -
2384 dml_floor(ViewportYStart, MacroTileHeight);
2385 } else if (!IsVertical(SourceRotation)) {
2386 vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
2387 } else {
2388 vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
2389 }
2390 *DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
2391 (8 * 2097152), 1) + 1);
2392 ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
2393 } else {
2394 *DPDE0BytesFrame = 0;
2395 ExtraDPDEBytesFrame = 0;
2396 vp_height_dpte_ub = 0;
2397 }
2398
2399 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2400
2401 #ifdef __DML_VBA_DEBUG__
2402 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
2403 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
2404 dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
2405 dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
2406 dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
2407 dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
2408 dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
2409 dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
2410 dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
2411 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
2412 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
2413 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
2414 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
2415 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
2416 dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
2417 dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth);
2418 dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
2419 #endif
2420
2421 if (HostVMEnable == true)
2422 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
2423
2424 if (SurfaceTiling == dm_sw_linear) {
2425 *PixelPTEReqHeight = 1;
2426 *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2427 PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2428 *PTERequestSize = 64;
2429 } else if (GPUVMMinPageSizeKBytes == 4) {
2430 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2431 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2432 *PTERequestSize = 128;
2433 } else {
2434 *PixelPTEReqHeight = MacroTileHeight;
2435 *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
2436 *PTERequestSize = 64;
2437 }
2438 #ifdef __DML_VBA_DEBUG__
2439 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2440 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
2441 dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
2442 dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
2443 dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
2444 dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
2445 dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
2446 #endif
2447
2448 *dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
2449 *dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
2450 (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
2451 (double) *PixelPTEReqWidth;
2452 *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
2453 *PTERequestSize;
2454
2455 if (SurfaceTiling == dm_sw_linear) {
2456 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2457 *PixelPTEReqWidth / Pitch), 1));
2458 #ifdef __DML_VBA_DEBUG__
2459 dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
2460 PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
2461 dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
2462 dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
2463 dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
2464 dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2465 dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
2466 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2467 *PixelPTEReqWidth / Pitch), 1));
2468 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2469 #endif
2470 *dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
2471 (double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
2472 *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
2473
2474 // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
2475 *dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2476 PixelPTEReqWidth_linear / Pitch), 1);
2477 if (*dpte_row_height_linear > 128)
2478 *dpte_row_height_linear = 128;
2479
2480 } else if (!IsVertical(SourceRotation)) {
2481 *dpte_row_height = *PixelPTEReqHeight;
2482
2483 if (GPUVMMinPageSizeKBytes > 64) {
2484 *dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
2485 *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2486 } else if (ViewportStationary && (NumberOfDPPs == 1)) {
2487 *dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
2488 *PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
2489 dml_floor(ViewportXStart, *PixelPTEReqWidth);
2490 } else {
2491 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
2492 *PixelPTEReqWidth;
2493 }
2494
2495 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2496 } else {
2497 *dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);
2498
2499 if (ViewportStationary && (NumberOfDPPs == 1)) {
2500 *dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
2501 *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
2502 } else {
2503 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
2504 * *PixelPTEReqHeight;
2505 }
2506
2507 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
2508 }
2509
2510 if (GPUVMEnable != true)
2511 *PixelPTEBytesPerRow = 0;
2512 if (HostVMEnable == true)
2513 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2514
2515 #ifdef __DML_VBA_DEBUG__
2516 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2517 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2518 dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
2519 dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
2520 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
2521 dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
2522 dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
2523 dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
2524 __func__, *dpte_row_width_ub_one_row_per_frame);
2525 dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
2526 __func__, *PixelPTEBytesPerRow_one_row_per_frame);
2527 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
2528 *MetaPTEBytesFrame);
2529 #endif
2530
2531 return PDEAndMetaPTEBytesFrame;
2532 } // CalculateVMAndRowBytes
2533
dml32_CalculatePrefetchSourceLines(double VRatio,unsigned int VTaps,bool Interlace,bool ProgressiveToInterlaceUnitInOPP,unsigned int SwathHeight,enum dm_rotation_angle SourceRotation,bool ViewportStationary,double SwathWidth,unsigned int ViewportHeight,unsigned int ViewportXStart,unsigned int ViewportYStart,double * VInitPreFill,unsigned int * MaxNumSwath)2534 double dml32_CalculatePrefetchSourceLines(
2535 double VRatio,
2536 unsigned int VTaps,
2537 bool Interlace,
2538 bool ProgressiveToInterlaceUnitInOPP,
2539 unsigned int SwathHeight,
2540 enum dm_rotation_angle SourceRotation,
2541 bool ViewportStationary,
2542 double SwathWidth,
2543 unsigned int ViewportHeight,
2544 unsigned int ViewportXStart,
2545 unsigned int ViewportYStart,
2546
2547 /* Output */
2548 double *VInitPreFill,
2549 unsigned int *MaxNumSwath)
2550 {
2551
2552 unsigned int vp_start_rot;
2553 unsigned int sw0_tmp;
2554 unsigned int MaxPartialSwath;
2555 double numLines;
2556
2557 #ifdef __DML_VBA_DEBUG__
2558 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
2559 dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
2560 dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
2561 dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
2562 dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
2563 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
2564 #endif
2565 if (ProgressiveToInterlaceUnitInOPP)
2566 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
2567 else
2568 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
2569
2570 if (ViewportStationary) {
2571 if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
2572 vp_start_rot = SwathHeight -
2573 (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
2574 } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
2575 vp_start_rot = ViewportXStart;
2576 } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
2577 vp_start_rot = SwathHeight -
2578 (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
2579 } else {
2580 vp_start_rot = ViewportYStart;
2581 }
2582 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
2583 if (sw0_tmp < *VInitPreFill)
2584 *MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
2585 else
2586 *MaxNumSwath = 1;
2587 MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
2588 } else {
2589 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
2590 if (*VInitPreFill > 1)
2591 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
2592 else
2593 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
2594 }
2595 numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
2596
2597 #ifdef __DML_VBA_DEBUG__
2598 dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
2599 dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill);
2600 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
2601 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
2602 dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
2603 #endif
2604 return numLines;
2605
2606 } // CalculatePrefetchSourceLines
2607
dml32_CalculateMALLUseForStaticScreen(unsigned int NumberOfActiveSurfaces,unsigned int MALLAllocatedForDCNFinal,enum dm_use_mall_for_static_screen_mode * UseMALLForStaticScreen,unsigned int SurfaceSizeInMALL[],bool one_row_per_frame_fits_in_buffer[],bool UsesMALLForStaticScreen[])2608 void dml32_CalculateMALLUseForStaticScreen(
2609 unsigned int NumberOfActiveSurfaces,
2610 unsigned int MALLAllocatedForDCNFinal,
2611 enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
2612 unsigned int SurfaceSizeInMALL[],
2613 bool one_row_per_frame_fits_in_buffer[],
2614
2615 /* output */
2616 bool UsesMALLForStaticScreen[])
2617 {
2618 unsigned int k;
2619 unsigned int SurfaceToAddToMALL;
2620 bool CanAddAnotherSurfaceToMALL;
2621 unsigned int TotalSurfaceSizeInMALL;
2622
2623 TotalSurfaceSizeInMALL = 0;
2624 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2625 UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
2626 if (UsesMALLForStaticScreen[k])
2627 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
2628 #ifdef __DML_VBA_DEBUG__
2629 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]);
2630 dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n", __func__, k, TotalSurfaceSizeInMALL);
2631 #endif
2632 }
2633
2634 SurfaceToAddToMALL = 0;
2635 CanAddAnotherSurfaceToMALL = true;
2636 while (CanAddAnotherSurfaceToMALL) {
2637 CanAddAnotherSurfaceToMALL = false;
2638 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2639 if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
2640 !UsesMALLForStaticScreen[k] &&
2641 UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable &&
2642 one_row_per_frame_fits_in_buffer[k] &&
2643 (!CanAddAnotherSurfaceToMALL ||
2644 SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
2645 CanAddAnotherSurfaceToMALL = true;
2646 SurfaceToAddToMALL = k;
2647 #ifdef __DML_VBA_DEBUG__
2648 dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
2649 __func__, k, UseMALLForStaticScreen[k]);
2650 #endif
2651 }
2652 }
2653 if (CanAddAnotherSurfaceToMALL) {
2654 UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
2655 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
2656
2657 #ifdef __DML_VBA_DEBUG__
2658 dml_print("DML::%s: SurfaceToAddToMALL = %d\n", __func__, SurfaceToAddToMALL);
2659 dml_print("DML::%s: TotalSurfaceSizeInMALL = %d\n", __func__, TotalSurfaceSizeInMALL);
2660 #endif
2661
2662 }
2663 }
2664 }
2665
dml32_CalculateRowBandwidth(bool GPUVMEnable,enum source_format_class SourcePixelFormat,double VRatio,double VRatioChroma,bool DCCEnable,double LineTime,unsigned int MetaRowByteLuma,unsigned int MetaRowByteChroma,unsigned int meta_row_height_luma,unsigned int meta_row_height_chroma,unsigned int PixelPTEBytesPerRowLuma,unsigned int PixelPTEBytesPerRowChroma,unsigned int dpte_row_height_luma,unsigned int dpte_row_height_chroma,double * meta_row_bw,double * dpte_row_bw)2666 void dml32_CalculateRowBandwidth(
2667 bool GPUVMEnable,
2668 enum source_format_class SourcePixelFormat,
2669 double VRatio,
2670 double VRatioChroma,
2671 bool DCCEnable,
2672 double LineTime,
2673 unsigned int MetaRowByteLuma,
2674 unsigned int MetaRowByteChroma,
2675 unsigned int meta_row_height_luma,
2676 unsigned int meta_row_height_chroma,
2677 unsigned int PixelPTEBytesPerRowLuma,
2678 unsigned int PixelPTEBytesPerRowChroma,
2679 unsigned int dpte_row_height_luma,
2680 unsigned int dpte_row_height_chroma,
2681 /* Output */
2682 double *meta_row_bw,
2683 double *dpte_row_bw)
2684 {
2685 if (DCCEnable != true) {
2686 *meta_row_bw = 0;
2687 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2688 SourcePixelFormat == dm_rgbe_alpha) {
2689 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
2690 MetaRowByteChroma / (meta_row_height_chroma * LineTime);
2691 } else {
2692 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
2693 }
2694
2695 if (GPUVMEnable != true) {
2696 *dpte_row_bw = 0;
2697 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2698 SourcePixelFormat == dm_rgbe_alpha) {
2699 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
2700 VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
2701 } else {
2702 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
2703 }
2704 }
2705
/*
 * dml32_CalculateUrgentLatency - urgent latency, optionally fabric-clock adjusted.
 *
 * Starts from dml_max3() of the three supplied urgent latencies. When
 * DoUrgentLatencyAdjustment is set, adds
 *   UrgentLatencyAdjustmentFabricClockComponent *
 *   (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 *
 * Return: the resulting latency.
 */
double dml32_CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	double latency = dml_max3(UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData,
			UrgentLatencyVMDataOnly);

	/* No adjustment requested: the base latency is the answer. */
	if (!DoUrgentLatencyAdjustment)
		return latency;

	return latency + UrgentLatencyAdjustmentFabricClockComponent *
			(UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
2724
dml32_CalculateUrgentBurstFactor(enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,unsigned int swath_width_luma_ub,unsigned int swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double LineTime,double UrgentLatency,double CursorBufferSize,unsigned int CursorWidth,unsigned int CursorBPP,double VRatio,double VRatioC,double BytePerPixelInDETY,double BytePerPixelInDETC,unsigned int DETBufferSizeY,unsigned int DETBufferSizeC,double * UrgentBurstFactorCursor,double * UrgentBurstFactorLuma,double * UrgentBurstFactorChroma,bool * NotEnoughUrgentLatencyHiding)2725 void dml32_CalculateUrgentBurstFactor(
2726 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
2727 unsigned int swath_width_luma_ub,
2728 unsigned int swath_width_chroma_ub,
2729 unsigned int SwathHeightY,
2730 unsigned int SwathHeightC,
2731 double LineTime,
2732 double UrgentLatency,
2733 double CursorBufferSize,
2734 unsigned int CursorWidth,
2735 unsigned int CursorBPP,
2736 double VRatio,
2737 double VRatioC,
2738 double BytePerPixelInDETY,
2739 double BytePerPixelInDETC,
2740 unsigned int DETBufferSizeY,
2741 unsigned int DETBufferSizeC,
2742 /* Output */
2743 double *UrgentBurstFactorCursor,
2744 double *UrgentBurstFactorLuma,
2745 double *UrgentBurstFactorChroma,
2746 bool *NotEnoughUrgentLatencyHiding)
2747 {
2748 double LinesInDETLuma;
2749 double LinesInDETChroma;
2750 unsigned int LinesInCursorBuffer;
2751 double CursorBufferSizeInTime;
2752 double DETBufferSizeInTimeLuma;
2753 double DETBufferSizeInTimeChroma;
2754
2755 *NotEnoughUrgentLatencyHiding = 0;
2756
2757 if (CursorWidth > 0) {
2758 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
2759 (CursorWidth * CursorBPP / 8.0)), 1.0);
2760 if (VRatio > 0) {
2761 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
2762 if (CursorBufferSizeInTime - UrgentLatency <= 0) {
2763 *NotEnoughUrgentLatencyHiding = 1;
2764 *UrgentBurstFactorCursor = 0;
2765 } else {
2766 *UrgentBurstFactorCursor = CursorBufferSizeInTime /
2767 (CursorBufferSizeInTime - UrgentLatency);
2768 }
2769 } else {
2770 *UrgentBurstFactorCursor = 1;
2771 }
2772 }
2773
2774 LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
2775 DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
2776
2777 if (VRatio > 0) {
2778 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
2779 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
2780 *NotEnoughUrgentLatencyHiding = 1;
2781 *UrgentBurstFactorLuma = 0;
2782 } else {
2783 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
2784 }
2785 } else {
2786 *UrgentBurstFactorLuma = 1;
2787 }
2788
2789 if (BytePerPixelInDETC > 0) {
2790 LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
2791 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
2792 / swath_width_chroma_ub;
2793
2794 if (VRatio > 0) {
2795 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
2796 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
2797 *NotEnoughUrgentLatencyHiding = 1;
2798 *UrgentBurstFactorChroma = 0;
2799 } else {
2800 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
2801 / (DETBufferSizeInTimeChroma - UrgentLatency);
2802 }
2803 } else {
2804 *UrgentBurstFactorChroma = 1;
2805 }
2806 }
2807 } // CalculateUrgentBurstFactor
2808
dml32_CalculateDCFCLKDeepSleep(unsigned int NumberOfActiveSurfaces,unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],double VRatio[],double VRatioChroma[],double SwathWidthY[],double SwathWidthC[],unsigned int DPPPerSurface[],double HRatio[],double HRatioChroma[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double Dppclk[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int ReturnBusWidth,double * DCFClkDeepSleep)2809 void dml32_CalculateDCFCLKDeepSleep(
2810 unsigned int NumberOfActiveSurfaces,
2811 unsigned int BytePerPixelY[],
2812 unsigned int BytePerPixelC[],
2813 double VRatio[],
2814 double VRatioChroma[],
2815 double SwathWidthY[],
2816 double SwathWidthC[],
2817 unsigned int DPPPerSurface[],
2818 double HRatio[],
2819 double HRatioChroma[],
2820 double PixelClock[],
2821 double PSCL_THROUGHPUT[],
2822 double PSCL_THROUGHPUT_CHROMA[],
2823 double Dppclk[],
2824 double ReadBandwidthLuma[],
2825 double ReadBandwidthChroma[],
2826 unsigned int ReturnBusWidth,
2827
2828 /* Output */
2829 double *DCFClkDeepSleep)
2830 {
2831 unsigned int k;
2832 double DisplayPipeLineDeliveryTimeLuma;
2833 double DisplayPipeLineDeliveryTimeChroma;
2834 double DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX];
2835 double ReadBandwidth = 0.0;
2836
2837 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2838
2839 if (VRatio[k] <= 1) {
2840 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
2841 / PixelClock[k];
2842 } else {
2843 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
2844 }
2845 if (BytePerPixelC[k] == 0) {
2846 DisplayPipeLineDeliveryTimeChroma = 0;
2847 } else {
2848 if (VRatioChroma[k] <= 1) {
2849 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] *
2850 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
2851 } else {
2852 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
2853 / Dppclk[k];
2854 }
2855 }
2856
2857 if (BytePerPixelC[k] > 0) {
2858 DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
2859 BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
2860 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
2861 32.0 / DisplayPipeLineDeliveryTimeChroma);
2862 } else {
2863 DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
2864 64.0 / DisplayPipeLineDeliveryTimeLuma;
2865 }
2866 DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16);
2867
2868 #ifdef __DML_VBA_DEBUG__
2869 dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
2870 dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
2871 #endif
2872 }
2873
2874 for (k = 0; k < NumberOfActiveSurfaces; ++k)
2875 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
2876
2877 *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth);
2878
2879 #ifdef __DML_VBA_DEBUG__
2880 dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
2881 dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
2882 dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
2883 dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
2884 #endif
2885
2886 for (k = 0; k < NumberOfActiveSurfaces; ++k)
2887 *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
2888 #ifdef __DML_VBA_DEBUG__
2889 dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
2890 #endif
2891 } // CalculateDCFCLKDeepSleep
2892
/*
 * Compute the writeback delay from the writeback scaler geometry.
 *
 * Returns 0 when the computed (unclamped) index of the last output line is
 * negative; otherwise
 *   last_line * line_length + (HTotal - WritebackDestinationWidth) + 80.
 *
 * WritebackPixelFormat and WritebackHRatio are accepted but not used.
 */
double dml32_CalculateWriteBackDelay(
		enum source_format_class WritebackPixelFormat,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackVTaps,
		unsigned int WritebackDestinationWidth,
		unsigned int WritebackDestinationHeight,
		unsigned int WritebackSourceHeight,
		unsigned int HTotal)
{
	const double VInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
	/* A line is at least the destination width, or ceil(width / 6) per vertical tap. */
	const double LineLength = dml_max((double) WritebackDestinationWidth,
			dml_ceil((double) WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
	const double LastOutputLine = WritebackDestinationHeight - 1 -
			dml_ceil(((double) WritebackSourceHeight - (double) VInit) /
					(double) WritebackVRatio, 1.0);

	if (LastOutputLine < 0)
		return 0;

	return LastOutputLine * LineLength +
			(HTotal - WritebackDestinationWidth) + 80;
}
2922
dml32_UseMinimumDCFCLK(enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],bool DRRDisplay[],bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,unsigned int MaxInterDCNTileRepeaters,unsigned int MaxPrefetchMode,double DRAMClockChangeLatencyFinal,double FCLKChangeLatency,double SREnterPlusExitTime,unsigned int ReturnBusWidth,unsigned int RoundTripPingLatencyCycles,unsigned int ReorderingBytes,unsigned int PixelChunkSizeInKByte,unsigned int MetaChunkSize,bool GPUVMEnable,unsigned int GPUVMMaxPageTableLevels,bool HostVMEnable,unsigned int NumberOfActiveSurfaces,double HostVMMinPageSize,unsigned int HostVMMaxNonCachedPageTableLevels,bool DynamicMetadataVMEnabled,bool ImmediateFlipRequirement,bool ProgressiveToInterlaceUnitInOPP,double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,unsigned int VTotal[],unsigned int VActive[],unsigned int DynamicMetadataTransmittedBytes[],unsigned int DynamicMetadataLinesBeforeActiveRequired[],bool Interlace[],double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],double RequiredDISPCLK[][2],double UrgLatency[],unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],double ProjectedDCFClkDeepSleep[][2],double MaximumVStartup[][2][DC__NUM_DPP__MAX],unsigned int TotalNumberOfActiveDPP[][2],unsigned int TotalNumberOfDCCActiveDPP[][2],unsigned int dpte_group_bytes[],double PrefetchLinesY[][2][DC__NUM_DPP__MAX],double PrefetchLinesC[][2][DC__NUM_DPP__MAX],unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],unsigned int HTotal[],double PixelClock[],double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],double MetaRowBytes[][2][DC__NUM_DPP__MAX],bool DynamicMetadataEnable[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double DCFCLKPerState[],double 
DCFCLKState[][2])2923 void dml32_UseMinimumDCFCLK(
2924 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
2925 bool DRRDisplay[],
2926 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2927 unsigned int MaxInterDCNTileRepeaters,
2928 unsigned int MaxPrefetchMode,
2929 double DRAMClockChangeLatencyFinal,
2930 double FCLKChangeLatency,
2931 double SREnterPlusExitTime,
2932 unsigned int ReturnBusWidth,
2933 unsigned int RoundTripPingLatencyCycles,
2934 unsigned int ReorderingBytes,
2935 unsigned int PixelChunkSizeInKByte,
2936 unsigned int MetaChunkSize,
2937 bool GPUVMEnable,
2938 unsigned int GPUVMMaxPageTableLevels,
2939 bool HostVMEnable,
2940 unsigned int NumberOfActiveSurfaces,
2941 double HostVMMinPageSize,
2942 unsigned int HostVMMaxNonCachedPageTableLevels,
2943 bool DynamicMetadataVMEnabled,
2944 bool ImmediateFlipRequirement,
2945 bool ProgressiveToInterlaceUnitInOPP,
2946 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
2947 double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
2948 unsigned int VTotal[],
2949 unsigned int VActive[],
2950 unsigned int DynamicMetadataTransmittedBytes[],
2951 unsigned int DynamicMetadataLinesBeforeActiveRequired[],
2952 bool Interlace[],
2953 double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
2954 double RequiredDISPCLK[][2],
2955 double UrgLatency[],
2956 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
2957 double ProjectedDCFClkDeepSleep[][2],
2958 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
2959 unsigned int TotalNumberOfActiveDPP[][2],
2960 unsigned int TotalNumberOfDCCActiveDPP[][2],
2961 unsigned int dpte_group_bytes[],
2962 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
2963 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
2964 unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
2965 unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
2966 unsigned int BytePerPixelY[],
2967 unsigned int BytePerPixelC[],
2968 unsigned int HTotal[],
2969 double PixelClock[],
2970 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
2971 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
2972 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
2973 bool DynamicMetadataEnable[],
2974 double ReadBandwidthLuma[],
2975 double ReadBandwidthChroma[],
2976 double DCFCLKPerState[],
2977 /* Output */
2978 double DCFCLKState[][2])
2979 {
2980 unsigned int i, j, k;
2981 unsigned int dummy1;
2982 double dummy2, dummy3;
2983 double NormalEfficiency;
2984 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
2985
2986 NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
2987 for (i = 0; i < DC__VOLTAGE_STATES; ++i) {
2988 for (j = 0; j <= 1; ++j) {
2989 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
2990 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
2991 double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
2992 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
2993 double MinimumTWait = 0.0;
2994 double DPTEBandwidth;
2995 double DCFCLKRequiredForAverageBandwidth;
2996 unsigned int ExtraLatencyBytes;
2997 double ExtraLatencyCycles;
2998 double DCFCLKRequiredForPeakBandwidth;
2999 unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
3000 double MinimumTvmPlus2Tr0;
3001
3002 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
3003 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3004 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
3005 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
3006 / (15.75 * HTotal[k] / PixelClock[k]);
3007 }
3008
3009 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k)
3010 NoOfDPPState[k] = NoOfDPP[i][j][k];
3011
3012 DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
3013 DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
3014
3015 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
3016 TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
3017 TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
3018 NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
3019 HostVMMaxNonCachedPageTableLevels);
3020 ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
3021 + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
3022 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3023 double DCFCLKCyclesRequiredInPrefetch;
3024 double PrefetchTime;
3025
3026 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
3027 * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
3028 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
3029 * BytePerPixelC[k]) / NormalEfficiency
3030 / ReturnBusWidth;
3031 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
3032 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
3033 / NormalEfficiency / ReturnBusWidth
3034 * (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
3035 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
3036 / ReturnBusWidth
3037 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
3038 + PixelDCFCLKCyclesRequiredInPrefetch[k];
3039 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
3040 * HTotal[k] / PixelClock[k];
3041 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
3042 DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
3043 UrgLatency[i] * GPUVMMaxPageTableLevels *
3044 (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
3045
3046 MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
3047 UseMALLForPStateChange[k],
3048 SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3049 DRRDisplay[k],
3050 DRAMClockChangeLatencyFinal,
3051 FCLKChangeLatency,
3052 UrgLatency[i],
3053 SREnterPlusExitTime);
3054
3055 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
3056 MinimumTWait - UrgLatency[i] *
3057 ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
3058 GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ?
3059 HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
3060 DynamicMetadataVMExtraLatency[k];
3061
3062 if (PrefetchTime > 0) {
3063 double ExpectedVRatioPrefetch;
3064
3065 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
3066 PixelDCFCLKCyclesRequiredInPrefetch[k] /
3067 DCFCLKCyclesRequiredInPrefetch);
3068 DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
3069 PixelDCFCLKCyclesRequiredInPrefetch[k] /
3070 PrefetchPixelLinesTime[k] *
3071 dml_max(1.0, ExpectedVRatioPrefetch) *
3072 dml_max(1.0, ExpectedVRatioPrefetch / 4);
3073 if (HostVMEnable == true || ImmediateFlipRequirement == true) {
3074 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3075 DCFCLKRequiredForPeakBandwidthPerSurface[k] +
3076 NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
3077 NormalEfficiency / ReturnBusWidth;
3078 }
3079 } else {
3080 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3081 }
3082 if (DynamicMetadataEnable[k] == true) {
3083 double TSetupPipe;
3084 double TdmbfPipe;
3085 double TdmsksPipe;
3086 double TdmecPipe;
3087 double AllowedTimeForUrgentExtraLatency;
3088
3089 dml32_CalculateVUpdateAndDynamicMetadataParameters(
3090 MaxInterDCNTileRepeaters,
3091 RequiredDPPCLKPerSurface[i][j][k],
3092 RequiredDISPCLK[i][j],
3093 ProjectedDCFClkDeepSleep[i][j],
3094 PixelClock[k],
3095 HTotal[k],
3096 VTotal[k] - VActive[k],
3097 DynamicMetadataTransmittedBytes[k],
3098 DynamicMetadataLinesBeforeActiveRequired[k],
3099 Interlace[k],
3100 ProgressiveToInterlaceUnitInOPP,
3101
3102 /* output */
3103 &TSetupPipe,
3104 &TdmbfPipe,
3105 &TdmecPipe,
3106 &TdmsksPipe,
3107 &dummy1,
3108 &dummy2,
3109 &dummy3);
3110 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
3111 PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
3112 TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
3113 if (AllowedTimeForUrgentExtraLatency > 0)
3114 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3115 dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
3116 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
3117 else
3118 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3119 }
3120 }
3121 DCFCLKRequiredForPeakBandwidth = 0;
3122 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) {
3123 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
3124 DCFCLKRequiredForPeakBandwidthPerSurface[k];
3125 }
3126 MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
3127 (HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
3128 (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
3129 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3130 double MaximumTvmPlus2Tr0PlusTsw;
3131
3132 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
3133 PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
3134 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
3135 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
3136 } else {
3137 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
3138 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
3139 MinimumTvmPlus2Tr0 -
3140 PrefetchPixelLinesTime[k] / 4),
3141 (2 * ExtraLatencyCycles +
3142 PixelDCFCLKCyclesRequiredInPrefetch[k]) /
3143 (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
3144 }
3145 }
3146 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
3147 dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
3148 }
3149 }
3150 }
3151
/*
 * Return the number of extra latency bytes:
 *
 *   ReorderingBytes
 *   + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *      + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024
 *   + when GPUVMEnable, for each active surface:
 *       NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels)
 *       * HostVMInefficiencyFactor
 *
 * HostVMDynamicLevels is 0 unless both GPUVM and HostVM are enabled; in that
 * case it is HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2 (never
 * below 0) depending on HostVMMinPageSize (< 2048, < 1048576, otherwise).
 *
 * The sum is accumulated as a double and converted to unsigned int on return.
 */
unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
		unsigned int TotalNumberOfActiveDPP,
		unsigned int PixelChunkSizeInKByte,
		unsigned int TotalNumberOfDCCActiveDPP,
		unsigned int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int NumberOfActiveSurfaces,
		unsigned int NumberOfDPP[],
		unsigned int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int HostVMDynamicLevels = 0;
	unsigned int PerGroupFactor;
	unsigned int Surface;
	double TotalBytes;

	if (GPUVMEnable == true && HostVMEnable == true) {
		if (HostVMMinPageSize < 2048)
			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	TotalBytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
			TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	/* Page table traffic only contributes when GPUVM is in use. */
	if (GPUVMEnable != true)
		return TotalBytes;

	PerGroupFactor = 1 + 8 * HostVMDynamicLevels;
	for (Surface = 0; Surface < NumberOfActiveSurfaces; ++Surface) {
		TotalBytes = TotalBytes + NumberOfDPP[Surface] * dpte_group_bytes[Surface] *
				PerGroupFactor * HostVMInefficiencyFactor;
	}

	return TotalBytes;
}
3192
/*
 * Compute the VUpdate / VReady pixel offsets and the dynamic metadata
 * timing terms.
 *
 * Outputs:
 *   *VUpdateWidthPix  - ceil((14 / DCFClkDeepSleep + 12 / Dppclk
 *                             + repeater delay) * PixelClock)
 *   *VReadyOffsetPix  - ceil(max(150 / Dppclk, repeater delay
 *                             + 20 / DCFClkDeepSleep + 10 / Dppclk)
 *                             * PixelClock)
 *   *VUpdateOffsetPix - ceil(HTotal / 4)
 *   *TSetup           - sum of the three values above divided by PixelClock
 *   *Tdmbf            - DynamicMetadataTransmittedBytes / 4 / Dispclk
 *   *Tdmec            - HTotal / PixelClock
 *   *Tdmsks           - half of the vertical blank time when
 *                       DynamicMetadataLinesBeforeActiveRequired is 0,
 *                       otherwise that many line times; halved again for
 *                       interlaced timing without progressive-to-interlace
 *                       conversion in OPP.
 *
 * where repeater delay = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk).
 */
void dml32_CalculateVUpdateAndDynamicMetadataParameters(
		unsigned int MaxInterDCNTileRepeaters,
		double Dppclk,
		double Dispclk,
		double DCFClkDeepSleep,
		double PixelClock,
		unsigned int HTotal,
		unsigned int VBlank,
		unsigned int DynamicMetadataTransmittedBytes,
		unsigned int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,

		/* output */
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	*VUpdateWidthPix =
			dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / Dppclk,
			TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	else
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		*Tdmsks = *Tdmsks / 2;
#ifdef __DML_VBA_DEBUG__
	/* VUpdateWidthPix and VReadyOffsetPix are doubles, so they need %f, not %d. */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);

	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
			__func__, DynamicMetadataLinesBeforeActiveRequired);
	dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
	dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
3247
dml32_CalculateTWait(unsigned int PrefetchMode,enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,bool DRRDisplay,double DRAMClockChangeLatency,double FCLKChangeLatency,double UrgentLatency,double SREnterPlusExitTime)3248 double dml32_CalculateTWait(
3249 unsigned int PrefetchMode,
3250 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
3251 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3252 bool DRRDisplay,
3253 double DRAMClockChangeLatency,
3254 double FCLKChangeLatency,
3255 double UrgentLatency,
3256 double SREnterPlusExitTime)
3257 {
3258 double TWait = 0.0;
3259
3260 if (PrefetchMode == 0 &&
3261 !(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
3262 !(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
3263 !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
3264 !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
3265 TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3266 } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3267 TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3268 } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3269 TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
3270 } else {
3271 TWait = UrgentLatency;
3272 }
3273
3274 #ifdef __DML_VBA_DEBUG__
3275 dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
3276 dml_print("DML::%s: TWait = %f\n", __func__, TWait);
3277 #endif
3278 return TWait;
3279 } // CalculateTWait
3280
3281 // Function: get_return_bw_mbps
3282 // Megabyte per second
dml32_get_return_bw_mbps(const soc_bounding_box_st * soc,const int VoltageLevel,const bool HostVMEnable,const double DCFCLK,const double FabricClock,const double DRAMSpeed)3283 double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
3284 const int VoltageLevel,
3285 const bool HostVMEnable,
3286 const double DCFCLK,
3287 const double FabricClock,
3288 const double DRAMSpeed)
3289 {
3290 double ReturnBW = 0.;
3291 double IdealSDPPortBandwidth = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
3292 double IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
3293 double IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
3294 double PixelDataOnlyReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3295 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3296 IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3297 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3298 double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3299 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3300 IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3301 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3302
3303 if (HostVMEnable != true)
3304 ReturnBW = PixelDataOnlyReturnBW;
3305 else
3306 ReturnBW = PixelMixedWithVMDataReturnBW;
3307
3308 #ifdef __DML_VBA_DEBUG__
3309 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3310 dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
3311 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
3312 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3313 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
3314 dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth);
3315 dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth);
3316 dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth);
3317 dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW);
3318 dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
3319 dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW);
3320 #endif
3321 return ReturnBW;
3322 }
3323
3324 // Function: get_return_bw_mbps_vm_only
3325 // Megabyte per second
dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st * soc,const int VoltageLevel,const double DCFCLK,const double FabricClock,const double DRAMSpeed)3326 double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
3327 const int VoltageLevel,
3328 const double DCFCLK,
3329 const double FabricClock,
3330 const double DRAMSpeed)
3331 {
3332 double VMDataOnlyReturnBW = dml_min3(
3333 soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3334 FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
3335 * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3336 DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
3337 * (VoltageLevel < 2 ?
3338 soc->pct_ideal_dram_bw_after_urgent_strobe :
3339 soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
3340 #ifdef __DML_VBA_DEBUG__
3341 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3342 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
3343 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3344 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
3345 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
3346 #endif
3347 return VMDataOnlyReturnBW;
3348 }
3349
dml32_CalculateExtraLatency(unsigned int RoundTripPingLatencyCycles,unsigned int ReorderingBytes,double DCFCLK,unsigned int TotalNumberOfActiveDPP,unsigned int PixelChunkSizeInKByte,unsigned int TotalNumberOfDCCActiveDPP,unsigned int MetaChunkSize,double ReturnBW,bool GPUVMEnable,bool HostVMEnable,unsigned int NumberOfActiveSurfaces,unsigned int NumberOfDPP[],unsigned int dpte_group_bytes[],double HostVMInefficiencyFactor,double HostVMMinPageSize,unsigned int HostVMMaxNonCachedPageTableLevels)3350 double dml32_CalculateExtraLatency(
3351 unsigned int RoundTripPingLatencyCycles,
3352 unsigned int ReorderingBytes,
3353 double DCFCLK,
3354 unsigned int TotalNumberOfActiveDPP,
3355 unsigned int PixelChunkSizeInKByte,
3356 unsigned int TotalNumberOfDCCActiveDPP,
3357 unsigned int MetaChunkSize,
3358 double ReturnBW,
3359 bool GPUVMEnable,
3360 bool HostVMEnable,
3361 unsigned int NumberOfActiveSurfaces,
3362 unsigned int NumberOfDPP[],
3363 unsigned int dpte_group_bytes[],
3364 double HostVMInefficiencyFactor,
3365 double HostVMMinPageSize,
3366 unsigned int HostVMMaxNonCachedPageTableLevels)
3367 {
3368 double ExtraLatencyBytes;
3369 double ExtraLatency;
3370
3371 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
3372 ReorderingBytes,
3373 TotalNumberOfActiveDPP,
3374 PixelChunkSizeInKByte,
3375 TotalNumberOfDCCActiveDPP,
3376 MetaChunkSize,
3377 GPUVMEnable,
3378 HostVMEnable,
3379 NumberOfActiveSurfaces,
3380 NumberOfDPP,
3381 dpte_group_bytes,
3382 HostVMInefficiencyFactor,
3383 HostVMMinPageSize,
3384 HostVMMaxNonCachedPageTableLevels);
3385
3386 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
3387
3388 #ifdef __DML_VBA_DEBUG__
3389 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
3390 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
3391 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
3392 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
3393 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
3394 #endif
3395
3396 return ExtraLatency;
3397 } // CalculateExtraLatency
3398
/*
 * dml32_CalculatePrefetchSchedule - work out the prefetch schedule of pipe k.
 *
 * Two candidate prefetch durations are evaluated, both in destination lines:
 *   - dst_y_prefetch_oto: the sum of the VM fetch, two row fetches and the
 *     swath fetch when everything is fetched at prefetch_bw_oto;
 *   - dst_y_prefetch_equ: the lines left before active once *TSetup,
 *     max(TWait + TCalc, *Tdmdl) and the after-scaler delay are subtracted
 *     from VStartup, capped at __DML_VBA_MAX_DST_Y_PRE__.
 * When the "oto" duration is shorter than the "equ" one, the "oto" timings and
 * bandwidth are used; otherwise the "equ" timings and prefetch_bw_equ are
 * used (or an error is raised to ask for a larger VStartup, see below).
 *
 * Inputs:
 *   v, k                      - mode library state and the index used for the
 *                               per-plane arrays read from it
 *   HostVMInefficiencyFactor  - multiplier applied to the PDE/PTE byte counts
 *   myPipe                    - clocks, timing and format of the pipe
 *   DSCDelay, DPP_RECOUT_WIDTH- terms added to the after-scaler pixel delay
 *   VStartup, MaxVStartup     - VStartup under evaluation and its maximum
 *   UrgentLatency, UrgentExtraLatency, TCalc, TWait, TPreReq - time budgets
 *   PDEAndMetaPTEBytesFrame, MetaRowByte, PixelPTEBytesPerRow - bytes to fetch
 *                               for the VM stage and for each row stage
 *   PrefetchSourceLines{Y,C}, swath_width_{luma,chroma}_ub, VInitPreFill{Y,C},
 *   MaxNumSwath{Y,C}, SwathHeight{Y,C} - swath geometry used for the pixel
 *                               data part of the prefetch
 *   SwathWidthY, SwathWidthC  - not referenced by this function
 *   ExtendPrefetchIfPossible  - see the comment in the "equ" branch
 *
 * Outputs (all through pointers): see the parameter list. TSetup and the
 * VUpdate/VReady values are handed to
 * dml32_CalculateVUpdateAndDynamicMetadataParameters(); the rest is written
 * here.
 *
 * Return: true on error. When either Dppclk or Dispclk is zero the function
 * returns true early, before any of the schedule outputs are written. For all
 * other errors *PrefetchBandwidth, *DestinationLinesToRequestVMInVBlank,
 * *DestinationLinesToRequestRowInVBlank, *DestinationLinesForPrefetch,
 * *VRatioPrefetchY/C and *RequiredPrefetchPixDataBWLuma/Chroma are zeroed.
 */
bool dml32_CalculatePrefetchSchedule(
		struct vba_vars_st *v,
		unsigned int k,
		double HostVMInefficiencyFactor,
		DmlPipe *myPipe,
		unsigned int DSCDelay,
		unsigned int DPP_RECOUT_WIDTH,
		unsigned int VStartup,
		unsigned int MaxVStartup,
		double UrgentLatency,
		double UrgentExtraLatency,
		double TCalc,
		unsigned int PDEAndMetaPTEBytesFrame,
		unsigned int MetaRowByte,
		unsigned int PixelPTEBytesPerRow,
		double PrefetchSourceLinesY,
		unsigned int SwathWidthY,
		unsigned int VInitPreFillY,
		unsigned int MaxNumSwathY,
		double PrefetchSourceLinesC,
		unsigned int SwathWidthC,
		unsigned int VInitPreFillC,
		unsigned int MaxNumSwathC,
		unsigned int swath_width_luma_ub,
		unsigned int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double TWait,
		double TPreReq,
		bool ExtendPrefetchIfPossible,
		/* Output */
		double *DSTXAfterScaler,
		double *DSTYAfterScaler,
		double *DestinationLinesForPrefetch,
		double *PrefetchBandwidth,
		double *DestinationLinesToRequestVMInVBlank,
		double *DestinationLinesToRequestRowInVBlank,
		double *VRatioPrefetchY,
		double *VRatioPrefetchC,
		double *RequiredPrefetchPixDataBWLuma,
		double *RequiredPrefetchPixDataBWChroma,
		bool *NotEnoughTimeForDynamicMetadata,
		double *Tno_bw,
		double *prefetch_vmrow_bw,
		double *Tdmdl_vm,
		double *Tdmdl,
		double *TSetup,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater;
	bool MyError = false;
	unsigned int DPPCycles, DISPCLKCycles;
	double DSTTotalPixelsAfterScaler;
	double LineTime;
	double dst_y_prefetch_equ;
	double prefetch_bw_oto;
	double Tvm_oto;
	double Tr0_oto;
	double Tvm_oto_lines;
	double Tr0_oto_lines;
	double dst_y_prefetch_oto;
	double TimeForFetchingMetaPTE = 0;
	double TimeForFetchingRowInVBlank = 0;
	double LinesToRequestPrefetchPixelData = 0;
	double LinesForPrefetchBandwidth = 0;
	unsigned int HostVMDynamicLevelsTrips;
	double trip_to_mem;
	double Tvm_trips;
	double Tr0_trips;
	double Tvm_trips_rounded;
	double Tr0_trips_rounded;
	double Lsw_oto;
	double Tpre_rounded;
	double prefetch_bw_equ;
	double Tvm_equ;
	double Tr0_equ;
	double Tdmbf;
	double Tdmec;
	double Tdmsks;
	double prefetch_sw_bytes;
	double bytes_pp;
	double dep_bytes;
	unsigned int max_vratio_pre = v->MaxVRatioPre;
	double min_Lsw;
	double Tsw_est1 = 0;
	double Tsw_est3 = 0;

	/* Host VM page table trips only count when both GPUVM and HostVM are on. */
	if (v->GPUVMEnable == true && v->HostVMEnable == true)
		HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
	else
		HostVMDynamicLevelsTrips = 0;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable);
	dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels);
	dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
	dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
			__func__, v->HostVMEnable, HostVMInefficiencyFactor);
#endif
	dml32_CalculateVUpdateAndDynamicMetadataParameters(
			v->MaxInterDCNTileRepeaters,
			myPipe->Dppclk,
			myPipe->Dispclk,
			myPipe->DCFClkDeepSleep,
			myPipe->PixelClock,
			myPipe->HTotal,
			myPipe->VBlank,
			v->DynamicMetadataTransmittedBytes[k],
			v->DynamicMetadataLinesBeforeActiveRequired[k],
			myPipe->InterlaceEnable,
			myPipe->ProgressiveToInterlaceUnitInOPP,
			TSetup,

			/* output */
			&Tdmbf,
			&Tdmec,
			&Tdmsks,
			VUpdateOffsetPix,
			VUpdateWidthPix,
			VReadyOffsetPix);

	LineTime = myPipe->HTotal / myPipe->PixelClock;
	trip_to_mem = UrgentLatency;
	Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);

	if (v->DynamicMetadataVMEnabled == true)
		*Tdmdl = TWait + Tvm_trips + trip_to_mem;
	else
		*Tdmdl = TWait + UrgentExtraLatency;

#ifdef __DML_VBA_ALLOW_DELTA__
	if (v->DynamicMetadataEnable[k] == false)
		*Tdmdl = 0.0;
#endif

	/* Dynamic metadata must fit in the time between VStartup and active. */
	if (v->DynamicMetadataEnable[k] == true) {
		if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
			*NotEnoughTimeForDynamicMetadata = true;
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
			dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
					__func__, Tdmbf);
			dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
			dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
					__func__, Tdmsks);
			dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
					__func__, *Tdmdl);
#endif
		} else {
			*NotEnoughTimeForDynamicMetadata = false;
		}
	} else {
		*NotEnoughTimeForDynamicMetadata = false;
	}

	*Tdmdl_vm = (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true &&
			v->GPUVMEnable == true ? TWait + Tvm_trips : 0);

	if (myPipe->ScalerEnabled)
		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL;
	else
		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly;

	DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor;

	DISPCLKCycles = v->DISPCLKDelaySubtotal;

	/* Both clocks are used as divisors below; bail out with an error on zero. */
	if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
		return true;

	*DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
			myPipe->PixelClock / myPipe->Dispclk + DSCDelay;

	*DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
			+ (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
			+ ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
					myPipe->HActive / 2 : 0)
			+ ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
	dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
	dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
	dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
	dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk);
	dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
	dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode);
	dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
	/* *DSTXAfterScaler is a double, so it must be printed with %f. */
	dml_print("DML::%s: DSTXAfterScaler: %f\n", __func__, *DSTXAfterScaler);
#endif

	if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
		*DSTYAfterScaler = 1;
	else
		*DSTYAfterScaler = 0;

	/* Renormalise the delay into whole lines (Y) plus remaining pixels (X). */
	DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
	*DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
	*DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DSTXAfterScaler: %f (final)\n", __func__, *DSTXAfterScaler);
	dml_print("DML::%s: DSTYAfterScaler: %f (final)\n", __func__, *DSTYAfterScaler);
#endif

	Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);

	/*
	 * The "*_rounded" trip times are the trip times scaled to quarter lines
	 * through dml_ceil(4 * t / LineTime, 1) / 4 (presumably a round-up to the
	 * next quarter line), with a floor of LineTime / 4 applied afterwards.
	 */
	if (v->GPUVMEnable == true) {
		Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
		if (v->GPUVMMaxPageTableLevels >= 3) {
			*Tno_bw = UrgentExtraLatency + trip_to_mem *
					(double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
		} else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) {
			Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
					4.0 * LineTime; // VBA_ERROR
			*Tno_bw = UrgentExtraLatency;
		} else {
			*Tno_bw = 0;
		}
	} else if (myPipe->DCCEnable == true) {
		Tvm_trips_rounded = LineTime / 4.0;
		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
		*Tno_bw = 0;
	} else {
		Tvm_trips_rounded = LineTime / 4.0;
		Tr0_trips_rounded = LineTime / 2.0;
		*Tno_bw = 0;
	}
	Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
	Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);

	/*
	 * NOTE(review): if BytePerPixelC is an integer type, "/ 4" truncates
	 * before the sum is converted to double - confirm this is intended.
	 */
	if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
			|| myPipe->SourcePixelFormat == dm_420_12) {
		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
	} else {
		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
	}

	prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
			+ PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
	prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
			prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));

	/* Minimum swath fetch lines so that the prefetch vratio stays within max_vratio_pre. */
	min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
	min_Lsw = dml_max(min_Lsw, 1.0);
	Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;

	if (v->GPUVMEnable == true) {
		Tvm_oto = dml_max3(
				Tvm_trips,
				*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
				LineTime / 4.0);
	} else {
		Tvm_oto = LineTime / 4.0;
	}

	if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
		Tr0_oto = dml_max4(
				Tr0_trips,
				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
				(LineTime - Tvm_oto)/2.0,
				LineTime / 4.0);
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
		dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
		/* Print the candidate that dml_max4() above really receives. */
		dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, (LineTime - Tvm_oto) / 2.0);
		dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
#endif
	} else {
		Tr0_oto = (LineTime - Tvm_oto) / 2.0;
	}

	Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
	Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
	dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;

	dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
			(*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);

	dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, __DML_VBA_MAX_DST_Y_PRE__);
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
	dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
	dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
	dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
	dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
	dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
	dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
	dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
	dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
	dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
	dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
	dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
	dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
	dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
	dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
	dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
	dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
	dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
	dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
	dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
	dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
	dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
	dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
	dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
	dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
#endif

	dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
	Tpre_rounded = dst_y_prefetch_equ * LineTime;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
	dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
	dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
	dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
			__func__, VStartup * LineTime);
	dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
	dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
	dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
	dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
	dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
	dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
	dml_print("DML::%s: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler\n",
			__func__, *DSTYAfterScaler);
#endif
	dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
			MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);

	/* Swath bytes smaller than the VM/row bytes are replaced by twice the latter. */
	if (prefetch_sw_bytes < dep_bytes)
		prefetch_sw_bytes = 2 * dep_bytes;

	*PrefetchBandwidth = 0;
	*DestinationLinesToRequestVMInVBlank = 0;
	*DestinationLinesToRequestRowInVBlank = 0;
	*VRatioPrefetchY = 0;
	*VRatioPrefetchC = 0;
	*RequiredPrefetchPixDataBWLuma = 0;
	if (dst_y_prefetch_equ > 1 &&
			(Tpre_rounded >= TPreReq || dst_y_prefetch_equ == __DML_VBA_MAX_DST_Y_PRE__)) {
		double PrefetchBandwidth1;
		double PrefetchBandwidth2;
		double PrefetchBandwidth3;
		double PrefetchBandwidth4;
		bool Case1OK;
		bool Case2OK;
		bool Case3OK;

		/*
		 * Four candidate bandwidths, depending on whether the VM stage
		 * and/or the row stages are bound by bandwidth (their bytes are
		 * in the numerator) or by their rounded trip time (the time is
		 * taken out of the denominator):
		 *   1: VM and row bandwidth-bound    2: only VM bandwidth-bound
		 *   3: only row bandwidth-bound      4: neither
		 */
		if (Tpre_rounded - *Tno_bw > 0) {
			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
					+ prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
			Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
		} else {
			PrefetchBandwidth1 = 0;
		}

		if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
				&& Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
		}

		if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
			PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
					(Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
		else
			PrefetchBandwidth2 = 0;

		if (Tpre_rounded - Tvm_trips_rounded > 0) {
			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
					+ prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
			Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
		} else {
			PrefetchBandwidth3 = 0;
		}

		if (VStartup == MaxVStartup &&
				(Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
				LineTime - Tvm_trips_rounded > 0) {
			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
		}

		if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
			PrefetchBandwidth4 = prefetch_sw_bytes /
					(Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
		} else {
			PrefetchBandwidth4 = 0;
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
		dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
		dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
		dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
		dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
		dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
		dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
		dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
		dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
#endif

		/*
		 * A candidate is usable when it is positive and the assumption
		 * it was built on holds; "&&" short-circuits, so a zero
		 * candidate is never used as a divisor.
		 */
		Case1OK = PrefetchBandwidth1 > 0
				&& *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
						>= Tvm_trips_rounded
				&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
						/ PrefetchBandwidth1 >= Tr0_trips_rounded;

		Case2OK = PrefetchBandwidth2 > 0
				&& *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
						>= Tvm_trips_rounded
				&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
						/ PrefetchBandwidth2 < Tr0_trips_rounded;

		Case3OK = PrefetchBandwidth3 > 0
				&& *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3
						< Tvm_trips_rounded
				&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
						/ PrefetchBandwidth3 >= Tr0_trips_rounded;

		if (Case1OK)
			prefetch_bw_equ = PrefetchBandwidth1;
		else if (Case2OK)
			prefetch_bw_equ = PrefetchBandwidth2;
		else if (Case3OK)
			prefetch_bw_equ = PrefetchBandwidth3;
		else
			prefetch_bw_equ = PrefetchBandwidth4;

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
		dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
		dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
		dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
#endif

		if (prefetch_bw_equ > 0) {
			if (v->GPUVMEnable == true) {
				Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
						HostVMInefficiencyFactor / prefetch_bw_equ,
						Tvm_trips, LineTime / 4);
			} else {
				Tvm_equ = LineTime / 4;
			}

			if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
				Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
						HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
						(LineTime - Tvm_equ) / 2, LineTime / 4);
			} else {
				Tr0_equ = (LineTime - Tvm_equ) / 2;
			}
		} else {
			Tvm_equ = 0;
			Tr0_equ = 0;
#ifdef __DML_VBA_DEBUG__
			dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
#endif
		}

		if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
			/*
			 * The oto schedule fits. Its timings and bandwidth are
			 * used, but the reported prefetch length falls back to
			 * equ when oto alone would be shorter than TPreReq.
			 */
			if (dst_y_prefetch_oto * LineTime < TPreReq) {
				*DestinationLinesForPrefetch = dst_y_prefetch_equ;
			} else {
				*DestinationLinesForPrefetch = dst_y_prefetch_oto;
			}
			TimeForFetchingMetaPTE = Tvm_oto;
			TimeForFetchingRowInVBlank = Tr0_oto;
			*PrefetchBandwidth = prefetch_bw_oto;
			/* Clamp to oto for bandwidth calculation */
			LinesForPrefetchBandwidth = dst_y_prefetch_oto;
		} else {
			/* For mode programming we want to extend the prefetch as much as possible
			 * (up to oto, or as long as we can for equ) if we're not already applying
			 * the 60us prefetch requirement. This is to avoid intermittent underflow
			 * issues during prefetch.
			 *
			 * The prefetch extension is applied under the following scenarios:
			 * 1. We're in prefetch mode > 0 (i.e. we don't support MCLK switch in blank)
			 * 2. We're using subvp or drr methods of p-state switch, in which case
			 *    we don't care if prefetch takes up more of the blanking time
			 *
			 * Mode programming typically chooses the smallest prefetch time possible
			 * (i.e. highest bandwidth during prefetch) presumably to create margin between
			 * p-states / c-states that happen in vblank and prefetch. Therefore we only
			 * apply this prefetch extension when p-state in vblank is not required (UCLK
			 * p-states take up the most vblank time).
			 */
			if (ExtendPrefetchIfPossible && TPreReq == 0 && VStartup < MaxVStartup) {
				/* Reported as an error while VStartup is still below MaxVStartup. */
				MyError = true;
			} else {
				*DestinationLinesForPrefetch = dst_y_prefetch_equ;
				TimeForFetchingMetaPTE = Tvm_equ;
				TimeForFetchingRowInVBlank = Tr0_equ;
				*PrefetchBandwidth = prefetch_bw_equ;
				/* Clamp to equ for bandwidth calculation */
				LinesForPrefetchBandwidth = dst_y_prefetch_equ;
			}
		}

		*DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;

		*DestinationLinesToRequestRowInVBlank =
				dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;

		/* Lines left for pixel data once the VM and the two row stages are taken out. */
		LinesToRequestPrefetchPixelData = LinesForPrefetchBandwidth -
				*DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
		dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
				__func__, *DestinationLinesToRequestVMInVBlank);
		dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
		dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
		dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
				__func__, *DestinationLinesToRequestRowInVBlank);
		dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
		dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
#endif

		if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
			*VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
			*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
			dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
			dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
#endif
			if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
				if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
					*VRatioPrefetchY =
							dml_max((double) PrefetchSourceLinesY /
									LinesToRequestPrefetchPixelData,
									(double) MaxNumSwathY * SwathHeightY /
									(LinesToRequestPrefetchPixelData -
									(VInitPreFillY - 3.0) / 2.0));
					*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
				} else {
					MyError = true;
					*VRatioPrefetchY = 0;
				}
#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
				dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
				dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
#endif
			}

			*VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
			*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
			dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
			dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
#endif
			/*
			 * NOTE(review): unlike the luma path there is no
			 * "VInitPreFillC > 3" condition here - confirm intended.
			 */
			if ((SwathHeightC > 4)) {
				if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
					*VRatioPrefetchC =
							dml_max(*VRatioPrefetchC,
									(double) MaxNumSwathC * SwathHeightC /
									(LinesToRequestPrefetchPixelData -
									(VInitPreFillC - 3.0) / 2.0));
					*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
				} else {
					MyError = true;
					*VRatioPrefetchC = 0;
				}
#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
				dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
				dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
#endif
			}

			*RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
					/ LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
					/ LineTime;

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
			dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
			dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
					__func__, *RequiredPrefetchPixDataBWLuma);
#endif
			*RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
					LinesToRequestPrefetchPixelData
					* myPipe->BytePerPixelC
					* swath_width_chroma_ub / LineTime;
		} else {
			MyError = true;
#ifdef __DML_VBA_DEBUG__
			dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
					__func__, LinesToRequestPrefetchPixelData);
#endif
			*VRatioPrefetchY = 0;
			*VRatioPrefetchC = 0;
			*RequiredPrefetchPixDataBWLuma = 0;
			*RequiredPrefetchPixDataBWChroma = 0;
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
				(double)LinesToRequestPrefetchPixelData * LineTime +
				2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
		dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
		dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
				(*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
		dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
		dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
				TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
				((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
		dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
				PixelPTEBytesPerRow);
#endif
	} else {
		MyError = true;
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
				__func__, dst_y_prefetch_equ);
#endif
	}

	{
		double prefetch_vm_bw;
		double prefetch_row_bw;

		/* Bandwidth of the VM stage; needing bytes but having no lines is an error. */
		if (PDEAndMetaPTEBytesFrame == 0) {
			prefetch_vm_bw = 0;
		} else if (*DestinationLinesToRequestVMInVBlank > 0) {
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
			dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
			dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
					__func__, *DestinationLinesToRequestVMInVBlank);
			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
#endif
			prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
					(*DestinationLinesToRequestVMInVBlank * LineTime);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
#endif
		} else {
			prefetch_vm_bw = 0;
			MyError = true;
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
					__func__, *DestinationLinesToRequestVMInVBlank);
#endif
		}

		/* Same for the row stage. */
		if (MetaRowByte + PixelPTEBytesPerRow == 0) {
			prefetch_row_bw = 0;
		} else if (*DestinationLinesToRequestRowInVBlank > 0) {
			prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
					(*DestinationLinesToRequestRowInVBlank * LineTime);

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
			dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
			dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
					__func__, *DestinationLinesToRequestRowInVBlank);
			dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
#endif
		} else {
			prefetch_row_bw = 0;
			MyError = true;
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
					__func__, *DestinationLinesToRequestRowInVBlank);
#endif
		}

		*prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
	}

	/* On any error the schedule outputs are cleared so callers see a null schedule. */
	if (MyError) {
		*PrefetchBandwidth = 0;
		*DestinationLinesToRequestVMInVBlank = 0;
		*DestinationLinesToRequestRowInVBlank = 0;
		*DestinationLinesForPrefetch = 0;
		*VRatioPrefetchY = 0;
		*VRatioPrefetchC = 0;
		*RequiredPrefetchPixDataBWLuma = 0;
		*RequiredPrefetchPixDataBWChroma = 0;
	}

	return MyError;
} // CalculatePrefetchSchedule
4115
dml32_CalculateFlipSchedule(double HostVMInefficiencyFactor,double UrgentExtraLatency,double UrgentLatency,unsigned int GPUVMMaxPageTableLevels,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,bool GPUVMEnable,double HostVMMinPageSize,double PDEAndMetaPTEBytesPerFrame,double MetaRowBytes,double DPTEBytesPerRow,double BandwidthAvailableForImmediateFlip,unsigned int TotImmediateFlipBytes,enum source_format_class SourcePixelFormat,double LineTime,double VRatio,double VRatioChroma,double Tno_bw,bool DCCEnable,unsigned int dpte_row_height,unsigned int meta_row_height,unsigned int dpte_row_height_chroma,unsigned int meta_row_height_chroma,bool use_one_row_for_frame_flip,double * DestinationLinesToRequestVMInImmediateFlip,double * DestinationLinesToRequestRowInImmediateFlip,double * final_flip_bw,bool * ImmediateFlipSupportedForPipe)4116 void dml32_CalculateFlipSchedule(
4117 double HostVMInefficiencyFactor,
4118 double UrgentExtraLatency,
4119 double UrgentLatency,
4120 unsigned int GPUVMMaxPageTableLevels,
4121 bool HostVMEnable,
4122 unsigned int HostVMMaxNonCachedPageTableLevels,
4123 bool GPUVMEnable,
4124 double HostVMMinPageSize,
4125 double PDEAndMetaPTEBytesPerFrame,
4126 double MetaRowBytes,
4127 double DPTEBytesPerRow,
4128 double BandwidthAvailableForImmediateFlip,
4129 unsigned int TotImmediateFlipBytes,
4130 enum source_format_class SourcePixelFormat,
4131 double LineTime,
4132 double VRatio,
4133 double VRatioChroma,
4134 double Tno_bw,
4135 bool DCCEnable,
4136 unsigned int dpte_row_height,
4137 unsigned int meta_row_height,
4138 unsigned int dpte_row_height_chroma,
4139 unsigned int meta_row_height_chroma,
4140 bool use_one_row_for_frame_flip,
4141
4142 /* Output */
4143 double *DestinationLinesToRequestVMInImmediateFlip,
4144 double *DestinationLinesToRequestRowInImmediateFlip,
4145 double *final_flip_bw,
4146 bool *ImmediateFlipSupportedForPipe)
4147 {
4148 double min_row_time = 0.0;
4149 unsigned int HostVMDynamicLevelsTrips;
4150 double TimeForFetchingMetaPTEImmediateFlip;
4151 double TimeForFetchingRowInVBlankImmediateFlip;
4152 double ImmediateFlipBW = 1.0;
4153
4154 if (GPUVMEnable == true && HostVMEnable == true)
4155 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
4156 else
4157 HostVMDynamicLevelsTrips = 0;
4158
4159 #ifdef __DML_VBA_DEBUG__
4160 dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
4161 dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
4162 #endif
4163
4164 if (TotImmediateFlipBytes > 0) {
4165 if (use_one_row_for_frame_flip) {
4166 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) *
4167 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4168 } else {
4169 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) *
4170 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4171 }
4172 if (GPUVMEnable == true) {
4173 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame *
4174 HostVMInefficiencyFactor / ImmediateFlipBW,
4175 UrgentExtraLatency + UrgentLatency *
4176 (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
4177 LineTime / 4.0);
4178 } else {
4179 TimeForFetchingMetaPTEImmediateFlip = 0;
4180 }
4181 if ((GPUVMEnable == true || DCCEnable == true)) {
4182 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
4183 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
4184 UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
4185 } else {
4186 TimeForFetchingRowInVBlankImmediateFlip = 0;
4187 }
4188
4189 *DestinationLinesToRequestVMInImmediateFlip =
4190 dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
4191 *DestinationLinesToRequestRowInImmediateFlip =
4192 dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
4193
4194 if (GPUVMEnable == true) {
4195 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
4196 (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
4197 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4198 (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
4199 } else if ((GPUVMEnable == true || DCCEnable == true)) {
4200 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4201 (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
4202 } else {
4203 *final_flip_bw = 0;
4204 }
4205 } else {
4206 TimeForFetchingMetaPTEImmediateFlip = 0;
4207 TimeForFetchingRowInVBlankImmediateFlip = 0;
4208 *DestinationLinesToRequestVMInImmediateFlip = 0;
4209 *DestinationLinesToRequestRowInImmediateFlip = 0;
4210 *final_flip_bw = 0;
4211 }
4212
4213 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
4214 if (GPUVMEnable == true && DCCEnable != true) {
4215 min_row_time = dml_min(dpte_row_height *
4216 LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
4217 } else if (GPUVMEnable != true && DCCEnable == true) {
4218 min_row_time = dml_min(meta_row_height *
4219 LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
4220 } else {
4221 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height *
4222 LineTime / VRatio, dpte_row_height_chroma * LineTime /
4223 VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
4224 }
4225 } else {
4226 if (GPUVMEnable == true && DCCEnable != true) {
4227 min_row_time = dpte_row_height * LineTime / VRatio;
4228 } else if (GPUVMEnable != true && DCCEnable == true) {
4229 min_row_time = meta_row_height * LineTime / VRatio;
4230 } else {
4231 min_row_time =
4232 dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
4233 }
4234 }
4235
4236 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
4237 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
4238 > min_row_time) {
4239 *ImmediateFlipSupportedForPipe = false;
4240 } else {
4241 *ImmediateFlipSupportedForPipe = true;
4242 }
4243
4244 #ifdef __DML_VBA_DEBUG__
4245 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
4246 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
4247 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
4248 __func__, *DestinationLinesToRequestVMInImmediateFlip);
4249 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
4250 __func__, *DestinationLinesToRequestRowInImmediateFlip);
4251 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
4252 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
4253 __func__, TimeForFetchingRowInVBlankImmediateFlip);
4254 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
4255 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
4256 #endif
4257 } // CalculateFlipSchedule
4258
/*
 * dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport - compute the
 * watermark set and the clock-change support levels for the active surfaces.
 *
 * Writes v->Watermark from the latencies in mmSOCParameters, then for every
 * active surface k computes how much latency the line buffer plus DET buffer
 * can hide and, from that, the margin left for a DRAM clock change, an FCLK
 * change and USR retraining.
 *
 * Outputs:
 *   *DRAMClockChangeSupport                 vactive / vblank / unsupported,
 *                                           qualified by the MALL usage found
 *                                           in v->UsesMALLForPStateChange[]
 *   MaxActiveDRAMClockChangeLatencySupported[k]
 *                                           DRAM margin plus the DRAM clock
 *                                           change latency; 0 for phantom pipes
 *   SubViewportLinesNeededInMALL[k]         larger of the luma and chroma line
 *                                           counts computed at the end
 *   *FCLKChangeSupport                      vactive / vblank / unsupported
 *   *MinActiveFCLKChangeLatencySupported    smallest FCLK margin plus the FCLK
 *                                           change latency
 *   *USRRetrainingSupport                   false if any non-phantom surface
 *                                           has a negative retraining margin
 *   ActiveDRAMClockChangeLatencyMargin[k]   per-surface DRAM margin
 *
 * DCFCLK and ReturnBW are accepted but not referenced by this function.
 */
void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
		struct vba_vars_st *v,
		unsigned int PrefetchMode,
		double DCFCLK,
		double ReturnBW,
		SOCParametersList mmSOCParameters,
		double SOCCLK,
		double DCFClkDeepSleep,
		unsigned int DETBufferSizeY[],
		unsigned int DETBufferSizeC[],
		unsigned int SwathHeightY[],
		unsigned int SwathHeightC[],
		double SwathWidthY[],
		double SwathWidthC[],
		unsigned int DPPPerSurface[],
		double BytePerPixelDETY[],
		double BytePerPixelDETC[],
		double DSTXAfterScaler[],
		double DSTYAfterScaler[],
		bool UnboundedRequestEnabled,
		unsigned int CompressedBufferSizeInkByte,

		/* Output */
		enum clock_change_support *DRAMClockChangeSupport,
		double MaxActiveDRAMClockChangeLatencySupported[],
		unsigned int SubViewportLinesNeededInMALL[],
		enum dm_fclock_change_support *FCLKChangeSupport,
		double *MinActiveFCLKChangeLatencySupported,
		bool *USRRetrainingSupport,
		double ActiveDRAMClockChangeLatencyMargin[])
{
	unsigned int i, j, k;
	unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
	unsigned int DRAMClockChangeSupportNumber = 0;
	unsigned int LastSurfaceWithoutMargin = 0;
	unsigned int DRAMClockChangeMethod = 0;
	bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
	double MinActiveFCLKChangeMargin = 0.;
	double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
	double ActiveClockChangeLatencyHidingY;
	double ActiveClockChangeLatencyHidingC;
	double ActiveClockChangeLatencyHiding;
	double EffectiveDETBufferSizeY;
	double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
	double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
	double TotalPixelBW = 0.0;
	bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
	double EffectiveLBLatencyHidingY;
	double EffectiveLBLatencyHidingC;
	double LinesInDETY[DC__NUM_DPP__MAX];
	double LinesInDETC[DC__NUM_DPP__MAX];
	unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
	unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
	double FullDETBufferingTimeY;
	double FullDETBufferingTimeC;
	double WritebackDRAMClockChangeLatencyMargin;
	double WritebackFCLKChangeLatencyMargin;
	double WritebackLatencyHiding;
	bool SameTimingForFCLKChange;

	unsigned int TotalActiveWriteback = 0;
	unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
	unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];

	/* Read-path watermarks: sums of the SOC latencies. */
	v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
	v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
			+ mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
	v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark;
	v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark;
	v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
			+ 10 / DCFClkDeepSleep;
	v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
			+ 10 / DCFClkDeepSleep;
	v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
			+ 10 / DCFClkDeepSleep;
	v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
			+ mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
	dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
	dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
	dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark);
	dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark);
	dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark);
	dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark);
	dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark);
	dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark);
	dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark);
	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
			__func__, v->Watermark.Z8StutterEnterPlusExitWatermark);
#endif

	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if (v->WritebackEnable[k] == true)
			TotalActiveWriteback = TotalActiveWriteback + 1;
	}

	/*
	 * Writeback watermarks. With more than one active writeback an extra
	 * WritebackChunkSize * 1024 / 32 / SOCCLK term is added to each.
	 */
	if (TotalActiveWriteback <= 1) {
		v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
	} else {
		v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
				+ v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
	}
	/*
	 * The retraining latency belongs on the urgent watermark just computed.
	 * The previous code added it to WritebackDRAMClockChangeWatermark, which
	 * is unconditionally overwritten below, so the adjustment was lost.
	 */
	if (v->USRRetrainingRequiredFinal)
		v->Watermark.WritebackUrgentWatermark = v->Watermark.WritebackUrgentWatermark
				+ mmSOCParameters.USRRetrainingLatency;

	if (TotalActiveWriteback <= 1) {
		v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
				+ mmSOCParameters.WritebackLatency;
		v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
				+ mmSOCParameters.WritebackLatency;
	} else {
		v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
				+ mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
		v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
				+ mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
	}

	if (v->USRRetrainingRequiredFinal)
		v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark
				+ mmSOCParameters.USRRetrainingLatency;

	if (v->USRRetrainingRequiredFinal)
		v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark
				+ mmSOCParameters.USRRetrainingLatency;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
			__func__, v->Watermark.WritebackDRAMClockChangeWatermark);
	dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark);
	dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark);
	dml_print("DML::%s: v->USRRetrainingRequiredFinal = %d\n", __func__, v->USRRetrainingRequiredFinal);
	dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
#endif

	/* Total pixel bandwidth over all surfaces; used to split the compressed buffer. */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] +
				SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]);
	}

	/* Per-surface latency hiding and the margins derived from it. */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {

		LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines,
				dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k]
					/ (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1))
				- (v->vtaps[k] - 1);
		LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines,
				dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k]
					/ (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1))
				- (v->VTAPsChroma[k] - 1);

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines);
		dml_print("DML::%s: k=%d, v->LineBufferSizeFinal = %d\n", __func__, k, v->LineBufferSizeFinal);
		dml_print("DML::%s: k=%d, v->LBBitPerPixel = %d\n", __func__, k, v->LBBitPerPixel[k]);
		dml_print("DML::%s: k=%d, v->HRatio = %f\n", __func__, k, v->HRatio[k]);
		dml_print("DML::%s: k=%d, v->vtaps = %d\n", __func__, k, v->vtaps[k]);
#endif

		EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
		EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
		EffectiveDETBufferSizeY = DETBufferSizeY[k];

		/* Add this surface's luma-bandwidth share of the compressed buffer. */
		if (UnboundedRequestEnabled) {
			EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
					+ CompressedBufferSizeInkByte * 1024
					* (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k])
					/ (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
		}

		LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
		LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
		FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];

		ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
				- (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k];

		if (v->NumberOfActiveSurfaces > 1) {
			ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
					- (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k]
					/ v->PixelClock[k] / v->VRatio[k];
		}

		if (BytePerPixelDETC[k] > 0) {
			LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
			LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]);
			FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k])
					/ v->VRatioChroma[k];
			ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
					- (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k]
					/ v->PixelClock[k];
			if (v->NumberOfActiveSurfaces > 1) {
				/*
				 * Floating-point division, as in the luma path above;
				 * integer "1 / N" would truncate to 0 for N > 1.
				 */
				ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
						- (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k]
						/ v->PixelClock[k] / v->VRatioChroma[k];
			}
			ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY,
					ActiveClockChangeLatencyHidingC);
		} else {
			ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
		}

		ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
				- v->Watermark.DRAMClockChangeWatermark;
		ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
				- v->Watermark.FCLKChangeWatermark;
		USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark;

		if (v->WritebackEnable[k]) {
			WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024
					/ (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
					/ (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
			if (v->WritebackPixelFormat[k] == dm_444_64)
				WritebackLatencyHiding = WritebackLatencyHiding / 2;

			WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
					- v->Watermark.WritebackDRAMClockChangeWatermark;

			WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
					- v->Watermark.WritebackFCLKChangeWatermark;

			/*
			 * NOTE(review): the DRAM margin is clamped by the writeback
			 * FCLK margin and the FCLK margin by the writeback DRAM
			 * margin. This looks crossed; behaviour is kept as-is
			 * pending confirmation of the intended formula.
			 */
			ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
					WritebackFCLKChangeLatencyMargin);
			ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k],
					WritebackDRAMClockChangeLatencyMargin);
		}
		MaxActiveDRAMClockChangeLatencySupported[k] =
				(v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
						0 :
						(ActiveDRAMClockChangeLatencyMargin[k]
								+ mmSOCParameters.DRAMClockChangeLatency);
	}

	/* Mark which pairs of surfaces are treated as sharing timing. */
	for (i = 0; i < v->NumberOfActiveSurfaces; ++i) {
		for (j = 0; j < v->NumberOfActiveSurfaces; ++j) {
			if (i == j ||
					(v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) ||
					(v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) ||
					(v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) ||
					(v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] &&
					v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] &&
					v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
					(v->DRRDisplay[i] || v->DRRDisplay[j]))) {
				SynchronizedSurfaces[i][j] = true;
			} else {
				SynchronizedSurfaces[i][j] = false;
			}
		}
	}

	/* Smallest FCLK margin among the non-phantom surfaces. */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
				(!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
				ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
			FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
			MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
			SurfaceWithMinActiveFCLKChangeMargin = k;
		}
	}

	*MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;

	/*
	 * Smallest FCLK margin among surfaces not synchronized with the one
	 * found above. SameTimingForFCLKChange stays true only if there are none;
	 * it also serves as the "first candidate" flag for the minimum search.
	 */
	SameTimingForFCLKChange = true;
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
			if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
					(SameTimingForFCLKChange ||
					ActiveFCLKChangeLatencyMargin[k] <
					SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
				SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
			}
			SameTimingForFCLKChange = false;
		}
	}

	if (MinActiveFCLKChangeMargin > 0) {
		*FCLKChangeSupport = dm_fclock_change_vactive;
	} else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
			(PrefetchMode <= 1)) {
		*FCLKChangeSupport = dm_fclock_change_vblank;
	} else {
		*FCLKChangeSupport = dm_fclock_change_unsupported;
	}

	*USRRetrainingSupport = true;
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
				(USRRetrainingLatencyMargin[k] < 0)) {
			*USRRetrainingSupport = false;
		}
	}

	/*
	 * DRAMClockChangeSupportNumber: 0 = every non-MALL surface has margin,
	 * 1 = the surfaces without margin are mutually synchronized,
	 * 2 = not supportable (also forced when PrefetchMode > 0).
	 */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
				v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
				v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
				ActiveDRAMClockChangeLatencyMargin[k] < 0) {
			if (PrefetchMode > 0) {
				DRAMClockChangeSupportNumber = 2;
			} else if (DRAMClockChangeSupportNumber == 0) {
				DRAMClockChangeSupportNumber = 1;
				LastSurfaceWithoutMargin = k;
			} else if (DRAMClockChangeSupportNumber == 1 &&
					!SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
				DRAMClockChangeSupportNumber = 2;
			}
		}
	}

	/* DRAMClockChangeMethod: 0 = no MALL, 1 = full frame, 2 = sub-viewport. */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
			DRAMClockChangeMethod = 1;
		else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
			DRAMClockChangeMethod = 2;
	}

	if (DRAMClockChangeMethod == 0) {
		if (DRAMClockChangeSupportNumber == 0)
			*DRAMClockChangeSupport = dm_dram_clock_change_vactive;
		else if (DRAMClockChangeSupportNumber == 1)
			*DRAMClockChangeSupport = dm_dram_clock_change_vblank;
		else
			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
	} else if (DRAMClockChangeMethod == 1) {
		if (DRAMClockChangeSupportNumber == 0)
			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
		else if (DRAMClockChangeSupportNumber == 1)
			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
		else
			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
	} else {
		if (DRAMClockChangeSupportNumber == 0)
			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
		else if (DRAMClockChangeSupportNumber == 1)
			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
		else
			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
	}

	/* Source lines each surface needs for the sub-viewport MALL case. */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		unsigned int dst_y_pstate;
		unsigned int src_y_pstate_l;
		unsigned int src_y_pstate_c;
		unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;

		dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency)
				/ (v->HTotal[k] / v->PixelClock[k]), 1);
		src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]);
		src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k])
				+ LBLatencyHidingSourceLinesY[k];
		sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k];

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
		dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
		/* SwathWidthY[] is double, so it must be printed with %f. */
		dml_print("DML::%s: k=%d, SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
		dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
		dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]);
		dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate);
		dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l);
		dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l);
		dml_print("DML::%s: k=%d, v->meta_row_height = %d\n", __func__, k, v->meta_row_height[k]);
		dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l);
#endif
		SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;

		if (BytePerPixelDETC[k] > 0) {
			src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]);
			src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k])
					+ LBLatencyHidingSourceLinesC[k];
			sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k];
			SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%d, src_y_pstate_c = %d\n", __func__, k, src_y_pstate_c);
			dml_print("DML::%s: k=%d, src_y_ahead_c = %d\n", __func__, k, src_y_ahead_c);
			dml_print("DML::%s: k=%d, v->meta_row_height_chroma = %d\n", __func__, k, v->meta_row_height_chroma[k]);
			dml_print("DML::%s: k=%d, sub_vp_lines_c = %d\n", __func__, k, sub_vp_lines_c);
#endif
		}
	}
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
	dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
	dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
			__func__, *MinActiveFCLKChangeLatencySupported);
	dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
#endif
} // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
4644
/*
 * dml32_CalculateWriteBackDISPCLK - DISPCLK needed by the writeback path.
 *
 * Evaluates three candidate clocks, derived respectively from
 *   - the horizontal taps and horizontal ratio,
 *   - the vertical taps, destination width and HTotal,
 *   - the vertical taps, destination width, line buffer size and source width,
 * and returns the largest one after dml32_RoundToDFSGranularity() has been
 * applied with DISPCLKDPPCLKVCOSpeed.
 *
 * WritebackPixelFormat and WritebackVRatio are accepted but not referenced.
 */
double dml32_CalculateWriteBackDISPCLK(
		enum source_format_class WritebackPixelFormat,
		double PixelClock,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackHTaps,
		unsigned int WritebackVTaps,
		unsigned int WritebackSourceWidth,
		unsigned int WritebackDestinationWidth,
		unsigned int HTotal,
		unsigned int WritebackLineBufferSize,
		double DISPCLKDPPCLKVCOSpeed)
{
	double h_tap_groups;
	double v_term;
	double hb_term;
	double clk_h;
	double clk_v;
	double clk_hb;
	double clk_needed;

	/* Horizontal taps are counted in groups of eight, rounded up. */
	h_tap_groups = dml_ceil(WritebackHTaps / 8.0, 1);
	clk_h = PixelClock * h_tap_groups / WritebackHRatio;

	/* Destination width is counted in groups of six, rounded up. */
	v_term = WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0;
	clk_v = PixelClock * v_term / HTotal;

	hb_term = WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0;
	clk_hb = PixelClock * WritebackVTaps * hb_term / 6.0 / WritebackSourceWidth;

	clk_needed = dml_max3(clk_h, clk_v, clk_hb);

	return dml32_RoundToDFSGranularity(clk_needed, 1, DISPCLKDPPCLKVCOSpeed);
}
4666
dml32_CalculateMinAndMaxPrefetchMode(enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal,unsigned int * MinPrefetchMode,unsigned int * MaxPrefetchMode)4667 void dml32_CalculateMinAndMaxPrefetchMode(
4668 enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal,
4669 unsigned int *MinPrefetchMode,
4670 unsigned int *MaxPrefetchMode)
4671 {
4672 if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) {
4673 *MinPrefetchMode = 3;
4674 *MaxPrefetchMode = 3;
4675 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) {
4676 *MinPrefetchMode = 2;
4677 *MaxPrefetchMode = 2;
4678 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) {
4679 *MinPrefetchMode = 1;
4680 *MaxPrefetchMode = 1;
4681 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
4682 *MinPrefetchMode = 0;
4683 *MaxPrefetchMode = 0;
4684 } else {
4685 *MinPrefetchMode = 0;
4686 *MaxPrefetchMode = 3;
4687 }
4688 } // CalculateMinAndMaxPrefetchMode
4689
/*
 * Delivery time of one swath line for a single plane.
 *
 * For v_ratio <= 1 the result is swath_width_ub * dpp_count / h_ratio / pixel_clock,
 * where the product is formed in unsigned int arithmetic before the floating point
 * divisions. For any other v_ratio it is swath_width_ub / pscl_throughput / dppclk.
 */
static double dml32_swath_line_delivery_time(
		double v_ratio,
		unsigned int swath_width_ub,
		unsigned int dpp_count,
		double h_ratio,
		double pixel_clock,
		double pscl_throughput,
		double dppclk)
{
	return (v_ratio <= 1) ?
			swath_width_ub * dpp_count / h_ratio / pixel_clock :
			swath_width_ub / pscl_throughput / dppclk;
}

/*
 * Delivery time of one cursor request.
 *
 * The cursor width is divided by h_ratio and pixel_clock when v_ratio <= 1, and by
 * pscl_throughput and dppclk otherwise; the quotient is then divided by
 * reqs_per_width.
 */
static double dml32_cursor_request_delivery_time(
		double v_ratio,
		unsigned int cursor_width,
		unsigned int reqs_per_width,
		double h_ratio,
		double pixel_clock,
		double pscl_throughput,
		double dppclk)
{
	if (v_ratio <= 1)
		return (double) cursor_width / h_ratio / pixel_clock / reqs_per_width;

	return (double) cursor_width / pscl_throughput / dppclk / reqs_per_width;
}

/*
 * Fills the line, request and cursor delivery time arrays for the surfaces
 * 0 .. NumberOfActiveSurfaces - 1. Every array argument is indexed by surface.
 *
 * Line delivery times:
 *   The nominal luma/chroma values are selected by VRatio[k] / VRatioChroma[k], the
 *   prefetch values by VRatioPrefetchY[k] / VRatioPrefetchC[k]. Both chroma line
 *   times are 0 when BytePerPixelC[k] is 0.
 *
 * Request delivery times:
 *   Each line time is divided by the number of requests per swath, which is the
 *   swath width divided (as unsigned integers) by the 256-byte block height when
 *   IsVertical(SourceRotation[k]) holds and by the 256-byte block width otherwise.
 *   Both chroma request times are 0 when BytePerPixelC[k] is 0.
 *
 * Cursor request delivery times:
 *   Only element [k][0] of CursorWidth and CursorBPP is read. The outputs are 0
 *   when NumberOfCursors[k] is 0.
 */
void dml32_CalculatePixelDeliveryTimes(
		unsigned int NumberOfActiveSurfaces,
		double VRatio[],
		double VRatioChroma[],
		double VRatioPrefetchY[],
		double VRatioPrefetchC[],
		unsigned int swath_width_luma_ub[],
		unsigned int swath_width_chroma_ub[],
		unsigned int DPPPerSurface[],
		double HRatio[],
		double HRatioChroma[],
		double PixelClock[],
		double PSCL_THROUGHPUT[],
		double PSCL_THROUGHPUT_CHROMA[],
		double Dppclk[],
		unsigned int BytePerPixelC[],
		enum dm_rotation_angle SourceRotation[],
		unsigned int NumberOfCursors[],
		unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
		unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
		unsigned int BlockWidth256BytesY[],
		unsigned int BlockHeight256BytesY[],
		unsigned int BlockWidth256BytesC[],
		unsigned int BlockHeight256BytesC[],

		/* Output */
		double DisplayPipeLineDeliveryTimeLuma[],
		double DisplayPipeLineDeliveryTimeChroma[],
		double DisplayPipeLineDeliveryTimeLumaPrefetch[],
		double DisplayPipeLineDeliveryTimeChromaPrefetch[],
		double DisplayPipeRequestDeliveryTimeLuma[],
		double DisplayPipeRequestDeliveryTimeChroma[],
		double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
		double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
		double CursorRequestDeliveryTime[],
		double CursorRequestDeliveryTimePrefetch[])
{
	unsigned int k;

	/* Pass 1: line delivery times (nominal and prefetch, luma and chroma). */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
		dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
		dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
		dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
		dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
		dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
		dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
		dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
		dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
#endif

		DisplayPipeLineDeliveryTimeLuma[k] = dml32_swath_line_delivery_time(
				VRatio[k], swath_width_luma_ub[k], DPPPerSurface[k],
				HRatio[k], PixelClock[k], PSCL_THROUGHPUT[k], Dppclk[k]);

		if (BytePerPixelC[k] != 0) {
			DisplayPipeLineDeliveryTimeChroma[k] = dml32_swath_line_delivery_time(
					VRatioChroma[k], swath_width_chroma_ub[k], DPPPerSurface[k],
					HRatioChroma[k], PixelClock[k], PSCL_THROUGHPUT_CHROMA[k], Dppclk[k]);
		} else {
			DisplayPipeLineDeliveryTimeChroma[k] = 0;
		}

		DisplayPipeLineDeliveryTimeLumaPrefetch[k] = dml32_swath_line_delivery_time(
				VRatioPrefetchY[k], swath_width_luma_ub[k], DPPPerSurface[k],
				HRatio[k], PixelClock[k], PSCL_THROUGHPUT[k], Dppclk[k]);

		if (BytePerPixelC[k] != 0) {
			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = dml32_swath_line_delivery_time(
					VRatioPrefetchC[k], swath_width_chroma_ub[k], DPPPerSurface[k],
					HRatioChroma[k], PixelClock[k], PSCL_THROUGHPUT_CHROMA[k], Dppclk[k]);
		} else {
			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
#endif
	}

	/* Pass 2: request delivery times derived from the line times above. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		unsigned int block_extent;
		double reqs_per_swath;

		block_extent = IsVertical(SourceRotation[k]) ?
				BlockHeight256BytesY[k] : BlockWidth256BytesY[k];
		/* Unsigned integer division; the quotient is then widened to double. */
		reqs_per_swath = swath_width_luma_ub[k] / block_extent;
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, reqs_per_swath);
#endif

		DisplayPipeRequestDeliveryTimeLuma[k] =
				DisplayPipeLineDeliveryTimeLuma[k] / reqs_per_swath;
		DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
				DisplayPipeLineDeliveryTimeLumaPrefetch[k] / reqs_per_swath;

		if (BytePerPixelC[k] != 0) {
			block_extent = IsVertical(SourceRotation[k]) ?
					BlockHeight256BytesC[k] : BlockWidth256BytesC[k];
			reqs_per_swath = swath_width_chroma_ub[k] / block_extent;
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, reqs_per_swath);
#endif
			DisplayPipeRequestDeliveryTimeChroma[k] =
					DisplayPipeLineDeliveryTimeChroma[k] / reqs_per_swath;
			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
					DisplayPipeLineDeliveryTimeChromaPrefetch[k] / reqs_per_swath;
		} else {
			DisplayPipeRequestDeliveryTimeChroma[k] = 0;
			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
#endif
	}

	/* Pass 3: cursor request delivery times (cursor 0 of each surface only). */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		unsigned int reqs_per_cursor_width;

		/* Computed unconditionally, even when the surface has no cursor. */
		reqs_per_cursor_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] /
				256.0 / 8.0, 1.0);

		if (NumberOfCursors[k] == 0) {
			CursorRequestDeliveryTime[k] = 0;
			CursorRequestDeliveryTimePrefetch[k] = 0;
		} else {
			CursorRequestDeliveryTime[k] = dml32_cursor_request_delivery_time(
					VRatio[k], CursorWidth[k][0], reqs_per_cursor_width,
					HRatio[k], PixelClock[k], PSCL_THROUGHPUT[k], Dppclk[k]);
			CursorRequestDeliveryTimePrefetch[k] = dml32_cursor_request_delivery_time(
					VRatioPrefetchY[k], CursorWidth[k][0], reqs_per_cursor_width,
					HRatio[k], PixelClock[k], PSCL_THROUGHPUT[k], Dppclk[k]);
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
				__func__, k, NumberOfCursors[k]);
		dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
				__func__, k, CursorRequestDeliveryTime[k]);
		dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
				__func__, k, CursorRequestDeliveryTimePrefetch[k]);
#endif
	}
} // CalculatePixelDeliveryTimes
4870
/*
 * Upper bound on the number of meta chunks per meta row.
 *
 * Returns chunks_per_row_int + 1 when row_remainder does not exceed
 * 2 * min_chunk_width - req_extent (evaluated in unsigned int arithmetic), and
 * chunks_per_row_int + 2 otherwise.
 */
static unsigned int dml32_meta_chunks_per_row_ub(
		unsigned int chunks_per_row_int,
		unsigned int row_remainder,
		unsigned int min_chunk_width,
		unsigned int req_extent)
{
	const unsigned int threshold = 2 * min_chunk_width - req_extent;

	return chunks_per_row_int + ((row_remainder <= threshold) ? 1 : 2);
}

/*
 * Fills the per-surface meta chunk and PTE group timing arrays for the surfaces
 * 0 .. NumberOfActiveSurfaces - 1. Every array argument is indexed by surface.
 *
 * DST_Y_PER_*_ROW_NOM_*:
 *   Row height divided by the matching vertical ratio. The chroma entries are 0
 *   when BytePerPixelC[k] is 0.
 *
 * TimePer[Chroma]MetaChunk{Nominal,VBlank,Flip}:
 *   All six are 0 when DCCEnable[k] is false; the three chroma entries are also 0
 *   when BytePerPixelC[k] is 0. Otherwise a number of lines (row height / vertical
 *   ratio, or the DestinationLinesToRequestRowIn* input) is multiplied by
 *   HTotal[k], divided by PixelClock[k] and divided by the meta chunk upper bound.
 *
 * time_per_pte_group_*:
 *   All six are 0 when GPUVMEnable is false; the three chroma entries are also 0
 *   when BytePerPixelC[k] is 0. Otherwise the same lines * HTotal / PixelClock
 *   form is divided by the number of dpte groups per row, which is halved before
 *   rounding up when use_one_row_for_frame[k] is set.
 */
void dml32_CalculateMetaAndPTETimes(
		bool use_one_row_for_frame[],
		unsigned int NumberOfActiveSurfaces,
		bool GPUVMEnable,
		unsigned int MetaChunkSize,
		unsigned int MinMetaChunkSizeBytes,
		unsigned int HTotal[],
		double VRatio[],
		double VRatioChroma[],
		double DestinationLinesToRequestRowInVBlank[],
		double DestinationLinesToRequestRowInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		unsigned int BytePerPixelY[],
		unsigned int BytePerPixelC[],
		enum dm_rotation_angle SourceRotation[],
		unsigned int dpte_row_height[],
		unsigned int dpte_row_height_chroma[],
		unsigned int meta_row_width[],
		unsigned int meta_row_width_chroma[],
		unsigned int meta_row_height[],
		unsigned int meta_row_height_chroma[],
		unsigned int meta_req_width[],
		unsigned int meta_req_width_chroma[],
		unsigned int meta_req_height[],
		unsigned int meta_req_height_chroma[],
		unsigned int dpte_group_bytes[],
		unsigned int PTERequestSizeY[],
		unsigned int PTERequestSizeC[],
		unsigned int PixelPTEReqWidthY[],
		unsigned int PixelPTEReqHeightY[],
		unsigned int PixelPTEReqWidthC[],
		unsigned int PixelPTEReqHeightC[],
		unsigned int dpte_row_width_luma_ub[],
		unsigned int dpte_row_width_chroma_ub[],

		/* Output */
		double DST_Y_PER_PTE_ROW_NOM_L[],
		double DST_Y_PER_PTE_ROW_NOM_C[],
		double DST_Y_PER_META_ROW_NOM_L[],
		double DST_Y_PER_META_ROW_NOM_C[],
		double TimePerMetaChunkNominal[],
		double TimePerChromaMetaChunkNominal[],
		double TimePerMetaChunkVBlank[],
		double TimePerChromaMetaChunkVBlank[],
		double TimePerMetaChunkFlip[],
		double TimePerChromaMetaChunkFlip[],
		double time_per_pte_group_nom_luma[],
		double time_per_pte_group_vblank_luma[],
		double time_per_pte_group_flip_luma[],
		double time_per_pte_group_nom_chroma[],
		double time_per_pte_group_vblank_chroma[],
		double time_per_pte_group_flip_chroma[])
{
	unsigned int k;

	/* Pass 1: destination lines per PTE row and per meta row. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
		DST_Y_PER_PTE_ROW_NOM_C[k] = (BytePerPixelC[k] == 0) ?
				0 : dpte_row_height_chroma[k] / VRatioChroma[k];
		DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
		DST_Y_PER_META_ROW_NOM_C[k] = (BytePerPixelC[k] == 0) ?
				0 : meta_row_height_chroma[k] / VRatioChroma[k];
	}

	/* Pass 2: time per meta chunk. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		unsigned int chunk_width;
		unsigned int min_chunk_width;
		unsigned int chunks_per_row_int;
		unsigned int chunks_per_row_ub;

		if (!DCCEnable[k]) {
			TimePerMetaChunkNominal[k] = 0;
			TimePerMetaChunkVBlank[k] = 0;
			TimePerMetaChunkFlip[k] = 0;
			TimePerChromaMetaChunkNominal[k] = 0;
			TimePerChromaMetaChunkVBlank[k] = 0;
			TimePerChromaMetaChunkFlip[k] = 0;
			continue;
		}

		/* Luma plane; everything up to the chunk count is unsigned integer math. */
		chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
		min_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
		chunks_per_row_int = meta_row_width[k] / chunk_width;
		chunks_per_row_ub = dml32_meta_chunks_per_row_ub(
				chunks_per_row_int,
				meta_row_width[k] % chunk_width,
				min_chunk_width,
				IsVertical(SourceRotation[k]) ? meta_req_height[k] : meta_req_width[k]);

		TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
				HTotal[k] / PixelClock[k] / chunks_per_row_ub;
		TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
				HTotal[k] / PixelClock[k] / chunks_per_row_ub;
		TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
				HTotal[k] / PixelClock[k] / chunks_per_row_ub;

		if (BytePerPixelC[k] == 0) {
			TimePerChromaMetaChunkNominal[k] = 0;
			TimePerChromaMetaChunkVBlank[k] = 0;
			TimePerChromaMetaChunkFlip[k] = 0;
			continue;
		}

		/* Chroma plane. */
		chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
				meta_row_height_chroma[k];
		min_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
				meta_row_height_chroma[k];
		/* Quotient is formed in double here and truncated by the assignment. */
		chunks_per_row_int = (double) meta_row_width_chroma[k] / chunk_width;
		chunks_per_row_ub = dml32_meta_chunks_per_row_ub(
				chunks_per_row_int,
				meta_row_width_chroma[k] % chunk_width,
				min_chunk_width,
				IsVertical(SourceRotation[k]) ?
						meta_req_height_chroma[k] : meta_req_width_chroma[k]);

		TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
				HTotal[k] / PixelClock[k] / chunks_per_row_ub;
		TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
				HTotal[k] / PixelClock[k] / chunks_per_row_ub;
		TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
				HTotal[k] / PixelClock[k] / chunks_per_row_ub;
	}

	/* Pass 3: time per PTE group. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		if (!GPUVMEnable) {
			time_per_pte_group_nom_luma[k] = 0;
			time_per_pte_group_vblank_luma[k] = 0;
			time_per_pte_group_flip_luma[k] = 0;
			time_per_pte_group_nom_chroma[k] = 0;
			time_per_pte_group_vblank_chroma[k] = 0;
			time_per_pte_group_flip_chroma[k] = 0;
		} else {
			unsigned int req_extent;
			unsigned int group_width_l;
			unsigned int groups_per_row_l;
			double row_over_group;

			req_extent = IsVertical(SourceRotation[k]) ?
					PixelPTEReqHeightY[k] : PixelPTEReqWidthY[k];
			/* Double product truncated to unsigned int by the assignment. */
			group_width_l = (double) dpte_group_bytes[k] /
					(double) PTERequestSizeY[k] * req_extent;

			row_over_group = (double) dpte_row_width_luma_ub[k] / (double) group_width_l;
			if (use_one_row_for_frame[k])
				row_over_group = row_over_group / 2.0;
			groups_per_row_l = dml_ceil(row_over_group, 1.0);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%0d, use_one_row_for_frame        = %d\n",
					__func__, k, use_one_row_for_frame[k]);
			dml_print("DML::%s: k=%0d, dpte_group_bytes             = %d\n",
					__func__, k, dpte_group_bytes[k]);
			dml_print("DML::%s: k=%0d, PTERequestSizeY              = %d\n",
					__func__, k, PTERequestSizeY[k]);
			dml_print("DML::%s: k=%0d, PixelPTEReqWidthY            = %d\n",
					__func__, k, PixelPTEReqWidthY[k]);
			dml_print("DML::%s: k=%0d, PixelPTEReqHeightY           = %d\n",
					__func__, k, PixelPTEReqHeightY[k]);
			dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub       = %d\n",
					__func__, k, dpte_row_width_luma_ub[k]);
			dml_print("DML::%s: k=%0d, dpte_group_width_luma        = %d\n",
					__func__, k, group_width_l);
			dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub  = %d\n",
					__func__, k, groups_per_row_l);
#endif

			time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
					HTotal[k] / PixelClock[k] / groups_per_row_l;
			time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
					HTotal[k] / PixelClock[k] / groups_per_row_l;
			time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / groups_per_row_l;

			if (BytePerPixelC[k] != 0) {
				unsigned int group_width_c;
				unsigned int groups_per_row_c;

				req_extent = IsVertical(SourceRotation[k]) ?
						PixelPTEReqHeightC[k] : PixelPTEReqWidthC[k];
				group_width_c = (double) dpte_group_bytes[k] /
						(double) PTERequestSizeC[k] * req_extent;

				row_over_group = (double) dpte_row_width_chroma_ub[k] /
						(double) group_width_c;
				if (use_one_row_for_frame[k])
					row_over_group = row_over_group / 2.0;
				groups_per_row_c = dml_ceil(row_over_group, 1.0);
#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub        = %d\n",
						__func__, k, dpte_row_width_chroma_ub[k]);
				dml_print("DML::%s: k=%0d, dpte_group_width_chroma        = %d\n",
						__func__, k, group_width_c);
				dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub  = %d\n",
						__func__, k, groups_per_row_c);
#endif
				time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
						HTotal[k] / PixelClock[k] / groups_per_row_c;
				time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
						HTotal[k] / PixelClock[k] / groups_per_row_c;
				time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
						HTotal[k] / PixelClock[k] / groups_per_row_c;
			} else {
				time_per_pte_group_nom_chroma[k] = 0;
				time_per_pte_group_vblank_chroma[k] = 0;
				time_per_pte_group_flip_chroma[k] = 0;
			}
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank         = %f\n",
				__func__, k, DestinationLinesToRequestRowInVBlank[k]);
		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip  = %f\n",
				__func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L                      = %f\n",
				__func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C                      = %f\n",
				__func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L                     = %f\n",
				__func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C                     = %f\n",
				__func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
		dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal          = %f\n",
				__func__, k, TimePerMetaChunkNominal[k]);
		dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank           = %f\n",
				__func__, k, TimePerMetaChunkVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip             = %f\n",
				__func__, k, TimePerMetaChunkFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal    = %f\n",
				__func__, k, TimePerChromaMetaChunkNominal[k]);
		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank     = %f\n",
				__func__, k, TimePerChromaMetaChunkVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip       = %f\n",
				__func__, k, TimePerChromaMetaChunkFlip[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma      = %f\n",
				__func__, k, time_per_pte_group_nom_luma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma   = %f\n",
				__func__, k, time_per_pte_group_vblank_luma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma     = %f\n",
				__func__, k, time_per_pte_group_flip_luma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma    = %f\n",
				__func__, k, time_per_pte_group_nom_chroma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
				__func__, k, time_per_pte_group_vblank_chroma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma   = %f\n",
				__func__, k, time_per_pte_group_flip_chroma[k]);
#endif
	}
} // CalculateMetaAndPTETimes
5144
/*
 * Number of groups of group_bytes needed to hold bytes: the quotient is formed in
 * double and rounded up with dml_ceil(..., 1.0).
 */
static double dml32_vm_groups_for_bytes(unsigned int bytes, unsigned int group_bytes)
{
	return dml_ceil((double) bytes / (double) group_bytes, 1.0);
}

/* Evaluates lines * htotal / pixel_clock / units, left to right. */
static double dml32_vm_lines_to_unit_time(
		double lines,
		unsigned int htotal,
		double pixel_clock,
		unsigned int units)
{
	return lines * htotal / pixel_clock / units;
}

/*
 * Fills TimePerVMGroup{VBlank,Flip} and TimePerVMRequest{VBlank,Flip} for the
 * surfaces 0 .. NumberOfActiveSurfaces - 1. Every array argument is indexed by
 * surface.
 *
 * All four outputs are 0 for a surface unless GPUVMEnable is set and either
 * DCCEnable[k] is set or GPUVMMaxPageTableLevels > 1. Otherwise:
 *
 *  - group count:   sum of ceil(bytes / vm_group_bytes[k]) over the byte counts
 *                   that apply (see below), plus 2 (with chroma) or 1 (without)
 *                   in the "DCC with more than one page table level" case;
 *  - request count: sum of bytes / 64 (unsigned integer division) over the same
 *                   byte counts;
 *  - each output:   DestinationLinesToRequestVMIn{VBlank,ImmediateFlip}[k] *
 *                   HTotal[k] / PixelClock[k] / count, halved when
 *                   GPUVMMaxPageTableLevels > 2.
 *
 * Byte counts that apply:
 *  - DCC off:                     dpde0_bytes_per_frame_ub_{l,c}
 *  - DCC on, one page table level: meta_pte_bytes_per_frame_ub_{l,c}
 *  - DCC on, otherwise:           both of the above
 * The "_c" terms are only included when BytePerPixelC[k] > 0.
 *
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are not referenced by this
 * function.
 */
void dml32_CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActiveSurfaces,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		unsigned int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		unsigned int dpte_row_width_luma_ub[],
		unsigned int dpte_row_width_chroma_ub[],
		unsigned int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		unsigned int meta_pte_bytes_per_frame_ub_l[],
		unsigned int meta_pte_bytes_per_frame_ub_c[],

		/* Output */
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	unsigned int k;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
#endif
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
		dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_c[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_c[k]);
#endif

		if (GPUVMEnable && (DCCEnable[k] || GPUVMMaxPageTableLevels > 1)) {
			const bool has_chroma = BytePerPixelC[k] > 0;
			const unsigned int group_bytes = vm_group_bytes[k];
			unsigned int group_count;
			unsigned int request_count;

			if (!DCCEnable[k]) {
				/* DCC off: only the dpde0 bytes contribute. */
				if (has_chroma) {
					group_count =
						dml32_vm_groups_for_bytes(dpde0_bytes_per_frame_ub_l[k], group_bytes) +
						dml32_vm_groups_for_bytes(dpde0_bytes_per_frame_ub_c[k], group_bytes);
					request_count = dpde0_bytes_per_frame_ub_l[k] / 64 +
							dpde0_bytes_per_frame_ub_c[k] / 64;
				} else {
					group_count =
						dml32_vm_groups_for_bytes(dpde0_bytes_per_frame_ub_l[k], group_bytes);
					request_count = dpde0_bytes_per_frame_ub_l[k] / 64;
				}
			} else if (GPUVMMaxPageTableLevels == 1) {
				/* DCC on, one page table level: only the meta PTE bytes contribute. */
				if (has_chroma) {
					group_count =
						dml32_vm_groups_for_bytes(meta_pte_bytes_per_frame_ub_l[k], group_bytes) +
						dml32_vm_groups_for_bytes(meta_pte_bytes_per_frame_ub_c[k], group_bytes);
					request_count = meta_pte_bytes_per_frame_ub_l[k] / 64 +
							meta_pte_bytes_per_frame_ub_c[k] / 64;
				} else {
					group_count =
						dml32_vm_groups_for_bytes(meta_pte_bytes_per_frame_ub_l[k], group_bytes);
					request_count = meta_pte_bytes_per_frame_ub_l[k] / 64;
				}
			} else {
				/* DCC on, any other level count: dpde0 and meta PTE bytes both contribute. */
				if (has_chroma) {
					group_count = 2 +
						dml32_vm_groups_for_bytes(dpde0_bytes_per_frame_ub_l[k], group_bytes) +
						dml32_vm_groups_for_bytes(dpde0_bytes_per_frame_ub_c[k], group_bytes) +
						dml32_vm_groups_for_bytes(meta_pte_bytes_per_frame_ub_l[k], group_bytes) +
						dml32_vm_groups_for_bytes(meta_pte_bytes_per_frame_ub_c[k], group_bytes);
					request_count = dpde0_bytes_per_frame_ub_l[k] / 64 +
							dpde0_bytes_per_frame_ub_c[k] / 64 +
							meta_pte_bytes_per_frame_ub_l[k] / 64 +
							meta_pte_bytes_per_frame_ub_c[k] / 64;
				} else {
					group_count = 1 +
						dml32_vm_groups_for_bytes(dpde0_bytes_per_frame_ub_l[k], group_bytes) +
						dml32_vm_groups_for_bytes(meta_pte_bytes_per_frame_ub_l[k], group_bytes);
					request_count = dpde0_bytes_per_frame_ub_l[k] / 64 +
							meta_pte_bytes_per_frame_ub_l[k] / 64;
				}
			}

			TimePerVMGroupVBlank[k] = dml32_vm_lines_to_unit_time(
					DestinationLinesToRequestVMInVBlank[k],
					HTotal[k], PixelClock[k], group_count);
			TimePerVMGroupFlip[k] = dml32_vm_lines_to_unit_time(
					DestinationLinesToRequestVMInImmediateFlip[k],
					HTotal[k], PixelClock[k], group_count);
			TimePerVMRequestVBlank[k] = dml32_vm_lines_to_unit_time(
					DestinationLinesToRequestVMInVBlank[k],
					HTotal[k], PixelClock[k], request_count);
			TimePerVMRequestFlip[k] = dml32_vm_lines_to_unit_time(
					DestinationLinesToRequestVMInImmediateFlip[k],
					HTotal[k], PixelClock[k], request_count);

			if (GPUVMMaxPageTableLevels > 2) {
				TimePerVMGroupVBlank[k] /= 2;
				TimePerVMGroupFlip[k] /= 2;
				TimePerVMRequestVBlank[k] /= 2;
				TimePerVMRequestFlip[k] /= 2;
			}

		} else {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
#endif
	}
} // CalculateVMGroupAndRequestTimes
5298
dml32_CalculateDCCConfiguration(bool DCCEnabled,bool DCCProgrammingAssumesScanDirectionUnknown,enum source_format_class SourcePixelFormat,unsigned int SurfaceWidthLuma,unsigned int SurfaceWidthChroma,unsigned int SurfaceHeightLuma,unsigned int SurfaceHeightChroma,unsigned int nomDETInKByte,unsigned int RequestHeight256ByteLuma,unsigned int RequestHeight256ByteChroma,enum dm_swizzle_mode TilingFormat,unsigned int BytePerPixelY,unsigned int BytePerPixelC,double BytePerPixelDETY,double BytePerPixelDETC,enum dm_rotation_angle SourceRotation,unsigned int * MaxUncompressedBlockLuma,unsigned int * MaxUncompressedBlockChroma,unsigned int * MaxCompressedBlockLuma,unsigned int * MaxCompressedBlockChroma,unsigned int * IndependentBlockLuma,unsigned int * IndependentBlockChroma)5299 void dml32_CalculateDCCConfiguration(
5300 bool DCCEnabled,
5301 bool DCCProgrammingAssumesScanDirectionUnknown,
5302 enum source_format_class SourcePixelFormat,
5303 unsigned int SurfaceWidthLuma,
5304 unsigned int SurfaceWidthChroma,
5305 unsigned int SurfaceHeightLuma,
5306 unsigned int SurfaceHeightChroma,
5307 unsigned int nomDETInKByte,
5308 unsigned int RequestHeight256ByteLuma,
5309 unsigned int RequestHeight256ByteChroma,
5310 enum dm_swizzle_mode TilingFormat,
5311 unsigned int BytePerPixelY,
5312 unsigned int BytePerPixelC,
5313 double BytePerPixelDETY,
5314 double BytePerPixelDETC,
5315 enum dm_rotation_angle SourceRotation,
5316 /* Output */
5317 unsigned int *MaxUncompressedBlockLuma,
5318 unsigned int *MaxUncompressedBlockChroma,
5319 unsigned int *MaxCompressedBlockLuma,
5320 unsigned int *MaxCompressedBlockChroma,
5321 unsigned int *IndependentBlockLuma,
5322 unsigned int *IndependentBlockChroma)
5323 {
5324 typedef enum {
5325 REQ_256Bytes,
5326 REQ_128BytesNonContiguous,
5327 REQ_128BytesContiguous,
5328 REQ_NA
5329 } RequestType;
5330
5331 RequestType RequestLuma;
5332 RequestType RequestChroma;
5333
5334 unsigned int segment_order_horz_contiguous_luma;
5335 unsigned int segment_order_horz_contiguous_chroma;
5336 unsigned int segment_order_vert_contiguous_luma;
5337 unsigned int segment_order_vert_contiguous_chroma;
5338 unsigned int req128_horz_wc_l;
5339 unsigned int req128_horz_wc_c;
5340 unsigned int req128_vert_wc_l;
5341 unsigned int req128_vert_wc_c;
5342 unsigned int MAS_vp_horz_limit;
5343 unsigned int MAS_vp_vert_limit;
5344 unsigned int max_vp_horz_width;
5345 unsigned int max_vp_vert_height;
5346 unsigned int eff_surf_width_l;
5347 unsigned int eff_surf_width_c;
5348 unsigned int eff_surf_height_l;
5349 unsigned int eff_surf_height_c;
5350 unsigned int full_swath_bytes_horz_wc_l;
5351 unsigned int full_swath_bytes_horz_wc_c;
5352 unsigned int full_swath_bytes_vert_wc_l;
5353 unsigned int full_swath_bytes_vert_wc_c;
5354 unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
5355
5356 unsigned int yuv420;
5357 unsigned int horz_div_l;
5358 unsigned int horz_div_c;
5359 unsigned int vert_div_l;
5360 unsigned int vert_div_c;
5361
5362 unsigned int swath_buf_size;
5363 double detile_buf_vp_horz_limit;
5364 double detile_buf_vp_vert_limit;
5365
5366 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
5367 SourcePixelFormat == dm_420_12) ? 1 : 0);
5368 horz_div_l = 1;
5369 horz_div_c = 1;
5370 vert_div_l = 1;
5371 vert_div_c = 1;
5372
5373 if (BytePerPixelY == 1)
5374 vert_div_l = 0;
5375 if (BytePerPixelC == 1)
5376 vert_div_c = 0;
5377
5378 if (BytePerPixelC == 0) {
5379 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
5380 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5381 BytePerPixelY / (1 + horz_div_l));
5382 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5383 (1 + vert_div_l));
5384 } else {
5385 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
5386 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5387 BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
5388 BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
5389 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5390 (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
5391 (1 + vert_div_c) / (1 + yuv420));
5392 }
5393
5394 if (SourcePixelFormat == dm_420_10) {
5395 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
5396 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
5397 }
5398
5399 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
5400 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
5401
5402 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
5403 MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
5404 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
5405 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
5406 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
5407 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
5408 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
5409 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
5410
5411 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
5412 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
5413 if (BytePerPixelC > 0) {
5414 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
5415 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
5416 } else {
5417 full_swath_bytes_horz_wc_c = 0;
5418 full_swath_bytes_vert_wc_c = 0;
5419 }
5420
5421 if (SourcePixelFormat == dm_420_10) {
5422 full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0);
5423 full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0);
5424 full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0);
5425 full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0);
5426 }
5427
5428 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5429 req128_horz_wc_l = 0;
5430 req128_horz_wc_c = 0;
5431 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
5432 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5433 req128_horz_wc_l = 0;
5434 req128_horz_wc_c = 1;
5435 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
5436 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5437 req128_horz_wc_l = 1;
5438 req128_horz_wc_c = 0;
5439 } else {
5440 req128_horz_wc_l = 1;
5441 req128_horz_wc_c = 1;
5442 }
5443
5444 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5445 req128_vert_wc_l = 0;
5446 req128_vert_wc_c = 0;
5447 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
5448 full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5449 req128_vert_wc_l = 0;
5450 req128_vert_wc_c = 1;
5451 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
5452 full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5453 req128_vert_wc_l = 1;
5454 req128_vert_wc_c = 0;
5455 } else {
5456 req128_vert_wc_l = 1;
5457 req128_vert_wc_c = 1;
5458 }
5459
5460 if (BytePerPixelY == 2) {
5461 segment_order_horz_contiguous_luma = 0;
5462 segment_order_vert_contiguous_luma = 1;
5463 } else {
5464 segment_order_horz_contiguous_luma = 1;
5465 segment_order_vert_contiguous_luma = 0;
5466 }
5467
5468 if (BytePerPixelC == 2) {
5469 segment_order_horz_contiguous_chroma = 0;
5470 segment_order_vert_contiguous_chroma = 1;
5471 } else {
5472 segment_order_horz_contiguous_chroma = 1;
5473 segment_order_vert_contiguous_chroma = 0;
5474 }
5475 #ifdef __DML_VBA_DEBUG__
5476 dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
5477 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
5478 dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC);
5479 dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l);
5480 dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c);
5481 dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l);
5482 dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c);
5483 dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma);
5484 dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
5485 __func__, segment_order_horz_contiguous_chroma);
5486 #endif
5487
5488 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
5489 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
5490 RequestLuma = REQ_256Bytes;
5491 else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
5492 (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
5493 RequestLuma = REQ_128BytesNonContiguous;
5494 else
5495 RequestLuma = REQ_128BytesContiguous;
5496
5497 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
5498 RequestChroma = REQ_256Bytes;
5499 else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
5500 (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
5501 RequestChroma = REQ_128BytesNonContiguous;
5502 else
5503 RequestChroma = REQ_128BytesContiguous;
5504
5505 } else if (!IsVertical(SourceRotation)) {
5506 if (req128_horz_wc_l == 0)
5507 RequestLuma = REQ_256Bytes;
5508 else if (segment_order_horz_contiguous_luma == 0)
5509 RequestLuma = REQ_128BytesNonContiguous;
5510 else
5511 RequestLuma = REQ_128BytesContiguous;
5512
5513 if (req128_horz_wc_c == 0)
5514 RequestChroma = REQ_256Bytes;
5515 else if (segment_order_horz_contiguous_chroma == 0)
5516 RequestChroma = REQ_128BytesNonContiguous;
5517 else
5518 RequestChroma = REQ_128BytesContiguous;
5519
5520 } else {
5521 if (req128_vert_wc_l == 0)
5522 RequestLuma = REQ_256Bytes;
5523 else if (segment_order_vert_contiguous_luma == 0)
5524 RequestLuma = REQ_128BytesNonContiguous;
5525 else
5526 RequestLuma = REQ_128BytesContiguous;
5527
5528 if (req128_vert_wc_c == 0)
5529 RequestChroma = REQ_256Bytes;
5530 else if (segment_order_vert_contiguous_chroma == 0)
5531 RequestChroma = REQ_128BytesNonContiguous;
5532 else
5533 RequestChroma = REQ_128BytesContiguous;
5534 }
5535
5536 if (RequestLuma == REQ_256Bytes) {
5537 *MaxUncompressedBlockLuma = 256;
5538 *MaxCompressedBlockLuma = 256;
5539 *IndependentBlockLuma = 0;
5540 } else if (RequestLuma == REQ_128BytesContiguous) {
5541 *MaxUncompressedBlockLuma = 256;
5542 *MaxCompressedBlockLuma = 128;
5543 *IndependentBlockLuma = 128;
5544 } else {
5545 *MaxUncompressedBlockLuma = 256;
5546 *MaxCompressedBlockLuma = 64;
5547 *IndependentBlockLuma = 64;
5548 }
5549
5550 if (RequestChroma == REQ_256Bytes) {
5551 *MaxUncompressedBlockChroma = 256;
5552 *MaxCompressedBlockChroma = 256;
5553 *IndependentBlockChroma = 0;
5554 } else if (RequestChroma == REQ_128BytesContiguous) {
5555 *MaxUncompressedBlockChroma = 256;
5556 *MaxCompressedBlockChroma = 128;
5557 *IndependentBlockChroma = 128;
5558 } else {
5559 *MaxUncompressedBlockChroma = 256;
5560 *MaxCompressedBlockChroma = 64;
5561 *IndependentBlockChroma = 64;
5562 }
5563
5564 if (DCCEnabled != true || BytePerPixelC == 0) {
5565 *MaxUncompressedBlockChroma = 0;
5566 *MaxCompressedBlockChroma = 0;
5567 *IndependentBlockChroma = 0;
5568 }
5569
5570 if (DCCEnabled != true) {
5571 *MaxUncompressedBlockLuma = 0;
5572 *MaxCompressedBlockLuma = 0;
5573 *IndependentBlockLuma = 0;
5574 }
5575
5576 #ifdef __DML_VBA_DEBUG__
5577 dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
5578 dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
5579 dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
5580 dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
5581 dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
5582 dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
5583 #endif
5584
5585 } // CalculateDCCConfiguration
5586
dml32_CalculateStutterEfficiency(unsigned int CompressedBufferSizeInkByte,enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],bool UnboundedRequestEnabled,unsigned int MetaFIFOSizeInKEntries,unsigned int ZeroSizeBufferEntries,unsigned int PixelChunkSizeInKByte,unsigned int NumberOfActiveSurfaces,unsigned int ROBBufferSizeInKByte,double TotalDataReadBandwidth,double DCFCLK,double ReturnBW,unsigned int CompbufReservedSpace64B,unsigned int CompbufReservedSpaceZs,double SRExitTime,double SRExitZ8Time,bool SynchronizeTimingsFinal,unsigned int BlendingAndTiming[],double StutterEnterPlusExitWatermark,double Z8StutterEnterPlusExitWatermark,bool ProgressiveToInterlaceUnitInOPP,bool Interlace[],double MinTTUVBlank[],unsigned int DPPPerSurface[],unsigned int DETBufferSizeY[],unsigned int BytePerPixelY[],double BytePerPixelDETY[],double SwathWidthY[],unsigned int SwathHeightY[],unsigned int SwathHeightC[],double NetDCCRateLuma[],double NetDCCRateChroma[],double DCCFractionOfZeroSizeRequestsLuma[],double DCCFractionOfZeroSizeRequestsChroma[],unsigned int HTotal[],unsigned int VTotal[],double PixelClock[],double VRatio[],enum dm_rotation_angle SourceRotation[],unsigned int BlockHeight256BytesY[],unsigned int BlockWidth256BytesY[],unsigned int BlockHeight256BytesC[],unsigned int BlockWidth256BytesC[],unsigned int DCCYMaxUncompressedBlock[],unsigned int DCCCMaxUncompressedBlock[],unsigned int VActive[],bool DCCEnable[],bool WritebackEnable[],double ReadBandwidthSurfaceLuma[],double ReadBandwidthSurfaceChroma[],double meta_row_bw[],double dpte_row_bw[],double * StutterEfficiencyNotIncludingVBlank,double * StutterEfficiency,unsigned int * NumberOfStutterBurstsPerFrame,double * Z8StutterEfficiencyNotIncludingVBlank,double * Z8StutterEfficiency,unsigned int * Z8NumberOfStutterBurstsPerFrame,double * StutterPeriod,bool * DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)5587 void dml32_CalculateStutterEfficiency(
5588 unsigned int CompressedBufferSizeInkByte,
5589 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
5590 bool UnboundedRequestEnabled,
5591 unsigned int MetaFIFOSizeInKEntries,
5592 unsigned int ZeroSizeBufferEntries,
5593 unsigned int PixelChunkSizeInKByte,
5594 unsigned int NumberOfActiveSurfaces,
5595 unsigned int ROBBufferSizeInKByte,
5596 double TotalDataReadBandwidth,
5597 double DCFCLK,
5598 double ReturnBW,
5599 unsigned int CompbufReservedSpace64B,
5600 unsigned int CompbufReservedSpaceZs,
5601 double SRExitTime,
5602 double SRExitZ8Time,
5603 bool SynchronizeTimingsFinal,
5604 unsigned int BlendingAndTiming[],
5605 double StutterEnterPlusExitWatermark,
5606 double Z8StutterEnterPlusExitWatermark,
5607 bool ProgressiveToInterlaceUnitInOPP,
5608 bool Interlace[],
5609 double MinTTUVBlank[],
5610 unsigned int DPPPerSurface[],
5611 unsigned int DETBufferSizeY[],
5612 unsigned int BytePerPixelY[],
5613 double BytePerPixelDETY[],
5614 double SwathWidthY[],
5615 unsigned int SwathHeightY[],
5616 unsigned int SwathHeightC[],
5617 double NetDCCRateLuma[],
5618 double NetDCCRateChroma[],
5619 double DCCFractionOfZeroSizeRequestsLuma[],
5620 double DCCFractionOfZeroSizeRequestsChroma[],
5621 unsigned int HTotal[],
5622 unsigned int VTotal[],
5623 double PixelClock[],
5624 double VRatio[],
5625 enum dm_rotation_angle SourceRotation[],
5626 unsigned int BlockHeight256BytesY[],
5627 unsigned int BlockWidth256BytesY[],
5628 unsigned int BlockHeight256BytesC[],
5629 unsigned int BlockWidth256BytesC[],
5630 unsigned int DCCYMaxUncompressedBlock[],
5631 unsigned int DCCCMaxUncompressedBlock[],
5632 unsigned int VActive[],
5633 bool DCCEnable[],
5634 bool WritebackEnable[],
5635 double ReadBandwidthSurfaceLuma[],
5636 double ReadBandwidthSurfaceChroma[],
5637 double meta_row_bw[],
5638 double dpte_row_bw[],
5639
5640 /* Output */
5641 double *StutterEfficiencyNotIncludingVBlank,
5642 double *StutterEfficiency,
5643 unsigned int *NumberOfStutterBurstsPerFrame,
5644 double *Z8StutterEfficiencyNotIncludingVBlank,
5645 double *Z8StutterEfficiency,
5646 unsigned int *Z8NumberOfStutterBurstsPerFrame,
5647 double *StutterPeriod,
5648 bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
5649 {
5650
5651 bool FoundCriticalSurface = false;
5652 unsigned int SwathSizeCriticalSurface = 0;
5653 unsigned int LastChunkOfSwathSize;
5654 unsigned int MissingPartOfLastSwathOfDETSize;
5655 double LastZ8StutterPeriod = 0.0;
5656 double LastStutterPeriod = 0.0;
5657 unsigned int TotalNumberOfActiveOTG = 0;
5658 double doublePixelClock = 0;
5659 unsigned int doubleHTotal = 0;
5660 unsigned int doubleVTotal = 0;
5661 bool SameTiming = true;
5662 double DETBufferingTimeY;
5663 double SwathWidthYCriticalSurface = 0.0;
5664 double SwathHeightYCriticalSurface = 0.0;
5665 double VActiveTimeCriticalSurface = 0.0;
5666 double FrameTimeCriticalSurface = 0.0;
5667 unsigned int BytePerPixelYCriticalSurface = 0;
5668 double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
5669 unsigned int DETBufferSizeYCriticalSurface = 0;
5670 double MinTTUVBlankCriticalSurface = 0.0;
5671 unsigned int BlockWidth256BytesYCriticalSurface = 0;
5672 bool doublePlaneCriticalSurface = 0;
5673 bool doublePipeCriticalSurface = 0;
5674 double TotalCompressedReadBandwidth;
5675 double TotalRowReadBandwidth;
5676 double AverageDCCCompressionRate;
5677 double EffectiveCompressedBufferSize;
5678 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
5679 double StutterBurstTime;
5680 unsigned int TotalActiveWriteback;
5681 double LinesInDETY;
5682 double LinesInDETYRoundedDownToSwath;
5683 double MaximumEffectiveCompressionLuma;
5684 double MaximumEffectiveCompressionChroma;
5685 double TotalZeroSizeRequestReadBandwidth;
5686 double TotalZeroSizeCompressedReadBandwidth;
5687 double AverageDCCZeroSizeFraction;
5688 double AverageZeroSizeCompressionRate;
5689 unsigned int k;
5690
5691 TotalZeroSizeRequestReadBandwidth = 0;
5692 TotalZeroSizeCompressedReadBandwidth = 0;
5693 TotalRowReadBandwidth = 0;
5694 TotalCompressedReadBandwidth = 0;
5695
5696 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5697 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5698 if (DCCEnable[k] == true) {
5699 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
5700 || (!IsVertical(SourceRotation[k])
5701 && BlockHeight256BytesY[k] > SwathHeightY[k])
5702 || DCCYMaxUncompressedBlock[k] < 256) {
5703 MaximumEffectiveCompressionLuma = 2;
5704 } else {
5705 MaximumEffectiveCompressionLuma = 4;
5706 }
5707 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5708 + ReadBandwidthSurfaceLuma[k]
5709 / dml_min(NetDCCRateLuma[k],
5710 MaximumEffectiveCompressionLuma);
5711 #ifdef __DML_VBA_DEBUG__
5712 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5713 __func__, k, ReadBandwidthSurfaceLuma[k]);
5714 dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
5715 __func__, k, NetDCCRateLuma[k]);
5716 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
5717 __func__, k, MaximumEffectiveCompressionLuma);
5718 #endif
5719 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5720 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
5721 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5722 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
5723 / MaximumEffectiveCompressionLuma;
5724
5725 if (ReadBandwidthSurfaceChroma[k] > 0) {
5726 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
5727 || (!IsVertical(SourceRotation[k])
5728 && BlockHeight256BytesC[k] > SwathHeightC[k])
5729 || DCCCMaxUncompressedBlock[k] < 256) {
5730 MaximumEffectiveCompressionChroma = 2;
5731 } else {
5732 MaximumEffectiveCompressionChroma = 4;
5733 }
5734 TotalCompressedReadBandwidth =
5735 TotalCompressedReadBandwidth
5736 + ReadBandwidthSurfaceChroma[k]
5737 / dml_min(NetDCCRateChroma[k],
5738 MaximumEffectiveCompressionChroma);
5739 #ifdef __DML_VBA_DEBUG__
5740 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
5741 __func__, k, ReadBandwidthSurfaceChroma[k]);
5742 dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
5743 __func__, k, NetDCCRateChroma[k]);
5744 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
5745 __func__, k, MaximumEffectiveCompressionChroma);
5746 #endif
5747 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5748 + ReadBandwidthSurfaceChroma[k]
5749 * DCCFractionOfZeroSizeRequestsChroma[k];
5750 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5751 + ReadBandwidthSurfaceChroma[k]
5752 * DCCFractionOfZeroSizeRequestsChroma[k]
5753 / MaximumEffectiveCompressionChroma;
5754 }
5755 } else {
5756 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5757 + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
5758 }
5759 TotalRowReadBandwidth = TotalRowReadBandwidth
5760 + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
5761 }
5762 }
5763
5764 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
5765 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
5766
5767 #ifdef __DML_VBA_DEBUG__
5768 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
5769 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
5770 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
5771 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
5772 __func__, TotalZeroSizeCompressedReadBandwidth);
5773 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
5774 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
5775 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5776 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
5777 dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
5778 dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
5779 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
5780 #endif
5781 if (AverageDCCZeroSizeFraction == 1) {
5782 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5783 / TotalZeroSizeCompressedReadBandwidth;
5784 EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
5785 * AverageZeroSizeCompressionRate
5786 + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5787 * AverageZeroSizeCompressionRate;
5788 } else if (AverageDCCZeroSizeFraction > 0) {
5789 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5790 / TotalZeroSizeCompressedReadBandwidth;
5791 EffectiveCompressedBufferSize = dml_min(
5792 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5793 (double) MetaFIFOSizeInKEntries * 1024 * 64
5794 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
5795 + 1 / AverageDCCCompressionRate))
5796 + dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5797 * AverageDCCCompressionRate,
5798 ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5799 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5800
5801 #ifdef __DML_VBA_DEBUG__
5802 dml_print("DML::%s: min 1 = %f\n", __func__,
5803 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5804 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
5805 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
5806 AverageDCCCompressionRate));
5807 dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
5808 CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
5809 dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
5810 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5811 #endif
5812 } else {
5813 EffectiveCompressedBufferSize = dml_min(
5814 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5815 (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
5816 + ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5817 * AverageDCCCompressionRate;
5818
5819 #ifdef __DML_VBA_DEBUG__
5820 dml_print("DML::%s: min 1 = %f\n", __func__,
5821 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5822 dml_print("DML::%s: min 2 = %f\n", __func__,
5823 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
5824 #endif
5825 }
5826
5827 #ifdef __DML_VBA_DEBUG__
5828 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
5829 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
5830 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5831 #endif
5832
5833 *StutterPeriod = 0;
5834
5835 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5836 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5837 LinesInDETY = ((double) DETBufferSizeY[k]
5838 + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
5839 * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
5840 / BytePerPixelDETY[k] / SwathWidthY[k];
5841 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
5842 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
5843 / VRatio[k];
5844 #ifdef __DML_VBA_DEBUG__
5845 dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
5846 dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
5847 dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
5848 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5849 __func__, k, ReadBandwidthSurfaceLuma[k]);
5850 dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
5851 dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
5852 dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
5853 __func__, k, LinesInDETYRoundedDownToSwath);
5854 dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
5855 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5856 dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
5857 dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
5858 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5859 #endif
5860
5861 if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
5862 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
5863
5864 FoundCriticalSurface = true;
5865 *StutterPeriod = DETBufferingTimeY;
5866 FrameTimeCriticalSurface = (
5867 isInterlaceTiming ?
5868 dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k])
5869 * (double) HTotal[k] / PixelClock[k];
5870 VActiveTimeCriticalSurface = (
5871 isInterlaceTiming ?
5872 dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k])
5873 * (double) HTotal[k] / PixelClock[k];
5874 BytePerPixelYCriticalSurface = BytePerPixelY[k];
5875 SwathWidthYCriticalSurface = SwathWidthY[k];
5876 SwathHeightYCriticalSurface = SwathHeightY[k];
5877 BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
5878 LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
5879 - (LinesInDETY - LinesInDETYRoundedDownToSwath);
5880 DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
5881 MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
5882 doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
5883 doublePipeCriticalSurface = (DPPPerSurface[k] == 1);
5884
5885 #ifdef __DML_VBA_DEBUG__
5886 dml_print("DML::%s: k=%0d, FoundCriticalSurface = %d\n",
5887 __func__, k, FoundCriticalSurface);
5888 dml_print("DML::%s: k=%0d, StutterPeriod = %f\n",
5889 __func__, k, *StutterPeriod);
5890 dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface = %f\n",
5891 __func__, k, MinTTUVBlankCriticalSurface);
5892 dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface = %f\n",
5893 __func__, k, FrameTimeCriticalSurface);
5894 dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface = %f\n",
5895 __func__, k, VActiveTimeCriticalSurface);
5896 dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface = %d\n",
5897 __func__, k, BytePerPixelYCriticalSurface);
5898 dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface = %f\n",
5899 __func__, k, SwathWidthYCriticalSurface);
5900 dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface = %f\n",
5901 __func__, k, SwathHeightYCriticalSurface);
5902 dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface = %d\n",
5903 __func__, k, BlockWidth256BytesYCriticalSurface);
5904 dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface = %d\n",
5905 __func__, k, doublePlaneCriticalSurface);
5906 dml_print("DML::%s: k=%0d, doublePipeCriticalSurface = %d\n",
5907 __func__, k, doublePipeCriticalSurface);
5908 dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
5909 __func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
5910 #endif
5911 }
5912 }
5913 }
5914
5915 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth,
5916 EffectiveCompressedBufferSize);
5917 #ifdef __DML_VBA_DEBUG__
5918 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
5919 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5920 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5921 __func__, *StutterPeriod * TotalDataReadBandwidth);
5922 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5923 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
5924 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
5925 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
5926 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
5927 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
5928 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5929 #endif
5930
5931 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
5932 / ReturnBW
5933 + (*StutterPeriod * TotalDataReadBandwidth
5934 - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
5935 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
5936 #ifdef __DML_VBA_DEBUG__
5937 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
5938 AverageDCCCompressionRate / ReturnBW);
5939 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5940 __func__, (*StutterPeriod * TotalDataReadBandwidth));
5941 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
5942 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
5943 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
5944 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5945 #endif
5946 StutterBurstTime = dml_max(StutterBurstTime,
5947 LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
5948 * SwathWidthYCriticalSurface / ReturnBW);
5949
5950 #ifdef __DML_VBA_DEBUG__
5951 dml_print("DML::%s: Time to finish residue swath=%f\n",
5952 __func__,
5953 LinesToFinishSwathTransferStutterCriticalSurface *
5954 BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
5955 #endif
5956
5957 TotalActiveWriteback = 0;
5958 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5959 if (WritebackEnable[k])
5960 TotalActiveWriteback = TotalActiveWriteback + 1;
5961 }
5962
5963 if (TotalActiveWriteback == 0) {
5964 #ifdef __DML_VBA_DEBUG__
5965 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
5966 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
5967 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
5968 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5969 #endif
5970 *StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5971 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
5972 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5973 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
5974 *NumberOfStutterBurstsPerFrame = (
5975 *StutterEfficiencyNotIncludingVBlank > 0 ?
5976 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5977 *Z8NumberOfStutterBurstsPerFrame = (
5978 *Z8StutterEfficiencyNotIncludingVBlank > 0 ?
5979 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5980 } else {
5981 *StutterEfficiencyNotIncludingVBlank = 0.;
5982 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
5983 *NumberOfStutterBurstsPerFrame = 0;
5984 *Z8NumberOfStutterBurstsPerFrame = 0;
5985 }
5986 #ifdef __DML_VBA_DEBUG__
5987 dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
5988 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5989 __func__, *StutterEfficiencyNotIncludingVBlank);
5990 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
5991 __func__, *Z8StutterEfficiencyNotIncludingVBlank);
5992 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
5993 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5994 #endif
5995
5996 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5997 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5998 if (BlendingAndTiming[k] == k) {
5999 if (TotalNumberOfActiveOTG == 0) {
6000 doublePixelClock = PixelClock[k];
6001 doubleHTotal = HTotal[k];
6002 doubleVTotal = VTotal[k];
6003 } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
6004 || doubleVTotal != VTotal[k]) {
6005 SameTiming = false;
6006 }
6007 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6008 }
6009 }
6010 }
6011
6012 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6013 LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6014
6015 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
6016 && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
6017 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
6018 + StutterBurstTime * VActiveTimeCriticalSurface
6019 / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6020 } else {
6021 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6022 }
6023 } else {
6024 *StutterEfficiency = 0;
6025 }
6026
6027 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6028 LastZ8StutterPeriod = VActiveTimeCriticalSurface
6029 - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6030 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
6031 MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
6032 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
6033 * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6034 } else {
6035 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6036 }
6037 } else {
6038 *Z8StutterEfficiency = 0.;
6039 }
6040
6041 #ifdef __DML_VBA_DEBUG__
6042 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6043 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6044 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6045 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6046 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6047 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6048 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
6049 __func__, *StutterEfficiencyNotIncludingVBlank);
6050 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6051 #endif
6052
6053 SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
6054 * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface);
6055 LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
6056 MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface)
6057 - DETBufferSizeYCriticalSurface;
6058
6059 *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
6060 && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
6061 && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
6062 && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
6063
6064 #ifdef __DML_VBA_DEBUG__
6065 dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
6066 dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
6067 dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
6068 dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
6069 #endif
6070 } // CalculateStutterEfficiency
6071
/*
 * Derive the DET budget and the minimum compressed buffer size from the
 * return-buffer configuration.
 *
 * Outputs:
 *   MaxTotalDETInKByte             - 4/5 of (config return buffer + ROB),
 *                                    passed through dml_ceil() with a
 *                                    granularity argument of 64.
 *   nomDETInKByte                  - MaxTotalDETInKByte / MaxNumDPP passed
 *                                    through dml_floor() with a granularity
 *                                    argument of 64, or
 *                                    nomDETInKByteOverrideValue when
 *                                    nomDETInKByteOverrideEnable is set.
 *   MinCompressedBufferSizeInKByte - config return buffer size minus
 *                                    MaxTotalDETInKByte (unsigned arithmetic).
 */
void dml32_CalculateMaxDETAndMinCompressedBufferSize(
		unsigned int ConfigReturnBufferSizeInKByte,
		unsigned int ROBBufferSizeInKByte,
		unsigned int MaxNumDPP,
		bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
		unsigned int nomDETInKByteOverrideValue, // VBA_DELTA

		/* Output */
		unsigned int *MaxTotalDETInKByte,
		unsigned int *nomDETInKByte,
		unsigned int *MinCompressedBufferSizeInKByte)
{
	const double return_plus_rob_kb = (double)ConfigReturnBufferSizeInKByte
			+ (double)ROBBufferSizeInKByte;

	*MaxTotalDETInKByte = dml_ceil(return_plus_rob_kb * 4.0 / 5.0, 64);
	*nomDETInKByte = dml_floor((double)*MaxTotalDETInKByte / (double)MaxNumDPP, 64);
	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte);
	dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte);
	dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP);
	dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte);
	dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte);
	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte);
#endif

	/* Nothing more to do unless the caller forces a nominal DET size. */
	if (!nomDETInKByteOverrideEnable)
		return;

	*nomDETInKByte = nomDETInKByteOverrideValue;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
#endif
} // CalculateMaxDETAndMinCompressedBufferSize
6108
/*
 * Decide whether the active-display bandwidth demand can be served.
 *
 * For each of the first NumberOfActiveSurfaces entries the demand is the
 * luma, chroma and cursor read bandwidth, each scaled by its urgent burst
 * factor, plus the meta-row and dpte-row bandwidth multiplied by the
 * surface's DPP count.
 *
 * Returns true when the accumulated demand does not exceed ReturnBW and no
 * surface has its NotUrgentLatencyHiding flag set; false otherwise.
 */
bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[])
{
	unsigned int surf;
	bool latency_hiding_short = false;
	double demand_bw = 0;
	bool supported;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		if (NotUrgentLatencyHiding[surf])
			latency_hiding_short = true;

		/* Terms are accumulated left to right onto the running total. */
		demand_bw = demand_bw
				+ ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf]
				+ NumberOfDPP[surf] * meta_row_bandwidth[surf]
				+ NumberOfDPP[surf] * dpte_row_bandwidth[surf];
	}

	supported = !latency_hiding_short && (demand_bw <= ReturnBW);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NotEnoughUrgentLatencyHiding        = %d\n", __func__, latency_hiding_short);
	dml_print("DML::%s: VActiveBandwith                     = %f\n", __func__, demand_bw);
	dml_print("DML::%s: ReturnBW                            = %f\n", __func__, ReturnBW);
	dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, supported);
#endif
	return supported;
}
6147
dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,double ReturnBW,bool NotUrgentLatencyHiding[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double PrefetchBandwidthLuma[],double PrefetchBandwidthChroma[],double cursor_bw[],double meta_row_bandwidth[],double dpte_row_bandwidth[],double cursor_bw_pre[],double prefetch_vmrow_bw[],unsigned int NumberOfDPP[],double UrgentBurstFactorLuma[],double UrgentBurstFactorChroma[],double UrgentBurstFactorCursor[],double UrgentBurstFactorLumaPre[],double UrgentBurstFactorChromaPre[],double UrgentBurstFactorCursorPre[],double PrefetchBW[],double VRatio[],double MaxVRatioPre,double * MaxPrefetchBandwidth,double * FractionOfUrgentBandwidth,bool * PrefetchBandwidthSupport)6148 void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
6149 double ReturnBW,
6150 bool NotUrgentLatencyHiding[],
6151 double ReadBandwidthLuma[],
6152 double ReadBandwidthChroma[],
6153 double PrefetchBandwidthLuma[],
6154 double PrefetchBandwidthChroma[],
6155 double cursor_bw[],
6156 double meta_row_bandwidth[],
6157 double dpte_row_bandwidth[],
6158 double cursor_bw_pre[],
6159 double prefetch_vmrow_bw[],
6160 unsigned int NumberOfDPP[],
6161 double UrgentBurstFactorLuma[],
6162 double UrgentBurstFactorChroma[],
6163 double UrgentBurstFactorCursor[],
6164 double UrgentBurstFactorLumaPre[],
6165 double UrgentBurstFactorChromaPre[],
6166 double UrgentBurstFactorCursorPre[],
6167 double PrefetchBW[],
6168 double VRatio[],
6169 double MaxVRatioPre,
6170
6171 /* output */
6172 double *MaxPrefetchBandwidth,
6173 double *FractionOfUrgentBandwidth,
6174 bool *PrefetchBandwidthSupport)
6175 {
6176 unsigned int k;
6177 double ActiveBandwidthPerSurface;
6178 bool NotEnoughUrgentLatencyHiding = false;
6179 double TotalActiveBandwidth = 0;
6180 double TotalPrefetchBandwidth = 0;
6181
6182 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6183 if (NotUrgentLatencyHiding[k]) {
6184 NotEnoughUrgentLatencyHiding = true;
6185 }
6186 }
6187
6188 *MaxPrefetchBandwidth = 0;
6189 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6190 ActiveBandwidthPerSurface = ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]);
6191
6192 TotalActiveBandwidth += ActiveBandwidthPerSurface;
6193
6194 TotalPrefetchBandwidth = TotalPrefetchBandwidth + PrefetchBW[k] * VRatio[k];
6195
6196 *MaxPrefetchBandwidth = *MaxPrefetchBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6197 ActiveBandwidthPerSurface,
6198 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6199 }
6200
6201 if (MaxVRatioPre == __DML_MAX_VRATIO_PRE__)
6202 *PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && (TotalPrefetchBandwidth <= TotalActiveBandwidth * __DML_MAX_BW_RATIO_PRE__) && !NotEnoughUrgentLatencyHiding;
6203 else
6204 *PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding;
6205
6206 *FractionOfUrgentBandwidth = *MaxPrefetchBandwidth / ReturnBW;
6207 }
6208
/*
 * Compute the bandwidth left over for immediate flip.
 *
 * Starts from ReturnBW and, for each active surface, subtracts dml_max() of
 *   - the urgent-scaled luma + chroma + cursor read bandwidth, and
 *   - NumberOfDPP * urgent-scaled prefetch luma/chroma bandwidth plus the
 *     urgent-scaled prefetch cursor bandwidth.
 *
 * Returns the remaining value; nothing in this function clamps it at zero.
 */
double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double cursor_bw_pre[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[])
{
	unsigned int surf;
	double remaining_bw = ReturnBW;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		double active_bw = ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf];
		double prefetch_bw = NumberOfDPP[surf]
				* (PrefetchBandwidthLuma[surf] * UrgentBurstFactorLumaPre[surf]
					+ PrefetchBandwidthChroma[surf] * UrgentBurstFactorChromaPre[surf])
				+ cursor_bw_pre[surf] * UrgentBurstFactorCursorPre[surf];

		remaining_bw -= dml_max(active_bw, prefetch_bw);
	}

	return remaining_bw;
}
6235
dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,double ReturnBW,enum immediate_flip_requirement ImmediateFlipRequirement[],double final_flip_bw[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double PrefetchBandwidthLuma[],double PrefetchBandwidthChroma[],double cursor_bw[],double meta_row_bandwidth[],double dpte_row_bandwidth[],double cursor_bw_pre[],double prefetch_vmrow_bw[],unsigned int NumberOfDPP[],double UrgentBurstFactorLuma[],double UrgentBurstFactorChroma[],double UrgentBurstFactorCursor[],double UrgentBurstFactorLumaPre[],double UrgentBurstFactorChromaPre[],double UrgentBurstFactorCursorPre[],double * TotalBandwidth,double * FractionOfUrgentBandwidth,bool * ImmediateFlipBandwidthSupport)6236 void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
6237 double ReturnBW,
6238 enum immediate_flip_requirement ImmediateFlipRequirement[],
6239 double final_flip_bw[],
6240 double ReadBandwidthLuma[],
6241 double ReadBandwidthChroma[],
6242 double PrefetchBandwidthLuma[],
6243 double PrefetchBandwidthChroma[],
6244 double cursor_bw[],
6245 double meta_row_bandwidth[],
6246 double dpte_row_bandwidth[],
6247 double cursor_bw_pre[],
6248 double prefetch_vmrow_bw[],
6249 unsigned int NumberOfDPP[],
6250 double UrgentBurstFactorLuma[],
6251 double UrgentBurstFactorChroma[],
6252 double UrgentBurstFactorCursor[],
6253 double UrgentBurstFactorLumaPre[],
6254 double UrgentBurstFactorChromaPre[],
6255 double UrgentBurstFactorCursorPre[],
6256
6257 /* output */
6258 double *TotalBandwidth,
6259 double *FractionOfUrgentBandwidth,
6260 bool *ImmediateFlipBandwidthSupport)
6261 {
6262 unsigned int k;
6263 *TotalBandwidth = 0;
6264 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6265 if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
6266 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6267 NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6268 NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6269 } else {
6270 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6271 NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6272 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6273 }
6274 }
6275 *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
6276 *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
6277 }
6278
dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,double ReturnBW,double UrgentLatency,unsigned int SwathHeightY[],unsigned int SwathHeightC[],unsigned int SwathWidthY[],unsigned int SwathWidthC[],double BytePerPixelInDETY[],double BytePerPixelInDETC[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],unsigned int NumOfDPP[],unsigned int HTotal[],double PixelClock[],double VRatioY[],double VRatioC[],enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],enum unbounded_requesting_policy UseUnboundedRequesting)6279 bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,
6280 double ReturnBW,
6281 double UrgentLatency,
6282 unsigned int SwathHeightY[],
6283 unsigned int SwathHeightC[],
6284 unsigned int SwathWidthY[],
6285 unsigned int SwathWidthC[],
6286 double BytePerPixelInDETY[],
6287 double BytePerPixelInDETC[],
6288 unsigned int DETBufferSizeY[],
6289 unsigned int DETBufferSizeC[],
6290 unsigned int NumOfDPP[],
6291 unsigned int HTotal[],
6292 double PixelClock[],
6293 double VRatioY[],
6294 double VRatioC[],
6295 enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],
6296 enum unbounded_requesting_policy UseUnboundedRequesting)
6297 {
6298 int k;
6299 double SwathSizeAllSurfaces = 0;
6300 double SwathSizeAllSurfacesInFetchTimeUs;
6301 double DETSwathLatencyHidingUs;
6302 double DETSwathLatencyHidingYUs;
6303 double DETSwathLatencyHidingCUs;
6304 double SwathSizePerSurfaceY[DC__NUM_DPP__MAX];
6305 double SwathSizePerSurfaceC[DC__NUM_DPP__MAX];
6306 bool NotEnoughDETSwathFillLatencyHiding = false;
6307
6308 if (UseUnboundedRequesting == dm_unbounded_requesting)
6309 return false;
6310
6311 /* calculate sum of single swath size for all pipes in bytes */
6312 for (k = 0; k < NumberOfActiveSurfaces; k++) {
6313 SwathSizePerSurfaceY[k] = SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k];
6314
6315 if (SwathHeightC[k] != 0)
6316 SwathSizePerSurfaceC[k] = SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k];
6317 else
6318 SwathSizePerSurfaceC[k] = 0;
6319
6320 SwathSizeAllSurfaces += SwathSizePerSurfaceY[k] + SwathSizePerSurfaceC[k];
6321 }
6322
6323 SwathSizeAllSurfacesInFetchTimeUs = SwathSizeAllSurfaces / ReturnBW + UrgentLatency;
6324
6325 /* ensure all DET - 1 swath can hide a fetch for all surfaces */
6326 for (k = 0; k < NumberOfActiveSurfaces; k++) {
6327 double LineTime = HTotal[k] / PixelClock[k];
6328
6329 /* only care if surface is not phantom */
6330 if (UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
6331 DETSwathLatencyHidingYUs = (dml_floor(DETBufferSizeY[k] / BytePerPixelInDETY[k] / SwathWidthY[k], 1.0) - SwathHeightY[k]) / VRatioY[k] * LineTime;
6332
6333 if (SwathHeightC[k] != 0) {
6334 DETSwathLatencyHidingCUs = (dml_floor(DETBufferSizeC[k] / BytePerPixelInDETC[k] / SwathWidthC[k], 1.0) - SwathHeightC[k]) / VRatioC[k] * LineTime;
6335
6336 DETSwathLatencyHidingUs = dml_min(DETSwathLatencyHidingYUs, DETSwathLatencyHidingCUs);
6337 } else {
6338 DETSwathLatencyHidingUs = DETSwathLatencyHidingYUs;
6339 }
6340
6341 /* DET must be able to hide time to fetch 1 swath for each surface */
6342 if (DETSwathLatencyHidingUs < SwathSizeAllSurfacesInFetchTimeUs) {
6343 NotEnoughDETSwathFillLatencyHiding = true;
6344 break;
6345 }
6346 }
6347 }
6348
6349 return NotEnoughDETSwathFillLatencyHiding;
6350 }
6351