xref: /linux/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c (revision 2c1ed907520c50326b8f604907a8478b27881a2e)
1 /*
2  * Copyright 2022 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: AMD
23  *
24  */
25 #include "display_mode_vba_util_32.h"
26 #include "../dml_inline_defs.h"
27 #include "display_mode_vba_32.h"
28 #include "../display_mode_lib.h"
29 
30 #define DCN32_MAX_FMT_420_BUFFER_WIDTH 4096
31 
dml32_dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)32 unsigned int dml32_dscceComputeDelay(
33 		unsigned int bpc,
34 		double BPP,
35 		unsigned int sliceWidth,
36 		unsigned int numSlices,
37 		enum output_format_class pixelFormat,
38 		enum output_encoder_class Output)
39 {
40 	// valid bpc         = source bits per component in the set of {8, 10, 12}
41 	// valid bpp         = increments of 1/16 of a bit
42 	//                    min = 6/7/8 in N420/N422/444, respectively
43 	//                    max = such that compression is 1:1
44 	//valid sliceWidth  = number of pixels per slice line,
45 	//	must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
46 	//valid numSlices   = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
47 	//valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
48 
49 	// fixed value
50 	unsigned int rcModelSize = 8192;
51 
52 	// N422/N420 operate at 2 pixels per clock
53 	unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
54 	Delay, pixels;
55 
56 	if (pixelFormat == dm_420)
57 		pixelsPerClock = 2;
58 	else if (pixelFormat == dm_n422)
59 		pixelsPerClock = 2;
60 	// #all other modes operate at 1 pixel per clock
61 	else
62 		pixelsPerClock = 1;
63 
64 	//initial transmit delay as per PPS
65 	initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
66 
67 	//compute ssm delay
68 	if (bpc == 8)
69 		D = 81;
70 	else if (bpc == 10)
71 		D = 89;
72 	else
73 		D = 113;
74 
75 	//divide by pixel per cycle to compute slice width as seen by DSC
76 	w = sliceWidth / pixelsPerClock;
77 
78 	//422 mode has an additional cycle of delay
79 	if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
80 		s = 0;
81 	else
82 		s = 1;
83 
84 	//main calculation for the dscce
85 	ix = initalXmitDelay + 45;
86 	wx = (w + 2) / 3;
87 	p = 3 * wx - w;
88 	l0 = ix / w;
89 	a = ix + p * l0;
90 	ax = (a + 2) / 3 + D + 6 + 1;
91 	L = (ax + wx - 1) / wx;
92 	if ((ix % w) == 0 && p != 0)
93 		lstall = 1;
94 	else
95 		lstall = 0;
96 	Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
97 
98 	//dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
99 	pixels = Delay * 3 * pixelsPerClock;
100 
101 #ifdef __DML_VBA_DEBUG__
102 	dml_print("DML::%s: bpc: %d\n", __func__, bpc);
103 	dml_print("DML::%s: BPP: %f\n", __func__, BPP);
104 	dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
105 	dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
106 	dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
107 	dml_print("DML::%s: Output: %d\n", __func__, Output);
108 	dml_print("DML::%s: pixels: %d\n", __func__, pixels);
109 #endif
110 
111 	return pixels;
112 }
113 
dml32_dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)114 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
115 {
116 	unsigned int Delay = 0;
117 
118 	if (pixelFormat == dm_420) {
119 		//   sfr
120 		Delay = Delay + 2;
121 		//   dsccif
122 		Delay = Delay + 0;
123 		//   dscc - input deserializer
124 		Delay = Delay + 3;
125 		//   dscc gets pixels every other cycle
126 		Delay = Delay + 2;
127 		//   dscc - input cdc fifo
128 		Delay = Delay + 12;
129 		//   dscc gets pixels every other cycle
130 		Delay = Delay + 13;
131 		//   dscc - cdc uncertainty
132 		Delay = Delay + 2;
133 		//   dscc - output cdc fifo
134 		Delay = Delay + 7;
135 		//   dscc gets pixels every other cycle
136 		Delay = Delay + 3;
137 		//   dscc - cdc uncertainty
138 		Delay = Delay + 2;
139 		//   dscc - output serializer
140 		Delay = Delay + 1;
141 		//   sft
142 		Delay = Delay + 1;
143 	} else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) {
144 		//   sfr
145 		Delay = Delay + 2;
146 		//   dsccif
147 		Delay = Delay + 1;
148 		//   dscc - input deserializer
149 		Delay = Delay + 5;
150 		//  dscc - input cdc fifo
151 		Delay = Delay + 25;
152 		//   dscc - cdc uncertainty
153 		Delay = Delay + 2;
154 		//   dscc - output cdc fifo
155 		Delay = Delay + 10;
156 		//   dscc - cdc uncertainty
157 		Delay = Delay + 2;
158 		//   dscc - output serializer
159 		Delay = Delay + 1;
160 		//   sft
161 		Delay = Delay + 1;
162 	} else {
163 		//   sfr
164 		Delay = Delay + 2;
165 		//   dsccif
166 		Delay = Delay + 0;
167 		//   dscc - input deserializer
168 		Delay = Delay + 3;
169 		//   dscc - input cdc fifo
170 		Delay = Delay + 12;
171 		//   dscc - cdc uncertainty
172 		Delay = Delay + 2;
173 		//   dscc - output cdc fifo
174 		Delay = Delay + 7;
175 		//   dscc - output serializer
176 		Delay = Delay + 1;
177 		//   dscc - cdc uncertainty
178 		Delay = Delay + 2;
179 		//   sft
180 		Delay = Delay + 1;
181 	}
182 
183 	return Delay;
184 }
185 
186 
IsVertical(enum dm_rotation_angle Scan)187 bool IsVertical(enum dm_rotation_angle Scan)
188 {
189 	bool is_vert = false;
190 
191 	if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m)
192 		is_vert = true;
193 	else
194 		is_vert = false;
195 	return is_vert;
196 }
197 
dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(double HRatio,double HRatioChroma,double VRatio,double VRatioChroma,double MaxDCHUBToPSCLThroughput,double MaxPSCLToLBThroughput,double PixelClock,enum source_format_class SourcePixelFormat,unsigned int HTaps,unsigned int HTapsChroma,unsigned int VTaps,unsigned int VTapsChroma,double * PSCL_THROUGHPUT,double * PSCL_THROUGHPUT_CHROMA,double * DPPCLKUsingSingleDPP)198 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
199 		double HRatio,
200 		double HRatioChroma,
201 		double VRatio,
202 		double VRatioChroma,
203 		double MaxDCHUBToPSCLThroughput,
204 		double MaxPSCLToLBThroughput,
205 		double PixelClock,
206 		enum source_format_class SourcePixelFormat,
207 		unsigned int HTaps,
208 		unsigned int HTapsChroma,
209 		unsigned int VTaps,
210 		unsigned int VTapsChroma,
211 
212 		/* output */
213 		double *PSCL_THROUGHPUT,
214 		double *PSCL_THROUGHPUT_CHROMA,
215 		double *DPPCLKUsingSingleDPP)
216 {
217 	double DPPCLKUsingSingleDPPLuma;
218 	double DPPCLKUsingSingleDPPChroma;
219 
220 	if (HRatio > 1) {
221 		*PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
222 				dml_ceil((double) HTaps / 6.0, 1.0));
223 	} else {
224 		*PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
225 	}
226 
227 	DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio /
228 			*PSCL_THROUGHPUT, 1);
229 
230 	if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
231 		DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
232 
233 	if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
234 			SourcePixelFormat != dm_rgbe_alpha)) {
235 		*PSCL_THROUGHPUT_CHROMA = 0;
236 		*DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
237 	} else {
238 		if (HRatioChroma > 1) {
239 			*PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
240 					HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
241 		} else {
242 			*PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
243 		}
244 		DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
245 				HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
246 		if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
247 			DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
248 		*DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
249 	}
250 }
251 
dml32_CalculateBytePerPixelAndBlockSizes(enum source_format_class SourcePixelFormat,enum dm_swizzle_mode SurfaceTiling,unsigned int * BytePerPixelY,unsigned int * BytePerPixelC,double * BytePerPixelDETY,double * BytePerPixelDETC,unsigned int * BlockHeight256BytesY,unsigned int * BlockHeight256BytesC,unsigned int * BlockWidth256BytesY,unsigned int * BlockWidth256BytesC,unsigned int * MacroTileHeightY,unsigned int * MacroTileHeightC,unsigned int * MacroTileWidthY,unsigned int * MacroTileWidthC)252 void dml32_CalculateBytePerPixelAndBlockSizes(
253 		enum source_format_class SourcePixelFormat,
254 		enum dm_swizzle_mode SurfaceTiling,
255 
256 		/* Output */
257 		unsigned int *BytePerPixelY,
258 		unsigned int *BytePerPixelC,
259 		double  *BytePerPixelDETY,
260 		double  *BytePerPixelDETC,
261 		unsigned int *BlockHeight256BytesY,
262 		unsigned int *BlockHeight256BytesC,
263 		unsigned int *BlockWidth256BytesY,
264 		unsigned int *BlockWidth256BytesC,
265 		unsigned int *MacroTileHeightY,
266 		unsigned int *MacroTileHeightC,
267 		unsigned int *MacroTileWidthY,
268 		unsigned int *MacroTileWidthC)
269 {
270 	if (SourcePixelFormat == dm_444_64) {
271 		*BytePerPixelDETY = 8;
272 		*BytePerPixelDETC = 0;
273 		*BytePerPixelY = 8;
274 		*BytePerPixelC = 0;
275 	} else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
276 		*BytePerPixelDETY = 4;
277 		*BytePerPixelDETC = 0;
278 		*BytePerPixelY = 4;
279 		*BytePerPixelC = 0;
280 	} else if (SourcePixelFormat == dm_444_16) {
281 		*BytePerPixelDETY = 2;
282 		*BytePerPixelDETC = 0;
283 		*BytePerPixelY = 2;
284 		*BytePerPixelC = 0;
285 	} else if (SourcePixelFormat == dm_444_8) {
286 		*BytePerPixelDETY = 1;
287 		*BytePerPixelDETC = 0;
288 		*BytePerPixelY = 1;
289 		*BytePerPixelC = 0;
290 	} else if (SourcePixelFormat == dm_rgbe_alpha) {
291 		*BytePerPixelDETY = 4;
292 		*BytePerPixelDETC = 1;
293 		*BytePerPixelY = 4;
294 		*BytePerPixelC = 1;
295 	} else if (SourcePixelFormat == dm_420_8) {
296 		*BytePerPixelDETY = 1;
297 		*BytePerPixelDETC = 2;
298 		*BytePerPixelY = 1;
299 		*BytePerPixelC = 2;
300 	} else if (SourcePixelFormat == dm_420_12) {
301 		*BytePerPixelDETY = 2;
302 		*BytePerPixelDETC = 4;
303 		*BytePerPixelY = 2;
304 		*BytePerPixelC = 4;
305 	} else {
306 		*BytePerPixelDETY = 4.0 / 3;
307 		*BytePerPixelDETC = 8.0 / 3;
308 		*BytePerPixelY = 2;
309 		*BytePerPixelC = 4;
310 	}
311 #ifdef __DML_VBA_DEBUG__
312 	dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
313 	dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
314 	dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
315 	dml_print("DML::%s: BytePerPixelY    = %d\n", __func__, *BytePerPixelY);
316 	dml_print("DML::%s: BytePerPixelC    = %d\n", __func__, *BytePerPixelC);
317 #endif
318 	if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
319 			|| SourcePixelFormat == dm_444_16
320 			|| SourcePixelFormat == dm_444_8
321 			|| SourcePixelFormat == dm_mono_16
322 			|| SourcePixelFormat == dm_mono_8
323 			|| SourcePixelFormat == dm_rgbe)) {
324 		if (SurfaceTiling == dm_sw_linear)
325 			*BlockHeight256BytesY = 1;
326 		else if (SourcePixelFormat == dm_444_64)
327 			*BlockHeight256BytesY = 4;
328 		else if (SourcePixelFormat == dm_444_8)
329 			*BlockHeight256BytesY = 16;
330 		else
331 			*BlockHeight256BytesY = 8;
332 
333 		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
334 		*BlockHeight256BytesC = 0;
335 		*BlockWidth256BytesC = 0;
336 	} else {
337 		if (SurfaceTiling == dm_sw_linear) {
338 			*BlockHeight256BytesY = 1;
339 			*BlockHeight256BytesC = 1;
340 		} else if (SourcePixelFormat == dm_rgbe_alpha) {
341 			*BlockHeight256BytesY = 8;
342 			*BlockHeight256BytesC = 16;
343 		} else if (SourcePixelFormat == dm_420_8) {
344 			*BlockHeight256BytesY = 16;
345 			*BlockHeight256BytesC = 8;
346 		} else {
347 			*BlockHeight256BytesY = 8;
348 			*BlockHeight256BytesC = 8;
349 		}
350 		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
351 		*BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
352 	}
353 #ifdef __DML_VBA_DEBUG__
354 	dml_print("DML::%s: BlockWidth256BytesY  = %d\n", __func__, *BlockWidth256BytesY);
355 	dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
356 	dml_print("DML::%s: BlockWidth256BytesC  = %d\n", __func__, *BlockWidth256BytesC);
357 	dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
358 #endif
359 
360 	if (SurfaceTiling == dm_sw_linear) {
361 		*MacroTileHeightY = *BlockHeight256BytesY;
362 		*MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
363 		*MacroTileHeightC = *BlockHeight256BytesC;
364 		if (*MacroTileHeightC == 0)
365 			*MacroTileWidthC = 0;
366 		else
367 			*MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
368 	} else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
369 			SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
370 		*MacroTileHeightY = 16 * *BlockHeight256BytesY;
371 		*MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
372 		*MacroTileHeightC = 16 * *BlockHeight256BytesC;
373 		if (*MacroTileHeightC == 0)
374 			*MacroTileWidthC = 0;
375 		else
376 			*MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
377 	} else {
378 		*MacroTileHeightY = 32 * *BlockHeight256BytesY;
379 		*MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
380 		*MacroTileHeightC = 32 * *BlockHeight256BytesC;
381 		if (*MacroTileHeightC == 0)
382 			*MacroTileWidthC = 0;
383 		else
384 			*MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
385 	}
386 
387 #ifdef __DML_VBA_DEBUG__
388 	dml_print("DML::%s: MacroTileWidthY  = %d\n", __func__, *MacroTileWidthY);
389 	dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
390 	dml_print("DML::%s: MacroTileWidthC  = %d\n", __func__, *MacroTileWidthC);
391 	dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
392 #endif
393 } // CalculateBytePerPixelAndBlockSizes
394 
dml32_CalculateSwathAndDETConfiguration(unsigned int DETSizeOverride[],enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],unsigned int ConfigReturnBufferSizeInKByte,unsigned int MaxTotalDETInKByte,unsigned int MinCompressedBufferSizeInKByte,double ForceSingleDPP,unsigned int NumberOfActiveSurfaces,unsigned int nomDETInKByte,enum unbounded_requesting_policy UseUnboundedRequestingFinal,bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,unsigned int PixelChunkSizeKBytes,unsigned int ROBSizeKBytes,unsigned int CompressedBufferSegmentSizeInkByteFinal,enum output_encoder_class Output[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double MaximumSwathWidthLuma[],double MaximumSwathWidthChroma[],enum dm_rotation_angle SourceRotation[],bool ViewportStationary[],enum source_format_class SourcePixelFormat[],enum dm_swizzle_mode SurfaceTiling[],unsigned int ViewportWidth[],unsigned int ViewportHeight[],unsigned int ViewportXStart[],unsigned int ViewportYStart[],unsigned int ViewportXStartC[],unsigned int ViewportYStartC[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],enum odm_combine_mode ODMMode[],unsigned int BlendingAndTiming[],unsigned int BytePerPixY[],unsigned int BytePerPixC[],double BytePerPixDETY[],double BytePerPixDETC[],unsigned int HActive[],double HRatio[],double HRatioChroma[],unsigned int DPPPerSurface[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[],double SwathWidth[],double SwathWidthChroma[],unsigned int SwathHeightY[],unsigned int SwathHeightC[],unsigned int DETBufferSizeInKByte[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],bool * UnboundedRequestEnabled,unsigned int * CompressedBufferSizeInkByte,unsigned int * CompBufReservedSpaceKBytes,bool * 
CompBufReservedSpaceNeedAdjustment,bool ViewportSizeSupportPerSurface[],bool * ViewportSizeSupport)395 void dml32_CalculateSwathAndDETConfiguration(
396 		unsigned int DETSizeOverride[],
397 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
398 		unsigned int ConfigReturnBufferSizeInKByte,
399 		unsigned int MaxTotalDETInKByte,
400 		unsigned int MinCompressedBufferSizeInKByte,
401 		double ForceSingleDPP,
402 		unsigned int NumberOfActiveSurfaces,
403 		unsigned int nomDETInKByte,
404 		enum unbounded_requesting_policy UseUnboundedRequestingFinal,
405 		bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
406 		unsigned int PixelChunkSizeKBytes,
407 		unsigned int ROBSizeKBytes,
408 		unsigned int CompressedBufferSegmentSizeInkByteFinal,
409 		enum output_encoder_class Output[],
410 		double ReadBandwidthLuma[],
411 		double ReadBandwidthChroma[],
412 		double MaximumSwathWidthLuma[],
413 		double MaximumSwathWidthChroma[],
414 		enum dm_rotation_angle SourceRotation[],
415 		bool ViewportStationary[],
416 		enum source_format_class SourcePixelFormat[],
417 		enum dm_swizzle_mode SurfaceTiling[],
418 		unsigned int ViewportWidth[],
419 		unsigned int ViewportHeight[],
420 		unsigned int ViewportXStart[],
421 		unsigned int ViewportYStart[],
422 		unsigned int ViewportXStartC[],
423 		unsigned int ViewportYStartC[],
424 		unsigned int SurfaceWidthY[],
425 		unsigned int SurfaceWidthC[],
426 		unsigned int SurfaceHeightY[],
427 		unsigned int SurfaceHeightC[],
428 		unsigned int Read256BytesBlockHeightY[],
429 		unsigned int Read256BytesBlockHeightC[],
430 		unsigned int Read256BytesBlockWidthY[],
431 		unsigned int Read256BytesBlockWidthC[],
432 		enum odm_combine_mode ODMMode[],
433 		unsigned int BlendingAndTiming[],
434 		unsigned int BytePerPixY[],
435 		unsigned int BytePerPixC[],
436 		double BytePerPixDETY[],
437 		double BytePerPixDETC[],
438 		unsigned int HActive[],
439 		double HRatio[],
440 		double HRatioChroma[],
441 		unsigned int DPPPerSurface[],
442 
443 		/* Output */
444 		unsigned int swath_width_luma_ub[],
445 		unsigned int swath_width_chroma_ub[],
446 		double SwathWidth[],
447 		double SwathWidthChroma[],
448 		unsigned int SwathHeightY[],
449 		unsigned int SwathHeightC[],
450 		unsigned int DETBufferSizeInKByte[],
451 		unsigned int DETBufferSizeY[],
452 		unsigned int DETBufferSizeC[],
453 		bool *UnboundedRequestEnabled,
454 		unsigned int *CompressedBufferSizeInkByte,
455 		unsigned int *CompBufReservedSpaceKBytes,
456 		bool *CompBufReservedSpaceNeedAdjustment,
457 		bool ViewportSizeSupportPerSurface[],
458 		bool *ViewportSizeSupport)
459 {
460 	unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
461 	unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
462 	unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
463 	unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
464 	unsigned int RoundedUpSwathSizeBytesY;
465 	unsigned int RoundedUpSwathSizeBytesC;
466 	double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
467 	double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
468 	unsigned int k;
469 	unsigned int TotalActiveDPP = 0;
470 	bool NoChromaSurfaces = true;
471 	unsigned int DETBufferSizeInKByteForSwathCalculation;
472 
473 #ifdef __DML_VBA_DEBUG__
474 	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
475 	dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes);
476 	dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes);
477 #endif
478 	dml32_CalculateSwathWidth(ForceSingleDPP,
479 			NumberOfActiveSurfaces,
480 			SourcePixelFormat,
481 			SourceRotation,
482 			ViewportStationary,
483 			ViewportWidth,
484 			ViewportHeight,
485 			ViewportXStart,
486 			ViewportYStart,
487 			ViewportXStartC,
488 			ViewportYStartC,
489 			SurfaceWidthY,
490 			SurfaceWidthC,
491 			SurfaceHeightY,
492 			SurfaceHeightC,
493 			ODMMode,
494 			BytePerPixY,
495 			BytePerPixC,
496 			Read256BytesBlockHeightY,
497 			Read256BytesBlockHeightC,
498 			Read256BytesBlockWidthY,
499 			Read256BytesBlockWidthC,
500 			BlendingAndTiming,
501 			HActive,
502 			HRatio,
503 			DPPPerSurface,
504 
505 			/* Output */
506 			SwathWidthdoubleDPP,
507 			SwathWidthdoubleDPPChroma,
508 			SwathWidth,
509 			SwathWidthChroma,
510 			MaximumSwathHeightY,
511 			MaximumSwathHeightC,
512 			swath_width_luma_ub,
513 			swath_width_chroma_ub);
514 
515 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
516 		RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
517 		RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
518 #ifdef __DML_VBA_DEBUG__
519 		dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
520 		dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
521 		dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
522 		dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]);
523 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
524 				RoundedUpMaxSwathSizeBytesY[k]);
525 		dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
526 		dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
527 		dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]);
528 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
529 				RoundedUpMaxSwathSizeBytesC[k]);
530 #endif
531 
532 		if (SourcePixelFormat[k] == dm_420_10) {
533 			RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256);
534 			RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256);
535 		}
536 	}
537 
538 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
539 		TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
540 		if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
541 				SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
542 			NoChromaSurfaces = false;
543 		}
544 	}
545 
546 	// By default, just set the reserved space to 2 pixel chunks size
547 	*CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2;
548 
549 	// if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
550 	// - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
551 	// - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
552 	*CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512);
553 
554 	if (*CompBufReservedSpaceNeedAdjustment == 1) {
555 		*CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512;
556 	}
557 
558 	#ifdef __DML_VBA_DEBUG__
559 		dml_print("DML::%s: CompBufReservedSpaceKBytes          = %d\n",  __func__, *CompBufReservedSpaceKBytes);
560 		dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, *CompBufReservedSpaceNeedAdjustment);
561 	#endif
562 
563 	*UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
564 
565 	dml32_CalculateDETBufferSize(DETSizeOverride,
566 			UseMALLForPStateChange,
567 			ForceSingleDPP,
568 			NumberOfActiveSurfaces,
569 			*UnboundedRequestEnabled,
570 			nomDETInKByte,
571 			MaxTotalDETInKByte,
572 			ConfigReturnBufferSizeInKByte,
573 			MinCompressedBufferSizeInKByte,
574 			CompressedBufferSegmentSizeInkByteFinal,
575 			SourcePixelFormat,
576 			ReadBandwidthLuma,
577 			ReadBandwidthChroma,
578 			RoundedUpMaxSwathSizeBytesY,
579 			RoundedUpMaxSwathSizeBytesC,
580 			DPPPerSurface,
581 
582 			/* Output */
583 			DETBufferSizeInKByte,    // per hubp pipe
584 			CompressedBufferSizeInkByte);
585 
586 #ifdef __DML_VBA_DEBUG__
587 	dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
588 	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
589 	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
590 	dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
591 	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
592 	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
593 #endif
594 
595 	*ViewportSizeSupport = true;
596 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
597 
598 		DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
599 				dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
600 #ifdef __DML_VBA_DEBUG__
601 		dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
602 				DETBufferSizeInKByteForSwathCalculation);
603 #endif
604 
605 		if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
606 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
607 			SwathHeightY[k] = MaximumSwathHeightY[k];
608 			SwathHeightC[k] = MaximumSwathHeightC[k];
609 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
610 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
611 		} else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
612 				RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
613 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
614 			SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
615 			SwathHeightC[k] = MaximumSwathHeightC[k];
616 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
617 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
618 		} else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
619 				RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
620 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
621 			SwathHeightY[k] = MaximumSwathHeightY[k];
622 			SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
623 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
624 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
625 		} else {
626 			SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
627 			SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
628 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
629 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
630 		}
631 
632 		if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
633 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
634 				|| SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
635 						SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
636 			*ViewportSizeSupport = false;
637 			ViewportSizeSupportPerSurface[k] = false;
638 		} else {
639 			ViewportSizeSupportPerSurface[k] = true;
640 		}
641 
642 		if (SwathHeightC[k] == 0) {
643 #ifdef __DML_VBA_DEBUG__
644 			dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
645 #endif
646 			DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
647 			DETBufferSizeC[k] = 0;
648 		} else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
649 #ifdef __DML_VBA_DEBUG__
650 			dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
651 #endif
652 			DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
653 			DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
654 		} else {
655 #ifdef __DML_VBA_DEBUG__
656 			dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
657 #endif
658 			DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
659 			DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
660 		}
661 
662 #ifdef __DML_VBA_DEBUG__
663 		dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
664 		dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
665 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
666 				k, RoundedUpMaxSwathSizeBytesY[k]);
667 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
668 				k, RoundedUpMaxSwathSizeBytesC[k]);
669 		dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY);
670 		dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC);
671 		dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
672 		dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
673 		dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
674 		dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
675 				ViewportSizeSupportPerSurface[k]);
676 #endif
677 
678 	}
679 } // CalculateSwathAndDETConfiguration
680 
dml32_CalculateSwathWidth(bool ForceSingleDPP,unsigned int NumberOfActiveSurfaces,enum source_format_class SourcePixelFormat[],enum dm_rotation_angle SourceRotation[],bool ViewportStationary[],unsigned int ViewportWidth[],unsigned int ViewportHeight[],unsigned int ViewportXStart[],unsigned int ViewportYStart[],unsigned int ViewportXStartC[],unsigned int ViewportYStartC[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],enum odm_combine_mode ODMMode[],unsigned int BytePerPixY[],unsigned int BytePerPixC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],unsigned int BlendingAndTiming[],unsigned int HActive[],double HRatio[],unsigned int DPPPerSurface[],double SwathWidthdoubleDPPY[],double SwathWidthdoubleDPPC[],double SwathWidthY[],double SwathWidthC[],unsigned int MaximumSwathHeightY[],unsigned int MaximumSwathHeightC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[])681 void dml32_CalculateSwathWidth(
682 		bool				ForceSingleDPP,
683 		unsigned int			NumberOfActiveSurfaces,
684 		enum source_format_class	SourcePixelFormat[],
685 		enum dm_rotation_angle		SourceRotation[],
686 		bool				ViewportStationary[],
687 		unsigned int			ViewportWidth[],
688 		unsigned int			ViewportHeight[],
689 		unsigned int			ViewportXStart[],
690 		unsigned int			ViewportYStart[],
691 		unsigned int			ViewportXStartC[],
692 		unsigned int			ViewportYStartC[],
693 		unsigned int			SurfaceWidthY[],
694 		unsigned int			SurfaceWidthC[],
695 		unsigned int			SurfaceHeightY[],
696 		unsigned int			SurfaceHeightC[],
697 		enum odm_combine_mode		ODMMode[],
698 		unsigned int			BytePerPixY[],
699 		unsigned int			BytePerPixC[],
700 		unsigned int			Read256BytesBlockHeightY[],
701 		unsigned int			Read256BytesBlockHeightC[],
702 		unsigned int			Read256BytesBlockWidthY[],
703 		unsigned int			Read256BytesBlockWidthC[],
704 		unsigned int			BlendingAndTiming[],
705 		unsigned int			HActive[],
706 		double				HRatio[],
707 		unsigned int			DPPPerSurface[],
708 
709 		/* Output */
710 		double				SwathWidthdoubleDPPY[],
711 		double				SwathWidthdoubleDPPC[],
712 		double				SwathWidthY[], // per-pipe
713 		double				SwathWidthC[], // per-pipe
714 		unsigned int			MaximumSwathHeightY[],
715 		unsigned int			MaximumSwathHeightC[],
716 		unsigned int			swath_width_luma_ub[], // per-pipe
717 		unsigned int			swath_width_chroma_ub[]) // per-pipe
718 {
719 	unsigned int k, j;
720 	enum odm_combine_mode MainSurfaceODMMode;
721 
722 	unsigned int surface_width_ub_l;
723 	unsigned int surface_height_ub_l;
724 	unsigned int surface_width_ub_c = 0;
725 	unsigned int surface_height_ub_c = 0;
726 
727 #ifdef __DML_VBA_DEBUG__
728 	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
729 	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
730 #endif
731 
732 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
733 		if (!IsVertical(SourceRotation[k]))
734 			SwathWidthdoubleDPPY[k] = ViewportWidth[k];
735 		else
736 			SwathWidthdoubleDPPY[k] = ViewportHeight[k];
737 
738 #ifdef __DML_VBA_DEBUG__
739 		dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
740 		dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
741 #endif
742 
743 		MainSurfaceODMMode = ODMMode[k];
744 		for (j = 0; j < NumberOfActiveSurfaces; ++j) {
745 			if (BlendingAndTiming[k] == j)
746 				MainSurfaceODMMode = ODMMode[j];
747 		}
748 
749 		if (ForceSingleDPP) {
750 			SwathWidthY[k] = SwathWidthdoubleDPPY[k];
751 		} else {
752 			if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
753 				SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
754 						dml_round(HActive[k] / 4.0 * HRatio[k]));
755 			} else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
756 				SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
757 						dml_round(HActive[k] / 2.0 * HRatio[k]));
758 			} else if (DPPPerSurface[k] == 2) {
759 				SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
760 			} else {
761 				SwathWidthY[k] = SwathWidthdoubleDPPY[k];
762 			}
763 		}
764 
765 #ifdef __DML_VBA_DEBUG__
766 		dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
767 		dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
768 		dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
769 		dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]);
770 		dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]);
771 #endif
772 
773 		if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
774 				SourcePixelFormat[k] == dm_420_12) {
775 			SwathWidthC[k] = SwathWidthY[k] / 2;
776 			SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
777 		} else {
778 			SwathWidthC[k] = SwathWidthY[k];
779 			SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
780 		}
781 
782 		if (ForceSingleDPP == true) {
783 			SwathWidthY[k] = SwathWidthdoubleDPPY[k];
784 			SwathWidthC[k] = SwathWidthdoubleDPPC[k];
785 		}
786 
787 		surface_width_ub_l  = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
788 		surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
789 
790 		if (!IsVertical(SourceRotation[k])) {
791 			MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
792 			MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
793 			if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
794 				swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
795 						dml_floor(ViewportXStart[k] +
796 								SwathWidthY[k] +
797 								Read256BytesBlockWidthY[k] - 1,
798 								Read256BytesBlockWidthY[k]) -
799 								dml_floor(ViewportXStart[k],
800 								Read256BytesBlockWidthY[k]));
801 			} else {
802 				swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
803 						dml_ceil(SwathWidthY[k] - 1,
804 								Read256BytesBlockWidthY[k]) +
805 								Read256BytesBlockWidthY[k]);
806 			}
807 			if (BytePerPixC[k] > 0) {
808 				surface_width_ub_c  = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
809 				if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
810 					swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
811 							dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
812 									Read256BytesBlockWidthC[k] - 1,
813 									Read256BytesBlockWidthC[k]) -
814 									dml_floor(ViewportXStartC[k],
815 									Read256BytesBlockWidthC[k]));
816 				} else {
817 					swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
818 							dml_ceil(SwathWidthC[k] - 1,
819 								Read256BytesBlockWidthC[k]) +
820 								Read256BytesBlockWidthC[k]);
821 				}
822 			} else {
823 				swath_width_chroma_ub[k] = 0;
824 			}
825 		} else {
826 			MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
827 			MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
828 
829 			if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
830 				swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] +
831 						SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
832 						Read256BytesBlockHeightY[k]) -
833 						dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
834 			} else {
835 				swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1,
836 						Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
837 			}
838 			if (BytePerPixC[k] > 0) {
839 				surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
840 				if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
841 					swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
842 							dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
843 									Read256BytesBlockHeightC[k] - 1,
844 									Read256BytesBlockHeightC[k]) -
845 									dml_floor(ViewportYStartC[k],
846 											Read256BytesBlockHeightC[k]));
847 				} else {
848 					swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
849 							dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
850 							Read256BytesBlockHeightC[k]);
851 				}
852 			} else {
853 				swath_width_chroma_ub[k] = 0;
854 			}
855 		}
856 
857 #ifdef __DML_VBA_DEBUG__
858 		dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
859 		dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
860 		dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
861 		dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
862 		dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
863 		dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
864 		dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
865 		dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
866 		dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
867 		dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
868 		dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
869 		dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
870 		dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
871 		dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
872 #endif
873 
874 	}
875 } // CalculateSwathWidth
876 
dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,unsigned int TotalNumberOfActiveDPP,bool NoChroma,enum output_encoder_class Output,enum dm_swizzle_mode SurfaceTiling,bool CompBufReservedSpaceNeedAdjustment,bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)877 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
878 			unsigned int TotalNumberOfActiveDPP,
879 			bool NoChroma,
880 			enum output_encoder_class Output,
881 			enum dm_swizzle_mode SurfaceTiling,
882 			bool CompBufReservedSpaceNeedAdjustment,
883 			bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
884 {
885 	bool ret_val = false;
886 
887 	ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable &&
888 			TotalNumberOfActiveDPP == 1 && NoChroma);
889 	if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
890 		ret_val = false;
891 
892 	if (SurfaceTiling == dm_sw_linear)
893 		ret_val = false;
894 
895 	if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
896 		ret_val = false;
897 
898 #ifdef __DML_VBA_DEBUG__
899 	dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, CompBufReservedSpaceNeedAdjustment);
900 	dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
901 	dml_print("DML::%s: ret_val = %d\n",  __func__, ret_val);
902 #endif
903 
904 	return (ret_val);
905 }
906 
dml32_CalculateDETBufferSize(unsigned int DETSizeOverride[],enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],bool ForceSingleDPP,unsigned int NumberOfActiveSurfaces,bool UnboundedRequestEnabled,unsigned int nomDETInKByte,unsigned int MaxTotalDETInKByte,unsigned int ConfigReturnBufferSizeInKByte,unsigned int MinCompressedBufferSizeInKByte,unsigned int CompressedBufferSegmentSizeInkByteFinal,enum source_format_class SourcePixelFormat[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int RoundedUpMaxSwathSizeBytesY[],unsigned int RoundedUpMaxSwathSizeBytesC[],unsigned int DPPPerSurface[],unsigned int DETBufferSizeInKByte[],unsigned int * CompressedBufferSizeInkByte)907 void dml32_CalculateDETBufferSize(
908 		unsigned int DETSizeOverride[],
909 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
910 		bool ForceSingleDPP,
911 		unsigned int NumberOfActiveSurfaces,
912 		bool UnboundedRequestEnabled,
913 		unsigned int nomDETInKByte,
914 		unsigned int MaxTotalDETInKByte,
915 		unsigned int ConfigReturnBufferSizeInKByte,
916 		unsigned int MinCompressedBufferSizeInKByte,
917 		unsigned int CompressedBufferSegmentSizeInkByteFinal,
918 		enum source_format_class SourcePixelFormat[],
919 		double ReadBandwidthLuma[],
920 		double ReadBandwidthChroma[],
921 		unsigned int RoundedUpMaxSwathSizeBytesY[],
922 		unsigned int RoundedUpMaxSwathSizeBytesC[],
923 		unsigned int DPPPerSurface[],
924 		/* Output */
925 		unsigned int DETBufferSizeInKByte[],
926 		unsigned int *CompressedBufferSizeInkByte)
927 {
928 	unsigned int DETBufferSizePoolInKByte;
929 	unsigned int NextDETBufferPieceInKByte;
930 	bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
931 	bool NextPotentialSurfaceToAssignDETPieceFound;
932 	unsigned int NextSurfaceToAssignDETPiece;
933 	double TotalBandwidth;
934 	double BandwidthOfSurfacesNotAssignedDETPiece;
935 	unsigned int max_minDET;
936 	unsigned int minDET;
937 	unsigned int minDET_pipe;
938 	unsigned int j, k;
939 
940 #ifdef __DML_VBA_DEBUG__
941 	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
942 	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
943 	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
944 	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
945 	dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
946 	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
947 	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
948 	dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
949 			CompressedBufferSegmentSizeInkByteFinal);
950 #endif
951 
952 	// Note: Will use default det size if that fits 2 swaths
953 	if (UnboundedRequestEnabled) {
954 		if (DETSizeOverride[0] > 0) {
955 			DETBufferSizeInKByte[0] = DETSizeOverride[0];
956 		} else {
957 			DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
958 					((double) RoundedUpMaxSwathSizeBytesY[0] +
959 							(double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
960 		}
961 		*CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
962 	} else {
963 		DETBufferSizePoolInKByte = MaxTotalDETInKByte;
964 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
965 			DETBufferSizeInKByte[k] = nomDETInKByte;
966 			if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
967 					SourcePixelFormat[k] == dm_420_12) {
968 				max_minDET = nomDETInKByte - 64;
969 			} else {
970 				max_minDET = nomDETInKByte;
971 			}
972 			minDET = 128;
973 			minDET_pipe = 0;
974 
975 			// add DET resource until can hold 2 full swaths
976 			while (minDET <= max_minDET && minDET_pipe == 0) {
977 				if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
978 						(double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
979 					minDET_pipe = minDET;
980 				minDET = minDET + 64;
981 			}
982 
983 #ifdef __DML_VBA_DEBUG__
984 			dml_print("DML::%s: k=%0d minDET        = %d\n", __func__, k, minDET);
985 			dml_print("DML::%s: k=%0d max_minDET    = %d\n", __func__, k, max_minDET);
986 			dml_print("DML::%s: k=%0d minDET_pipe   = %d\n", __func__, k, minDET_pipe);
987 			dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
988 					RoundedUpMaxSwathSizeBytesY[k]);
989 			dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
990 					RoundedUpMaxSwathSizeBytesC[k]);
991 #endif
992 
993 			if (minDET_pipe == 0) {
994 				minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
995 						(double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
996 #ifdef __DML_VBA_DEBUG__
997 				dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
998 						__func__, k, minDET_pipe);
999 #endif
1000 			}
1001 
1002 			if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1003 				DETBufferSizeInKByte[k] = 0;
1004 			} else if (DETSizeOverride[k] > 0) {
1005 				DETBufferSizeInKByte[k] = DETSizeOverride[k];
1006 				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1007 						(ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
1008 			} else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
1009 				DETBufferSizeInKByte[k] = minDET_pipe;
1010 				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1011 						(ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
1012 			}
1013 
1014 #ifdef __DML_VBA_DEBUG__
1015 			dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
1016 			dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
1017 			dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1018 			dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
1019 #endif
1020 		}
1021 
1022 		TotalBandwidth = 0;
1023 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1024 			if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
1025 				TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
1026 		}
1027 #ifdef __DML_VBA_DEBUG__
1028 		dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
1029 		for (uint k = 0; k < NumberOfActiveSurfaces; ++k)
1030 			dml_print("DML::%s: k=%d DETBufferSizeInKByte   = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1031 		dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
1032 		dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
1033 #endif
1034 		BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
1035 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1036 
1037 			if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1038 				DETPieceAssignedToThisSurfaceAlready[k] = true;
1039 			} else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
1040 					(double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
1041 					((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
1042 				DETPieceAssignedToThisSurfaceAlready[k] = true;
1043 				BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1044 						ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1045 			} else {
1046 				DETPieceAssignedToThisSurfaceAlready[k] = false;
1047 			}
1048 #ifdef __DML_VBA_DEBUG__
1049 			dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
1050 					DETPieceAssignedToThisSurfaceAlready[k]);
1051 			dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
1052 					BandwidthOfSurfacesNotAssignedDETPiece);
1053 #endif
1054 		}
1055 
1056 		for (j = 0; j < NumberOfActiveSurfaces; ++j) {
1057 			NextPotentialSurfaceToAssignDETPieceFound = false;
1058 			NextSurfaceToAssignDETPiece = 0;
1059 
1060 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1061 #ifdef __DML_VBA_DEBUG__
1062 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
1063 						ReadBandwidthLuma[k]);
1064 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
1065 						ReadBandwidthChroma[k]);
1066 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
1067 						ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1068 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
1069 						ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1070 				dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
1071 						NextSurfaceToAssignDETPiece);
1072 #endif
1073 				if (!DETPieceAssignedToThisSurfaceAlready[k] &&
1074 						(!NextPotentialSurfaceToAssignDETPieceFound ||
1075 						ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
1076 						ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1077 						ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
1078 					NextSurfaceToAssignDETPiece = k;
1079 					NextPotentialSurfaceToAssignDETPieceFound = true;
1080 				}
1081 #ifdef __DML_VBA_DEBUG__
1082 				dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
1083 						__func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1084 				dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
1085 						__func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1086 #endif
1087 			}
1088 
1089 			if (NextPotentialSurfaceToAssignDETPieceFound) {
1090 				// Note: To show the banker's rounding behavior in VBA and also the fact
1091 				// that the DET buffer size varies due to precision issue
1092 				//
1093 				//double tmp1 =  ((double) DETBufferSizePoolInKByte *
1094 				// (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1095 				// ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1096 				// BandwidthOfSurfacesNotAssignedDETPiece /
1097 				// ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1098 				//double tmp2 =  dml_round((double) DETBufferSizePoolInKByte *
1099 				// (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1100 				// ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1101 				 //BandwidthOfSurfacesNotAssignedDETPiece /
1102 				// ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1103 				//
1104 				//dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
1105 				//dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
1106 
1107 				NextDETBufferPieceInKByte = dml_min(
1108 					dml_round((double) DETBufferSizePoolInKByte *
1109 						(ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1110 						ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1111 						BandwidthOfSurfacesNotAssignedDETPiece /
1112 						((ForceSingleDPP ? 1 :
1113 								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
1114 						(ForceSingleDPP ? 1 :
1115 								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
1116 						dml_floor((double) DETBufferSizePoolInKByte,
1117 						(ForceSingleDPP ? 1 :
1118 								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1119 
1120 				// Above calculation can assign the entire DET buffer allocation to a single pipe.
1121 				// We should limit the per-pipe DET size to the nominal / max per pipe.
1122 				if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1123 					if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
1124 							nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1125 						NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) -
1126 								DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
1127 					} else {
1128 						// Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1129 						// already has the max per-pipe value
1130 						NextDETBufferPieceInKByte = 0;
1131 					}
1132 				}
1133 
1134 #ifdef __DML_VBA_DEBUG__
1135 				dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
1136 					DETBufferSizePoolInKByte);
1137 				dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
1138 					NextSurfaceToAssignDETPiece);
1139 				dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
1140 					NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1141 				dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
1142 					NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1143 				dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
1144 					__func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
1145 				dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
1146 					NextDETBufferPieceInKByte);
1147 				dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
1148 					__func__, j, NextSurfaceToAssignDETPiece,
1149 					DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1150 #endif
1151 
1152 				DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
1153 						DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1154 						+ NextDETBufferPieceInKByte
1155 						/ (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
1156 #ifdef __DML_VBA_DEBUG__
1157 				dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1158 #endif
1159 
1160 				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
1161 				DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
1162 				BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1163 						(ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1164 								ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1165 			}
1166 		}
1167 		*CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
1168 	}
1169 	*CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
1170 
1171 #ifdef __DML_VBA_DEBUG__
1172 	dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1173 	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
1174 	for (uint k = 0; k < NumberOfActiveSurfaces; ++k) {
1175 		dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
1176 				__func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1177 	}
1178 #endif
1179 } // CalculateDETBufferSize
1180 
dml32_CalculateODMMode(unsigned int MaximumPixelsPerLinePerDSCUnit,unsigned int HActive,enum output_format_class OutFormat,enum output_encoder_class Output,enum odm_combine_policy ODMUse,double StateDispclk,double MaxDispclk,bool DSCEnable,unsigned int TotalNumberOfActiveDPP,unsigned int MaxNumDPP,double PixelClock,double DISPCLKDPPCLKDSCCLKDownSpreading,double DISPCLKRampingMargin,double DISPCLKDPPCLKVCOSpeed,unsigned int NumberOfDSCSlices,bool * TotalAvailablePipesSupport,unsigned int * NumberOfDPP,enum odm_combine_mode * ODMMode,double * RequiredDISPCLKPerSurface)1181 void dml32_CalculateODMMode(
1182 		unsigned int MaximumPixelsPerLinePerDSCUnit,
1183 		unsigned int HActive,
1184 		enum output_format_class OutFormat,
1185 		enum output_encoder_class Output,
1186 		enum odm_combine_policy ODMUse,
1187 		double StateDispclk,
1188 		double MaxDispclk,
1189 		bool DSCEnable,
1190 		unsigned int TotalNumberOfActiveDPP,
1191 		unsigned int MaxNumDPP,
1192 		double PixelClock,
1193 		double DISPCLKDPPCLKDSCCLKDownSpreading,
1194 		double DISPCLKRampingMargin,
1195 		double DISPCLKDPPCLKVCOSpeed,
1196 		unsigned int NumberOfDSCSlices,
1197 
1198 		/* Output */
1199 		bool *TotalAvailablePipesSupport,
1200 		unsigned int *NumberOfDPP,
1201 		enum odm_combine_mode *ODMMode,
1202 		double *RequiredDISPCLKPerSurface)
1203 {
1204 
1205 	double SurfaceRequiredDISPCLKWithoutODMCombine;
1206 	double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1207 	double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1208 
1209 	SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
1210 			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1211 			MaxDispclk);
1212 	SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
1213 			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1214 			MaxDispclk);
1215 	SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
1216 			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1217 			MaxDispclk);
1218 	*TotalAvailablePipesSupport = true;
1219 	*ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
1220 
1221 	if (ODMUse == dm_odm_combine_policy_none)
1222 		*ODMMode = dm_odm_combine_mode_disabled;
1223 
1224 	*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
1225 	*NumberOfDPP = 0;
1226 
1227 	// FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
1228 	// (ODMUse == "" || ODMUse == "CombineAsNeeded")
1229 
1230 	if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
1231 			((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
1232 					(DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit))
1233 					|| NumberOfDSCSlices > 8)))) {
1234 		if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
1235 			*ODMMode = dm_odm_combine_mode_4to1;
1236 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1237 			*NumberOfDPP = 4;
1238 		} else {
1239 			*TotalAvailablePipesSupport = false;
1240 		}
1241 	} else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
1242 			(((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
1243 					SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
1244 					(DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit))
1245 					|| (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) {
1246 		if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
1247 			*ODMMode = dm_odm_combine_mode_2to1;
1248 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1249 			*NumberOfDPP = 2;
1250 		} else {
1251 			*TotalAvailablePipesSupport = false;
1252 		}
1253 	} else {
1254 		if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
1255 			*NumberOfDPP = 1;
1256 		else
1257 			*TotalAvailablePipesSupport = false;
1258 	}
1259 	if (OutFormat == dm_420 && HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH &&
1260 			ODMUse != dm_odm_combine_policy_4to1) {
1261 		if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 4) {
1262 			*ODMMode = dm_odm_combine_mode_disabled;
1263 			*NumberOfDPP = 0;
1264 			*TotalAvailablePipesSupport = false;
1265 		} else if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 2 ||
1266 				*ODMMode == dm_odm_combine_mode_4to1) {
1267 			*ODMMode = dm_odm_combine_mode_4to1;
1268 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1269 			*NumberOfDPP = 4;
1270 		} else {
1271 			*ODMMode = dm_odm_combine_mode_2to1;
1272 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1273 			*NumberOfDPP = 2;
1274 		}
1275 	}
1276 	if (Output == dm_hdmi && OutFormat == dm_420 &&
1277 			HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH) {
1278 		*ODMMode = dm_odm_combine_mode_disabled;
1279 		*NumberOfDPP = 0;
1280 		*TotalAvailablePipesSupport = false;
1281 	}
1282 }
1283 
dml32_CalculateRequiredDispclk(enum odm_combine_mode ODMMode,double PixelClock,double DISPCLKDPPCLKDSCCLKDownSpreading,double DISPCLKRampingMargin,double DISPCLKDPPCLKVCOSpeed,double MaxDispclk)1284 double dml32_CalculateRequiredDispclk(
1285 		enum odm_combine_mode ODMMode,
1286 		double PixelClock,
1287 		double DISPCLKDPPCLKDSCCLKDownSpreading,
1288 		double DISPCLKRampingMargin,
1289 		double DISPCLKDPPCLKVCOSpeed,
1290 		double MaxDispclk)
1291 {
1292 	double RequiredDispclk = 0.;
1293 	double PixelClockAfterODM;
1294 	double DISPCLKWithRampingRoundedToDFSGranularity;
1295 	double DISPCLKWithoutRampingRoundedToDFSGranularity;
1296 	double MaxDispclkRoundedDownToDFSGranularity;
1297 
1298 	if (ODMMode == dm_odm_combine_mode_4to1)
1299 		PixelClockAfterODM = PixelClock / 4;
1300 	else if (ODMMode == dm_odm_combine_mode_2to1)
1301 		PixelClockAfterODM = PixelClock / 2;
1302 	else
1303 		PixelClockAfterODM = PixelClock;
1304 
1305 
1306 	DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1307 			PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1308 					* (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);
1309 
1310 	DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1311 			PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed);
1312 
1313 	MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
1314 
1315 	if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1316 		RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
1317 	else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1318 		RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
1319 	else
1320 		RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
1321 
1322 	return RequiredDispclk;
1323 }
1324 
/*
 * dml32_RoundToDFSGranularity - snap a clock to VCOSpeed * 4 / <integer>.
 *
 * The divider VCOSpeed * 4 / Clock is rounded down when round_up is set
 * (giving a result >= Clock) and rounded up otherwise (giving a result
 * <= Clock). A Clock of zero or less yields 0.
 */
double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
{
	double divider;

	if (Clock <= 0.0)
		return 0.0;

	/* A smaller divider means a faster clock, so rounding direction is inverted. */
	divider = round_up ? dml_floor(VCOSpeed * 4.0 / Clock, 1.0)
			   : dml_ceil(VCOSpeed * 4.0 / Clock, 1.0);

	return VCOSpeed * 4.0 / divider;
}
1335 
dml32_CalculateOutputLink(double PHYCLKPerState,double PHYCLKD18PerState,double PHYCLKD32PerState,double Downspreading,bool IsMainSurfaceUsingTheIndicatedTiming,enum output_encoder_class Output,enum output_format_class OutputFormat,unsigned int HTotal,unsigned int HActive,double PixelClockBackEnd,double ForcedOutputLinkBPP,unsigned int DSCInputBitPerComponent,unsigned int NumberOfDSCSlices,double AudioSampleRate,unsigned int AudioSampleLayout,enum odm_combine_mode ODMModeNoDSC,enum odm_combine_mode ODMModeDSC,bool DSCEnable,unsigned int OutputLinkDPLanes,enum dm_output_link_dp_rate OutputLinkDPRate,bool * RequiresDSC,double * RequiresFEC,double * OutBpp,enum dm_output_type * OutputType,enum dm_output_rate * OutputRate,unsigned int * RequiredSlots)1336 void dml32_CalculateOutputLink(
1337 		double PHYCLKPerState,
1338 		double PHYCLKD18PerState,
1339 		double PHYCLKD32PerState,
1340 		double Downspreading,
1341 		bool IsMainSurfaceUsingTheIndicatedTiming,
1342 		enum output_encoder_class Output,
1343 		enum output_format_class OutputFormat,
1344 		unsigned int HTotal,
1345 		unsigned int HActive,
1346 		double PixelClockBackEnd,
1347 		double ForcedOutputLinkBPP,
1348 		unsigned int DSCInputBitPerComponent,
1349 		unsigned int NumberOfDSCSlices,
1350 		double AudioSampleRate,
1351 		unsigned int AudioSampleLayout,
1352 		enum odm_combine_mode ODMModeNoDSC,
1353 		enum odm_combine_mode ODMModeDSC,
1354 		bool DSCEnable,
1355 		unsigned int OutputLinkDPLanes,
1356 		enum dm_output_link_dp_rate OutputLinkDPRate,
1357 
1358 		/* Output */
1359 		bool *RequiresDSC,
1360 		double *RequiresFEC,
1361 		double  *OutBpp,
1362 		enum dm_output_type *OutputType,
1363 		enum dm_output_rate *OutputRate,
1364 		unsigned int *RequiredSlots)
1365 {
1366 	bool LinkDSCEnable;
1367 	unsigned int dummy;
1368 	*RequiresDSC = false;
1369 	*RequiresFEC = false;
1370 	*OutBpp = 0;
1371 	*OutputType = dm_output_type_unknown;
1372 	*OutputRate = dm_output_rate_unknown;
1373 
1374 	if (IsMainSurfaceUsingTheIndicatedTiming) {
1375 		if (Output == dm_hdmi) {
1376 			*RequiresDSC = false;
1377 			*RequiresFEC = false;
1378 			*OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
1379 					PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
1380 					DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1381 					ODMModeNoDSC, ODMModeDSC, &dummy);
1382 			//OutputTypeAndRate = "HDMI";
1383 			*OutputType = dm_output_type_hdmi;
1384 
1385 		} else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
1386 			if (DSCEnable == true) {
1387 				*RequiresDSC = true;
1388 				LinkDSCEnable = true;
1389 				if (Output == dm_dp || Output == dm_dp2p0)
1390 					*RequiresFEC = true;
1391 				else
1392 					*RequiresFEC = false;
1393 			} else {
1394 				*RequiresDSC = false;
1395 				LinkDSCEnable = false;
1396 				if (Output == dm_dp2p0)
1397 					*RequiresFEC = true;
1398 				else
1399 					*RequiresFEC = false;
1400 			}
1401 			if (Output == dm_dp2p0) {
1402 				*OutBpp = 0;
1403 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
1404 						PHYCLKD32PerState >= 10000.0 / 32) {
1405 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1406 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1407 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1408 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1409 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1410 					if (*OutBpp == 0 && PHYCLKD32PerState < 13500.0 / 32 && DSCEnable == true &&
1411 							ForcedOutputLinkBPP == 0) {
1412 						*RequiresDSC = true;
1413 						LinkDSCEnable = true;
1414 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1415 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1416 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1417 								OutputFormat, DSCInputBitPerComponent,
1418 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1419 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1420 					}
1421 					//OutputTypeAndRate = Output & " UHBR10";
1422 					*OutputType = dm_output_type_dp2p0;
1423 					*OutputRate = dm_output_rate_dp_rate_uhbr10;
1424 				}
1425 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
1426 						*OutBpp == 0 && PHYCLKD32PerState >= 13500.0 / 32) {
1427 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1428 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1429 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1430 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1431 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1432 
1433 					if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true &&
1434 							ForcedOutputLinkBPP == 0) {
1435 						*RequiresDSC = true;
1436 						LinkDSCEnable = true;
1437 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1438 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1439 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1440 								OutputFormat, DSCInputBitPerComponent,
1441 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1442 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1443 					}
1444 					//OutputTypeAndRate = Output & " UHBR13p5";
1445 					*OutputType = dm_output_type_dp2p0;
1446 					*OutputRate = dm_output_rate_dp_rate_uhbr13p5;
1447 				}
1448 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
1449 						*OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
1450 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1451 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1452 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1453 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1454 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1455 					if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1456 						*RequiresDSC = true;
1457 						LinkDSCEnable = true;
1458 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1459 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1460 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1461 								OutputFormat, DSCInputBitPerComponent,
1462 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1463 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1464 					}
1465 					//OutputTypeAndRate = Output & " UHBR20";
1466 					*OutputType = dm_output_type_dp2p0;
1467 					*OutputRate = dm_output_rate_dp_rate_uhbr20;
1468 				}
1469 			} else {
1470 				*OutBpp = 0;
1471 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
1472 						PHYCLKPerState >= 270) {
1473 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1474 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1475 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1476 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1477 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1478 					if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
1479 							ForcedOutputLinkBPP == 0) {
1480 						*RequiresDSC = true;
1481 						LinkDSCEnable = true;
1482 						if (Output == dm_dp)
1483 							*RequiresFEC = true;
1484 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1485 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1486 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1487 								OutputFormat, DSCInputBitPerComponent,
1488 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1489 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1490 					}
1491 					//OutputTypeAndRate = Output & " HBR";
1492 					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1493 					*OutputRate = dm_output_rate_dp_rate_hbr;
1494 				}
1495 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
1496 						*OutBpp == 0 && PHYCLKPerState >= 540) {
1497 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1498 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1499 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1500 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1501 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1502 
1503 					if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
1504 							ForcedOutputLinkBPP == 0) {
1505 						*RequiresDSC = true;
1506 						LinkDSCEnable = true;
1507 						if (Output == dm_dp)
1508 							*RequiresFEC = true;
1509 
1510 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1511 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1512 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1513 								OutputFormat, DSCInputBitPerComponent,
1514 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1515 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1516 					}
1517 					//OutputTypeAndRate = Output & " HBR2";
1518 					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1519 					*OutputRate = dm_output_rate_dp_rate_hbr2;
1520 				}
1521 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
1522 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1523 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1524 							ForcedOutputLinkBPP, LinkDSCEnable, Output,
1525 							OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
1526 							AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
1527 							RequiredSlots);
1528 
1529 					if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1530 						*RequiresDSC = true;
1531 						LinkDSCEnable = true;
1532 						if (Output == dm_dp)
1533 							*RequiresFEC = true;
1534 
1535 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1536 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1537 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1538 								OutputFormat, DSCInputBitPerComponent,
1539 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1540 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1541 					}
1542 					//OutputTypeAndRate = Output & " HBR3";
1543 					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1544 					*OutputRate = dm_output_rate_dp_rate_hbr3;
1545 				}
1546 			}
1547 		}
1548 	}
1549 }
1550 
/*
 * Derive the global DPPCLK and the per-surface DPPCLK values.
 *
 * Pass 1: each surface's clock is DPPCLKUsingSingleDPP[k] / DPPPerSurface[k]
 *         scaled up by the down-spread percentage; *GlobalDPPCLK tracks the
 *         largest of them.
 * Then *GlobalDPPCLK is rounded up with dml32_RoundToDFSGranularity().
 * Pass 2: each surface's clock is rounded up to a whole number of
 *         1/255 steps of *GlobalDPPCLK.
 */
void dml32_CalculateDPPCLK(
		unsigned int NumberOfActiveSurfaces,
		double DISPCLKDPPCLKDSCCLKDownSpreading,
		double DISPCLKDPPCLKVCOSpeed,
		double DPPCLKUsingSingleDPP[],
		unsigned int DPPPerSurface[],

		/* output */
		double *GlobalDPPCLK,
		double Dppclk[])
{
	const double spread_scale = 1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100;
	double steps;
	unsigned int i;

	*GlobalDPPCLK = 0;
	for (i = 0; i < NumberOfActiveSurfaces; i++) {
		Dppclk[i] = DPPCLKUsingSingleDPP[i] / DPPPerSurface[i] * spread_scale;
		*GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[i]);
	}

	*GlobalDPPCLK = dml32_RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed);

	for (i = 0; i < NumberOfActiveSurfaces; i++) {
		/* Number of 1/255ths of the global clock needed, rounded up. */
		steps = dml_ceil(Dppclk[i] * 255.0 / *GlobalDPPCLK, 1.0);
		Dppclk[i] = *GlobalDPPCLK / 255 * steps;
	}
}
1572 
dml32_TruncToValidBPP(double LinkBitRate,unsigned int Lanes,unsigned int HTotal,unsigned int HActive,double PixelClock,double DesiredBPP,bool DSCEnable,enum output_encoder_class Output,enum output_format_class Format,unsigned int DSCInputBitPerComponent,unsigned int DSCSlices,unsigned int AudioRate,unsigned int AudioLayout,enum odm_combine_mode ODMModeNoDSC,enum odm_combine_mode ODMModeDSC,unsigned int * RequiredSlots)1573 double dml32_TruncToValidBPP(
1574 		double LinkBitRate,
1575 		unsigned int Lanes,
1576 		unsigned int HTotal,
1577 		unsigned int HActive,
1578 		double PixelClock,
1579 		double DesiredBPP,
1580 		bool DSCEnable,
1581 		enum output_encoder_class Output,
1582 		enum output_format_class Format,
1583 		unsigned int DSCInputBitPerComponent,
1584 		unsigned int DSCSlices,
1585 		unsigned int AudioRate,
1586 		unsigned int AudioLayout,
1587 		enum odm_combine_mode ODMModeNoDSC,
1588 		enum odm_combine_mode ODMModeDSC,
1589 		/* Output */
1590 		unsigned int *RequiredSlots)
1591 {
1592 	double    MaxLinkBPP;
1593 	unsigned int   MinDSCBPP;
1594 	double    MaxDSCBPP;
1595 	unsigned int   NonDSCBPP0;
1596 	unsigned int   NonDSCBPP1;
1597 	unsigned int   NonDSCBPP2;
1598 	unsigned int   NonDSCBPP3 = BPP_INVALID;
1599 
1600 	if (Format == dm_420) {
1601 		NonDSCBPP0 = 12;
1602 		NonDSCBPP1 = 15;
1603 		NonDSCBPP2 = 18;
1604 		MinDSCBPP = 6;
1605 		MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
1606 	} else if (Format == dm_444) {
1607 		NonDSCBPP3 = 18;
1608 		NonDSCBPP0 = 24;
1609 		NonDSCBPP1 = 30;
1610 		NonDSCBPP2 = 36;
1611 		MinDSCBPP = 8;
1612 		MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
1613 	} else {
1614 		if (Output == dm_hdmi) {
1615 			NonDSCBPP0 = 24;
1616 			NonDSCBPP1 = 24;
1617 			NonDSCBPP2 = 24;
1618 		} else {
1619 			NonDSCBPP0 = 16;
1620 			NonDSCBPP1 = 20;
1621 			NonDSCBPP2 = 24;
1622 		}
1623 		if (Format == dm_n422) {
1624 			MinDSCBPP = 7;
1625 			MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
1626 		} else {
1627 			MinDSCBPP = 8;
1628 			MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
1629 		}
1630 	}
1631 	if (Output == dm_dp2p0) {
1632 		MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
1633 	} else if (DSCEnable && Output == dm_dp) {
1634 		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
1635 	} else {
1636 		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
1637 	}
1638 
1639 	if (DSCEnable) {
1640 		if (ODMModeDSC == dm_odm_combine_mode_4to1)
1641 			MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1642 		else if (ODMModeDSC == dm_odm_combine_mode_2to1)
1643 			MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1644 		else if (ODMModeDSC == dm_odm_split_mode_1to2)
1645 			MaxLinkBPP = 2 * MaxLinkBPP;
1646 	} else {
1647 		if (ODMModeNoDSC == dm_odm_combine_mode_4to1)
1648 			MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1649 		else if (ODMModeNoDSC == dm_odm_combine_mode_2to1)
1650 			MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1651 		else if (ODMModeNoDSC == dm_odm_split_mode_1to2)
1652 			MaxLinkBPP = 2 * MaxLinkBPP;
1653 	}
1654 
1655 	*RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
1656 
1657 	if (DesiredBPP == 0) {
1658 		if (DSCEnable) {
1659 			if (MaxLinkBPP < MinDSCBPP)
1660 				return BPP_INVALID;
1661 			else if (MaxLinkBPP >= MaxDSCBPP)
1662 				return MaxDSCBPP;
1663 			else
1664 				return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
1665 		} else {
1666 			if (MaxLinkBPP >= NonDSCBPP2)
1667 				return NonDSCBPP2;
1668 			else if (MaxLinkBPP >= NonDSCBPP1)
1669 				return NonDSCBPP1;
1670 			else if (MaxLinkBPP >= NonDSCBPP0)
1671 				return 16.0;
1672 			else if ((Output == dm_dp2p0 || Output == dm_dp) && NonDSCBPP3 != BPP_INVALID &&  MaxLinkBPP >= NonDSCBPP3)
1673 				return NonDSCBPP3; // Special case to allow 6bpc RGB for DP connections.
1674 			else
1675 				return BPP_INVALID;
1676 		}
1677 	} else {
1678 		if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
1679 				DesiredBPP <= NonDSCBPP0)) ||
1680 				(DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
1681 			return BPP_INVALID;
1682 		else
1683 			return DesiredBPP;
1684 	}
1685 } // TruncToValidBPP
1686 
dml32_RequiredDTBCLK(bool DSCEnable,double PixelClock,enum output_format_class OutputFormat,double OutputBpp,unsigned int DSCSlices,unsigned int HTotal,unsigned int HActive,unsigned int AudioRate,unsigned int AudioLayout)1687 double dml32_RequiredDTBCLK(
1688 		bool              DSCEnable,
1689 		double               PixelClock,
1690 		enum output_format_class  OutputFormat,
1691 		double               OutputBpp,
1692 		unsigned int              DSCSlices,
1693 		unsigned int                 HTotal,
1694 		unsigned int                 HActive,
1695 		unsigned int              AudioRate,
1696 		unsigned int              AudioLayout)
1697 {
1698 	double PixelWordRate;
1699 	double HCActive;
1700 	double HCBlank;
1701 	double AverageTribyteRate;
1702 	double HActiveTribyteRate;
1703 
1704 	if (DSCEnable != true)
1705 		return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
1706 
1707 	PixelWordRate = PixelClock /  (OutputFormat == dm_444 ? 1 : 2);
1708 	HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp *
1709 			dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
1710 	HCBlank = 64 + 32 *
1711 			dml_ceil(AudioRate *  (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
1712 	AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
1713 	HActiveTribyteRate = PixelWordRate * HCActive / HActive;
1714 	return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
1715 }
1716 
dml32_DSCDelayRequirement(bool DSCEnabled,enum odm_combine_mode ODMMode,unsigned int DSCInputBitPerComponent,double OutputBpp,unsigned int HActive,unsigned int HTotal,unsigned int NumberOfDSCSlices,enum output_format_class OutputFormat,enum output_encoder_class Output,double PixelClock,double PixelClockBackEnd,double dsc_delay_factor_wa)1717 unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
1718 		enum odm_combine_mode ODMMode,
1719 		unsigned int DSCInputBitPerComponent,
1720 		double OutputBpp,
1721 		unsigned int HActive,
1722 		unsigned int HTotal,
1723 		unsigned int NumberOfDSCSlices,
1724 		enum output_format_class  OutputFormat,
1725 		enum output_encoder_class Output,
1726 		double PixelClock,
1727 		double PixelClockBackEnd,
1728 		double dsc_delay_factor_wa)
1729 {
1730 	unsigned int DSCDelayRequirement_val;
1731 
1732 	if (DSCEnabled == true && OutputBpp != 0) {
1733 		if (ODMMode == dm_odm_combine_mode_4to1) {
1734 			DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1735 					dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4,
1736 					OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1737 		} else if (ODMMode == dm_odm_combine_mode_2to1) {
1738 			DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1739 					dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2,
1740 					OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1741 		} else {
1742 			DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1743 					dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices,
1744 					OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output);
1745 		}
1746 
1747 		DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
1748 				dml_ceil((double)DSCDelayRequirement_val / HActive, 1);
1749 
1750 		DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;
1751 
1752 	} else {
1753 		DSCDelayRequirement_val = 0;
1754 	}
1755 
1756 #ifdef __DML_VBA_DEBUG__
1757 	dml_print("DML::%s: DSCEnabled              = %d\n", __func__, DSCEnabled);
1758 	dml_print("DML::%s: OutputBpp               = %f\n", __func__, OutputBpp);
1759 	dml_print("DML::%s: HActive                 = %d\n", __func__, HActive);
1760 	dml_print("DML::%s: OutputFormat            = %d\n", __func__, OutputFormat);
1761 	dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
1762 	dml_print("DML::%s: NumberOfDSCSlices       = %d\n", __func__, NumberOfDSCSlices);
1763 	dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val);
1764 #endif
1765 
1766 	return dml_ceil(DSCDelayRequirement_val * dsc_delay_factor_wa, 1);
1767 }
1768 
dml32_CalculateSurfaceSizeInMall(unsigned int NumberOfActiveSurfaces,unsigned int MALLAllocatedForDCN,enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],bool DCCEnable[],bool ViewportStationary[],unsigned int ViewportXStartY[],unsigned int ViewportYStartY[],unsigned int ViewportXStartC[],unsigned int ViewportYStartC[],unsigned int ViewportWidthY[],unsigned int ViewportHeightY[],unsigned int BytesPerPixelY[],unsigned int ViewportWidthC[],unsigned int ViewportHeightC[],unsigned int BytesPerPixelC[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int ReadBlockWidthY[],unsigned int ReadBlockWidthC[],unsigned int ReadBlockHeightY[],unsigned int ReadBlockHeightC[],unsigned int DCCMetaPitchY[],unsigned int DCCMetaPitchC[],unsigned int SurfaceSizeInMALL[],bool * ExceededMALLSize)1769 void dml32_CalculateSurfaceSizeInMall(
1770 		unsigned int NumberOfActiveSurfaces,
1771 		unsigned int MALLAllocatedForDCN,
1772 		enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1773 		enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],
1774 		bool DCCEnable[],
1775 		bool ViewportStationary[],
1776 		unsigned int ViewportXStartY[],
1777 		unsigned int ViewportYStartY[],
1778 		unsigned int ViewportXStartC[],
1779 		unsigned int ViewportYStartC[],
1780 		unsigned int ViewportWidthY[],
1781 		unsigned int ViewportHeightY[],
1782 		unsigned int BytesPerPixelY[],
1783 		unsigned int ViewportWidthC[],
1784 		unsigned int ViewportHeightC[],
1785 		unsigned int BytesPerPixelC[],
1786 		unsigned int SurfaceWidthY[],
1787 		unsigned int SurfaceWidthC[],
1788 		unsigned int SurfaceHeightY[],
1789 		unsigned int SurfaceHeightC[],
1790 		unsigned int Read256BytesBlockWidthY[],
1791 		unsigned int Read256BytesBlockWidthC[],
1792 		unsigned int Read256BytesBlockHeightY[],
1793 		unsigned int Read256BytesBlockHeightC[],
1794 		unsigned int ReadBlockWidthY[],
1795 		unsigned int ReadBlockWidthC[],
1796 		unsigned int ReadBlockHeightY[],
1797 		unsigned int ReadBlockHeightC[],
1798 		unsigned int DCCMetaPitchY[],
1799 		unsigned int DCCMetaPitchC[],
1800 
1801 		/* Output */
1802 		unsigned int    SurfaceSizeInMALL[],
1803 		bool *ExceededMALLSize)
1804 {
1805 	unsigned int k;
1806 	unsigned int TotalSurfaceSizeInMALLForSS = 0;
1807 	unsigned int TotalSurfaceSizeInMALLForSubVP = 0;
1808 	unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024;
1809 
1810 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1811 		if (ViewportStationary[k]) {
1812 			SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]),
1813 					dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
1814 						ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k],
1815 						ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k],
1816 						ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1817 						ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
1818 						dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k];
1819 
1820 			if (ReadBlockWidthC[k] > 0) {
1821 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1822 						dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]),
1823 							dml_floor(ViewportXStartC[k] + ViewportWidthC[k] +
1824 							ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
1825 							dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
1826 							dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]),
1827 							dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1828 							ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
1829 							dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) *
1830 							BytesPerPixelC[k];
1831 			}
1832 			if (DCCEnable[k] == true) {
1833 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1834 						(dml_min(dml_ceil(DCCMetaPitchY[k], 8 * Read256BytesBlockWidthY[k]),
1835 							dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *
1836 							Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
1837 							- dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
1838 							* dml_min(dml_ceil(SurfaceHeightY[k], 8 *
1839 							Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1840 							ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 *
1841 							Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 *
1842 							Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256) + (64 * 1024);
1843 				if (Read256BytesBlockWidthC[k] > 0) {
1844 					SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1845 							dml_min(dml_ceil(DCCMetaPitchC[k], 8 *
1846 								Read256BytesBlockWidthC[k]),
1847 								dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
1848 								* Read256BytesBlockWidthC[k] - 1, 8 *
1849 								Read256BytesBlockWidthC[k]) -
1850 								dml_floor(ViewportXStartC[k], 8 *
1851 								Read256BytesBlockWidthC[k])) *
1852 								dml_min(dml_ceil(SurfaceHeightC[k], 8 *
1853 								Read256BytesBlockHeightC[k]),
1854 								dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1855 								8 * Read256BytesBlockHeightC[k] - 1, 8 *
1856 								Read256BytesBlockHeightC[k]) -
1857 								dml_floor(ViewportYStartC[k], 8 *
1858 								Read256BytesBlockHeightC[k])) *
1859 								BytesPerPixelC[k] / 256;
1860 				}
1861 			}
1862 		} else {
1863 			SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] +
1864 					ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
1865 					dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] +
1866 							ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) *
1867 							BytesPerPixelY[k];
1868 			if (ReadBlockWidthC[k] > 0) {
1869 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1870 						dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] +
1871 								ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
1872 						dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] +
1873 								ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) *
1874 								BytesPerPixelC[k];
1875 			}
1876 			if (DCCEnable[k] == true) {
1877 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1878 						(dml_ceil(dml_min(DCCMetaPitchY[k], ViewportWidthY[k] + 8 *
1879 								Read256BytesBlockWidthY[k] - 1), 8 *
1880 								Read256BytesBlockWidthY[k]) *
1881 						dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
1882 								Read256BytesBlockHeightY[k] - 1), 8 *
1883 								Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256) + (64 * 1024);
1884 
1885 				if (Read256BytesBlockWidthC[k] > 0) {
1886 					SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1887 							dml_ceil(dml_min(DCCMetaPitchC[k], ViewportWidthC[k] + 8 *
1888 									Read256BytesBlockWidthC[k] - 1), 8 *
1889 									Read256BytesBlockWidthC[k]) *
1890 							dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 *
1891 									Read256BytesBlockHeightC[k] - 1), 8 *
1892 									Read256BytesBlockHeightC[k]) *
1893 									BytesPerPixelC[k] / 256;
1894 				}
1895 			}
1896 		}
1897 	}
1898 
1899 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1900 		/* SS and Subvp counted separate as they are never used at the same time */
1901 		if (UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe)
1902 			TotalSurfaceSizeInMALLForSubVP = TotalSurfaceSizeInMALLForSubVP + SurfaceSizeInMALL[k];
1903 		else if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
1904 			TotalSurfaceSizeInMALLForSS = TotalSurfaceSizeInMALLForSS + SurfaceSizeInMALL[k];
1905 	}
1906 	*ExceededMALLSize =  (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) ||
1907 							(TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes);
1908 } // CalculateSurfaceSizeInMall
1909 
/*
 * dml32_CalculateVMRowAndSwath - per-surface VM/PTE row sizing, prefetch
 * source line counts and row bandwidth.
 *
 * For every surface k in [0, NumberOfActiveSurfaces) this function:
 *  - selects vm_group_bytes[k] / dpte_group_bytes[k] from HostVMEnable,
 *    GPUVMEnable, GPUVMMinPageSizeKBytes[k] and the source rotation;
 *  - splits the PTE request buffer between the Y and C planes according to
 *    the source pixel format;
 *  - calls dml32_CalculateVMAndRowBytes() and
 *    dml32_CalculatePrefetchSourceLines() for the Y plane, and for the C plane
 *    when the format is dm_420_8, dm_420_10, dm_420_12 or dm_rgbe_alpha
 *    (otherwise the C-plane intermediates are set to 0);
 *  - records whether the PTE rows fit in the buffer, both for the regular
 *    row layout and for the "one row per frame" layout;
 *  - after dml32_CalculateMALLUseForStaticScreen() has filled
 *    UsesMALLForStaticScreen[], derives PTE_BUFFER_MODE[], BIGK_FRAGMENT_SIZE[],
 *    use_one_row_for_frame[] and use_one_row_for_frame_flip[], switches the
 *    dpte row outputs to the one-row-per-frame values when that mode is
 *    selected, and finally calls dml32_CalculateRowBandwidth().
 *
 * All array parameters are indexed by surface and must hold at least
 * NumberOfActiveSurfaces elements. Everything listed after the "Output"
 * marker is written by this function.
 */
void dml32_CalculateVMRowAndSwath(
		unsigned int NumberOfActiveSurfaces,
		DmlPipe myPipe[],
		unsigned int SurfaceSizeInMALL[],
		unsigned int PTEBufferSizeInRequestsLuma,
		unsigned int PTEBufferSizeInRequestsChroma,
		unsigned int DCCMetaBufferSizeBytes,
		enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
		unsigned int MALLAllocatedForDCN,
		double SwathWidthY[],
		double SwathWidthC[],
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int HostVMMaxNonCachedPageTableLevels,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int GPUVMMinPageSizeKBytes[],
		unsigned int HostVMMinPageSize,

		/* Output */
		bool PTEBufferSizeNotExceeded[],
		bool DCCMetaBufferSizeNotExceeded[],
		unsigned int dpte_row_width_luma_ub[],
		unsigned int dpte_row_width_chroma_ub[],
		unsigned int dpte_row_height_luma[],
		unsigned int dpte_row_height_chroma[],
		unsigned int dpte_row_height_linear_luma[],     // VBA_DELTA
		unsigned int dpte_row_height_linear_chroma[],   // VBA_DELTA
		unsigned int meta_req_width[],
		unsigned int meta_req_width_chroma[],
		unsigned int meta_req_height[],
		unsigned int meta_req_height_chroma[],
		unsigned int meta_row_width[],
		unsigned int meta_row_width_chroma[],
		unsigned int meta_row_height[],
		unsigned int meta_row_height_chroma[],
		unsigned int vm_group_bytes[],
		unsigned int dpte_group_bytes[],
		unsigned int PixelPTEReqWidthY[],
		unsigned int PixelPTEReqHeightY[],
		unsigned int PTERequestSizeY[],
		unsigned int PixelPTEReqWidthC[],
		unsigned int PixelPTEReqHeightC[],
		unsigned int PTERequestSizeC[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int meta_pte_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		unsigned int meta_pte_bytes_per_frame_ub_c[],
		double PrefetchSourceLinesY[],
		double PrefetchSourceLinesC[],
		double VInitPreFillY[],
		double VInitPreFillC[],
		unsigned int MaxNumSwathY[],
		unsigned int MaxNumSwathC[],
		double meta_row_bw[],
		double dpte_row_bw[],
		double PixelPTEBytesPerRow[],
		double PDEAndMetaPTEBytesFrame[],
		double MetaRowByte[],
		bool use_one_row_for_frame[],
		bool use_one_row_for_frame_flip[],
		bool UsesMALLForStaticScreen[],
		bool PTE_BUFFER_MODE[],
		unsigned int BIGK_FRAGMENT_SIZE[])
{
	unsigned int k;
	unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
	unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
	unsigned int PDEAndMetaPTEBytesFrameY;
	unsigned int PDEAndMetaPTEBytesFrameC;
	/*
	 * Zero-initialized: dml32_CalculateVMAndRowBytes() does not write its
	 * MetaRowByte output on every path in the original implementation.
	 */
	unsigned int MetaRowByteY[DC__NUM_DPP__MAX] = {0};
	unsigned int MetaRowByteC[DC__NUM_DPP__MAX] = {0};
	unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
	unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
	unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
	unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
	unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
	unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
	unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
	unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
	bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];

	/* Pass 1: per-plane VM/row byte counts and prefetch source lines. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		if (HostVMEnable == true) {
			vm_group_bytes[k] = 512;
			dpte_group_bytes[k] = 512;
		} else if (GPUVMEnable == true) {
			vm_group_bytes[k] = 2048;
			if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
				dpte_group_bytes[k] = 512;
			else
				dpte_group_bytes[k] = 2048;
		} else {
			vm_group_bytes[k] = 0;
			dpte_group_bytes[k] = 0;
		}

		if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
				myPipe[k].SourcePixelFormat == dm_420_12 ||
				myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
			/*
			 * Non-rotated 10/12-bit 4:2:0 shares the combined buffer
			 * evenly; every other two-plane case keeps the
			 * caller-provided luma/chroma split.
			 */
			if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
					!IsVertical(myPipe[k].SourceRotation)) {
				PTEBufferSizeInRequestsForLuma[k] =
						(PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
				PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
			} else {
				PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
				PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
			}

			PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
					myPipe[k].ViewportStationary,
					myPipe[k].DCCEnable,
					myPipe[k].DPPPerSurface,
					myPipe[k].BlockHeight256BytesC,
					myPipe[k].BlockWidth256BytesC,
					myPipe[k].SourcePixelFormat,
					myPipe[k].SurfaceTiling,
					myPipe[k].BytePerPixelC,
					myPipe[k].SourceRotation,
					SwathWidthC[k],
					myPipe[k].ViewportHeightChroma,
					myPipe[k].ViewportXStartC,
					myPipe[k].ViewportYStartC,
					GPUVMEnable,
					HostVMEnable,
					HostVMMaxNonCachedPageTableLevels,
					GPUVMMaxPageTableLevels,
					GPUVMMinPageSizeKBytes[k],
					HostVMMinPageSize,
					PTEBufferSizeInRequestsForChroma[k],
					myPipe[k].PitchC,
					myPipe[k].DCCMetaPitchC,
					myPipe[k].BlockWidthC,
					myPipe[k].BlockHeightC,

					/* Output */
					&MetaRowByteC[k],
					&PixelPTEBytesPerRowC[k],
					&dpte_row_width_chroma_ub[k],
					&dpte_row_height_chroma[k],
					&dpte_row_height_linear_chroma[k],
					&PixelPTEBytesPerRowC_one_row_per_frame[k],
					&dpte_row_width_chroma_ub_one_row_per_frame[k],
					&dpte_row_height_chroma_one_row_per_frame[k],
					&meta_req_width_chroma[k],
					&meta_req_height_chroma[k],
					&meta_row_width_chroma[k],
					&meta_row_height_chroma[k],
					&PixelPTEReqWidthC[k],
					&PixelPTEReqHeightC[k],
					&PTERequestSizeC[k],
					&dpde0_bytes_per_frame_ub_c[k],
					&meta_pte_bytes_per_frame_ub_c[k]);

			PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
					myPipe[k].VRatioChroma,
					myPipe[k].VTapsChroma,
					myPipe[k].InterlaceEnable,
					myPipe[k].ProgressiveToInterlaceUnitInOPP,
					myPipe[k].SwathHeightC,
					myPipe[k].SourceRotation,
					myPipe[k].ViewportStationary,
					SwathWidthC[k],
					myPipe[k].ViewportHeightChroma,
					myPipe[k].ViewportXStartC,
					myPipe[k].ViewportYStartC,

					/* Output */
					&VInitPreFillC[k],
					&MaxNumSwathC[k]);
		} else {
			/* No C plane: luma gets the whole buffer, chroma values are 0. */
			PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
			PTEBufferSizeInRequestsForChroma[k] = 0;
			PixelPTEBytesPerRowC[k] = 0;
			PDEAndMetaPTEBytesFrameC = 0;
			MetaRowByteC[k] = 0;
			MaxNumSwathC[k] = 0;
			PrefetchSourceLinesC[k] = 0;
			dpte_row_height_chroma_one_row_per_frame[k] = 0;
			dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
			PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
		}

		PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
				myPipe[k].ViewportStationary,
				myPipe[k].DCCEnable,
				myPipe[k].DPPPerSurface,
				myPipe[k].BlockHeight256BytesY,
				myPipe[k].BlockWidth256BytesY,
				myPipe[k].SourcePixelFormat,
				myPipe[k].SurfaceTiling,
				myPipe[k].BytePerPixelY,
				myPipe[k].SourceRotation,
				SwathWidthY[k],
				myPipe[k].ViewportHeight,
				myPipe[k].ViewportXStart,
				myPipe[k].ViewportYStart,
				GPUVMEnable,
				HostVMEnable,
				HostVMMaxNonCachedPageTableLevels,
				GPUVMMaxPageTableLevels,
				GPUVMMinPageSizeKBytes[k],
				HostVMMinPageSize,
				PTEBufferSizeInRequestsForLuma[k],
				myPipe[k].PitchY,
				myPipe[k].DCCMetaPitchY,
				myPipe[k].BlockWidthY,
				myPipe[k].BlockHeightY,

				/* Output */
				&MetaRowByteY[k],
				&PixelPTEBytesPerRowY[k],
				&dpte_row_width_luma_ub[k],
				&dpte_row_height_luma[k],
				&dpte_row_height_linear_luma[k],
				&PixelPTEBytesPerRowY_one_row_per_frame[k],
				&dpte_row_width_luma_ub_one_row_per_frame[k],
				&dpte_row_height_luma_one_row_per_frame[k],
				&meta_req_width[k],
				&meta_req_height[k],
				&meta_row_width[k],
				&meta_row_height[k],
				&PixelPTEReqWidthY[k],
				&PixelPTEReqHeightY[k],
				&PTERequestSizeY[k],
				&dpde0_bytes_per_frame_ub_l[k],
				&meta_pte_bytes_per_frame_ub_l[k]);

		PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
				myPipe[k].VRatio,
				myPipe[k].VTaps,
				myPipe[k].InterlaceEnable,
				myPipe[k].ProgressiveToInterlaceUnitInOPP,
				myPipe[k].SwathHeightY,
				myPipe[k].SourceRotation,
				myPipe[k].ViewportStationary,
				SwathWidthY[k],
				myPipe[k].ViewportHeight,
				myPipe[k].ViewportXStart,
				myPipe[k].ViewportYStart,

				/* Output */
				&VInitPreFillY[k],
				&MaxNumSwathY[k]);

		PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
		MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];

		/* Regular layout: each plane's row must fit in 64 bytes per request. */
		PTEBufferSizeNotExceeded[k] =
				PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
				PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k];

		/* One-row-per-frame layout is checked against twice that budget. */
		one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
			PTEBufferSizeInRequestsForLuma[k] &&
			PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
	}

	dml32_CalculateMALLUseForStaticScreen(
			NumberOfActiveSurfaces,
			MALLAllocatedForDCN,
			UseMALLForStaticScreen,   // mode
			SurfaceSizeInMALL,
			one_row_per_frame_fits_in_buffer,
			/* Output */
			UsesMALLForStaticScreen); // boolean

	/* Pass 2: PTE buffer mode and fragment size per surface. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
				(GPUVMMinPageSizeKBytes[k] > 64);
		/* log2 of the page size in bytes, relative to 2^12 (4 KiB). */
		BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
	}

	/* Pass 3: choose the row layout, then compute the row bandwidth. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n",  __func__, k, SurfaceSizeInMALL[k]);
		dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
#endif
		/*
		 * Same conditions as PTE_BUFFER_MODE except that the large
		 * page size case additionally requires a vertical rotation.
		 */
		use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
				(GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));

		use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
				!(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);

		if (use_one_row_for_frame[k]) {
			dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
			dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
			PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
			dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
			dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
			PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
			PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
		}

		DCCMetaBufferSizeNotExceeded[k] = (MetaRowByte[k] <= DCCMetaBufferSizeBytes);

		PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
		if (use_one_row_for_frame[k])
			PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;

		dml32_CalculateRowBandwidth(
				GPUVMEnable,
				myPipe[k].SourcePixelFormat,
				myPipe[k].VRatio,
				myPipe[k].VRatioChroma,
				myPipe[k].DCCEnable,
				myPipe[k].HTotal / myPipe[k].PixelClock,
				MetaRowByteY[k], MetaRowByteC[k],
				meta_row_height[k],
				meta_row_height_chroma[k],
				PixelPTEBytesPerRowY[k],
				PixelPTEBytesPerRowC[k],
				dpte_row_height_luma[k],
				dpte_row_height_chroma[k],

				/* Output */
				&meta_row_bw[k],
				&dpte_row_bw[k]);
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d, use_one_row_for_frame        = %d\n",  __func__, k, use_one_row_for_frame[k]);
		dml_print("DML::%s: k=%d, use_one_row_for_frame_flip   = %d\n",
				__func__, k, use_one_row_for_frame_flip[k]);
		dml_print("DML::%s: k=%d, UseMALLForPStateChange       = %d\n",
				__func__, k, UseMALLForPStateChange[k]);
		dml_print("DML::%s: k=%d, dpte_row_height_luma         = %d\n",  __func__, k, dpte_row_height_luma[k]);
		dml_print("DML::%s: k=%d, dpte_row_width_luma_ub       = %d\n",
				__func__, k, dpte_row_width_luma_ub[k]);
		dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY         = %d\n",  __func__, k, PixelPTEBytesPerRowY[k]);
		dml_print("DML::%s: k=%d, dpte_row_height_chroma       = %d\n",
				__func__, k, dpte_row_height_chroma[k]);
		dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub     = %d\n",
				__func__, k, dpte_row_width_chroma_ub[k]);
		dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC         = %d\n",  __func__, k, PixelPTEBytesPerRowC[k]);
		/* PixelPTEBytesPerRow[] is double, so it needs %f rather than %d. */
		dml_print("DML::%s: k=%d, PixelPTEBytesPerRow          = %f\n",  __func__, k, PixelPTEBytesPerRow[k]);
		dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded     = %d\n",
				__func__, k, PTEBufferSizeNotExceeded[k]);
		dml_print("DML::%s: k=%d, PTE_BUFFER_MODE              = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
		dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE           = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
#endif
	}
} // CalculateVMRowAndSwath
2261 
dml32_CalculateVMAndRowBytes(bool ViewportStationary,bool DCCEnable,unsigned int NumberOfDPPs,unsigned int BlockHeight256Bytes,unsigned int BlockWidth256Bytes,enum source_format_class SourcePixelFormat,unsigned int SurfaceTiling,unsigned int BytePerPixel,enum dm_rotation_angle SourceRotation,double SwathWidth,unsigned int ViewportHeight,unsigned int ViewportXStart,unsigned int ViewportYStart,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,unsigned int GPUVMMaxPageTableLevels,unsigned int GPUVMMinPageSizeKBytes,unsigned int HostVMMinPageSize,unsigned int PTEBufferSizeInRequests,unsigned int Pitch,unsigned int DCCMetaPitch,unsigned int MacroTileWidth,unsigned int MacroTileHeight,unsigned int * MetaRowByte,unsigned int * PixelPTEBytesPerRow,unsigned int * dpte_row_width_ub,unsigned int * dpte_row_height,unsigned int * dpte_row_height_linear,unsigned int * PixelPTEBytesPerRow_one_row_per_frame,unsigned int * dpte_row_width_ub_one_row_per_frame,unsigned int * dpte_row_height_one_row_per_frame,unsigned int * MetaRequestWidth,unsigned int * MetaRequestHeight,unsigned int * meta_row_width,unsigned int * meta_row_height,unsigned int * PixelPTEReqWidth,unsigned int * PixelPTEReqHeight,unsigned int * PTERequestSize,unsigned int * DPDE0BytesFrame,unsigned int * MetaPTEBytesFrame)2262 unsigned int dml32_CalculateVMAndRowBytes(
2263 		bool ViewportStationary,
2264 		bool DCCEnable,
2265 		unsigned int NumberOfDPPs,
2266 		unsigned int BlockHeight256Bytes,
2267 		unsigned int BlockWidth256Bytes,
2268 		enum source_format_class SourcePixelFormat,
2269 		unsigned int SurfaceTiling,
2270 		unsigned int BytePerPixel,
2271 		enum dm_rotation_angle SourceRotation,
2272 		double SwathWidth,
2273 		unsigned int ViewportHeight,
2274 		unsigned int    ViewportXStart,
2275 		unsigned int    ViewportYStart,
2276 		bool GPUVMEnable,
2277 		bool HostVMEnable,
2278 		unsigned int HostVMMaxNonCachedPageTableLevels,
2279 		unsigned int GPUVMMaxPageTableLevels,
2280 		unsigned int GPUVMMinPageSizeKBytes,
2281 		unsigned int HostVMMinPageSize,
2282 		unsigned int PTEBufferSizeInRequests,
2283 		unsigned int Pitch,
2284 		unsigned int DCCMetaPitch,
2285 		unsigned int MacroTileWidth,
2286 		unsigned int MacroTileHeight,
2287 
2288 		/* Output */
2289 		unsigned int *MetaRowByte,
2290 		unsigned int *PixelPTEBytesPerRow,
2291 		unsigned int    *dpte_row_width_ub,
2292 		unsigned int *dpte_row_height,
2293 		unsigned int *dpte_row_height_linear,
2294 		unsigned int    *PixelPTEBytesPerRow_one_row_per_frame,
2295 		unsigned int    *dpte_row_width_ub_one_row_per_frame,
2296 		unsigned int    *dpte_row_height_one_row_per_frame,
2297 		unsigned int *MetaRequestWidth,
2298 		unsigned int *MetaRequestHeight,
2299 		unsigned int *meta_row_width,
2300 		unsigned int *meta_row_height,
2301 		unsigned int *PixelPTEReqWidth,
2302 		unsigned int *PixelPTEReqHeight,
2303 		unsigned int *PTERequestSize,
2304 		unsigned int    *DPDE0BytesFrame,
2305 		unsigned int    *MetaPTEBytesFrame)
2306 {
2307 	unsigned int MPDEBytesFrame;
2308 	unsigned int DCCMetaSurfaceBytes;
2309 	unsigned int ExtraDPDEBytesFrame;
2310 	unsigned int PDEAndMetaPTEBytesFrame;
2311 	unsigned int HostVMDynamicLevels = 0;
2312 	unsigned int    MacroTileSizeBytes;
2313 	unsigned int    vp_height_meta_ub;
2314 	unsigned int    vp_height_dpte_ub;
2315 	unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
2316 
2317 	if (GPUVMEnable == true && HostVMEnable == true) {
2318 		if (HostVMMinPageSize < 2048)
2319 			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
2320 		else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
2321 			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
2322 		else
2323 			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
2324 	}
2325 
2326 	*MetaRequestHeight = 8 * BlockHeight256Bytes;
2327 	*MetaRequestWidth = 8 * BlockWidth256Bytes;
2328 	if (SurfaceTiling == dm_sw_linear) {
2329 		*meta_row_height = 32;
2330 		*meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
2331 				- dml_floor(ViewportXStart, *MetaRequestWidth);
2332 	} else if (!IsVertical(SourceRotation)) {
2333 		*meta_row_height = *MetaRequestHeight;
2334 		if (ViewportStationary && NumberOfDPPs == 1) {
2335 			*meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
2336 					*MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
2337 		} else {
2338 			*meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
2339 		}
2340 		*MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
2341 	} else {
2342 		*meta_row_height = *MetaRequestWidth;
2343 		if (ViewportStationary && NumberOfDPPs == 1) {
2344 			*meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
2345 					*MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
2346 		} else {
2347 			*meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
2348 		}
2349 		*MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
2350 	}
2351 
2352 	if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2353 		vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
2354 				64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
2355 	} else if (!IsVertical(SourceRotation)) {
2356 		vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2357 	} else {
2358 		vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2359 	}
2360 
2361 	DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
2362 
2363 	if (GPUVMEnable == true) {
2364 		*MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
2365 				(8 * 4.0 * 1024), 1) + 1) * 64;
2366 		MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
2367 	} else {
2368 		*MetaPTEBytesFrame = 0;
2369 		MPDEBytesFrame = 0;
2370 	}
2371 
2372 	if (DCCEnable != true) {
2373 		*MetaPTEBytesFrame = 0;
2374 		MPDEBytesFrame = 0;
2375 		*MetaRowByte = 0;
2376 	}
2377 
2378 	MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
2379 
2380 	if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
2381 		if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2382 			vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
2383 					MacroTileHeight - 1, MacroTileHeight) -
2384 					dml_floor(ViewportYStart, MacroTileHeight);
2385 		} else if (!IsVertical(SourceRotation)) {
2386 			vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
2387 		} else {
2388 			vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
2389 		}
2390 		*DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
2391 				(8 * 2097152), 1) + 1);
2392 		ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
2393 	} else {
2394 		*DPDE0BytesFrame = 0;
2395 		ExtraDPDEBytesFrame = 0;
2396 		vp_height_dpte_ub = 0;
2397 	}
2398 
2399 	PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2400 
2401 #ifdef __DML_VBA_DEBUG__
2402 	dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
2403 	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
2404 	dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
2405 	dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
2406 	dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
2407 	dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
2408 	dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
2409 	dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
2410 	dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
2411 	dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
2412 	dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
2413 	dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
2414 	dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
2415 	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
2416 	dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
2417 	dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth);
2418 	dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
2419 #endif
2420 
2421 	if (HostVMEnable == true)
2422 		PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
2423 
2424 	if (SurfaceTiling == dm_sw_linear) {
2425 		*PixelPTEReqHeight = 1;
2426 		*PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2427 		PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2428 		*PTERequestSize = 64;
2429 	} else if (GPUVMMinPageSizeKBytes == 4) {
2430 		*PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2431 		*PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2432 		*PTERequestSize = 128;
2433 	} else {
2434 		*PixelPTEReqHeight = MacroTileHeight;
2435 		*PixelPTEReqWidth = 8 *  1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
2436 		*PTERequestSize = 64;
2437 	}
2438 #ifdef __DML_VBA_DEBUG__
2439 	dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2440 	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
2441 	dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
2442 	dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
2443 	dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
2444 	dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
2445 	dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
2446 #endif
2447 
2448 	*dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
2449 	*dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
2450 			(double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
2451 					(double) *PixelPTEReqWidth;
2452 	*PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
2453 			*PTERequestSize;
2454 
2455 	if (SurfaceTiling == dm_sw_linear) {
2456 		*dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2457 				*PixelPTEReqWidth / Pitch), 1));
2458 #ifdef __DML_VBA_DEBUG__
2459 		dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
2460 				PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
2461 		dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
2462 				dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
2463 		dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
2464 				dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2465 		dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
2466 				1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2467 						*PixelPTEReqWidth / Pitch), 1));
2468 		dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2469 #endif
2470 		*dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
2471 				(double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
2472 		*PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
2473 
2474 		// VBA_DELTA, VBA doesn't have programming value for pte row height linear.
2475 		*dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2476 				PixelPTEReqWidth_linear / Pitch), 1);
2477 		if (*dpte_row_height_linear > 128)
2478 			*dpte_row_height_linear = 128;
2479 
2480 	} else if (!IsVertical(SourceRotation)) {
2481 		*dpte_row_height = *PixelPTEReqHeight;
2482 
2483 		if (GPUVMMinPageSizeKBytes > 64) {
2484 			*dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
2485 					*PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2486 		} else if (ViewportStationary && (NumberOfDPPs == 1)) {
2487 			*dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
2488 					*PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
2489 					dml_floor(ViewportXStart, *PixelPTEReqWidth);
2490 		} else {
2491 			*dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
2492 					*PixelPTEReqWidth;
2493 		}
2494 
2495 		*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2496 	} else {
2497 		*dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);
2498 
2499 		if (ViewportStationary && (NumberOfDPPs == 1)) {
2500 			*dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
2501 					*PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
2502 		} else {
2503 			*dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
2504 					* *PixelPTEReqHeight;
2505 		}
2506 
2507 		*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
2508 	}
2509 
2510 	if (GPUVMEnable != true)
2511 		*PixelPTEBytesPerRow = 0;
2512 	if (HostVMEnable == true)
2513 		*PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2514 
2515 #ifdef __DML_VBA_DEBUG__
2516 	dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2517 	dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2518 	dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
2519 	dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
2520 	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
2521 	dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
2522 	dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
2523 	dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
2524 			__func__, *dpte_row_width_ub_one_row_per_frame);
2525 	dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
2526 			__func__, *PixelPTEBytesPerRow_one_row_per_frame);
2527 	dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
2528 			*MetaPTEBytesFrame);
2529 #endif
2530 
2531 	return PDEAndMetaPTEBytesFrame;
2532 } // CalculateVMAndRowBytes
2533 
dml32_CalculatePrefetchSourceLines(double VRatio,unsigned int VTaps,bool Interlace,bool ProgressiveToInterlaceUnitInOPP,unsigned int SwathHeight,enum dm_rotation_angle SourceRotation,bool ViewportStationary,double SwathWidth,unsigned int ViewportHeight,unsigned int ViewportXStart,unsigned int ViewportYStart,double * VInitPreFill,unsigned int * MaxNumSwath)2534 double dml32_CalculatePrefetchSourceLines(
2535 		double VRatio,
2536 		unsigned int VTaps,
2537 		bool Interlace,
2538 		bool ProgressiveToInterlaceUnitInOPP,
2539 		unsigned int SwathHeight,
2540 		enum dm_rotation_angle SourceRotation,
2541 		bool ViewportStationary,
2542 		double SwathWidth,
2543 		unsigned int ViewportHeight,
2544 		unsigned int ViewportXStart,
2545 		unsigned int ViewportYStart,
2546 
2547 		/* Output */
2548 		double *VInitPreFill,
2549 		unsigned int *MaxNumSwath)
2550 {
2551 
2552 	unsigned int vp_start_rot;
2553 	unsigned int sw0_tmp;
2554 	unsigned int MaxPartialSwath;
2555 	double numLines;
2556 
2557 #ifdef __DML_VBA_DEBUG__
2558 	dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
2559 	dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
2560 	dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
2561 	dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
2562 	dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
2563 	dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
2564 #endif
2565 	if (ProgressiveToInterlaceUnitInOPP)
2566 		*VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
2567 	else
2568 		*VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
2569 
2570 	if (ViewportStationary) {
2571 		if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
2572 			vp_start_rot = SwathHeight -
2573 					(((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
2574 		} else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
2575 			vp_start_rot = ViewportXStart;
2576 		} else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
2577 			vp_start_rot = SwathHeight -
2578 					(((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
2579 		} else {
2580 			vp_start_rot = ViewportYStart;
2581 		}
2582 		sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
2583 		if (sw0_tmp < *VInitPreFill)
2584 			*MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
2585 		else
2586 			*MaxNumSwath = 1;
2587 		MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
2588 	} else {
2589 		*MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
2590 		if (*VInitPreFill > 1)
2591 			MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
2592 		else
2593 			MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
2594 	}
2595 	numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
2596 
2597 #ifdef __DML_VBA_DEBUG__
2598 	dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
2599 	dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill);
2600 	dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
2601 	dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
2602 	dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
2603 #endif
2604 	return numLines;
2605 
2606 } // CalculatePrefetchSourceLines
2607 
dml32_CalculateMALLUseForStaticScreen(unsigned int NumberOfActiveSurfaces,unsigned int MALLAllocatedForDCNFinal,enum dm_use_mall_for_static_screen_mode * UseMALLForStaticScreen,unsigned int SurfaceSizeInMALL[],bool one_row_per_frame_fits_in_buffer[],bool UsesMALLForStaticScreen[])2608 void dml32_CalculateMALLUseForStaticScreen(
2609 		unsigned int NumberOfActiveSurfaces,
2610 		unsigned int MALLAllocatedForDCNFinal,
2611 		enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
2612 		unsigned int SurfaceSizeInMALL[],
2613 		bool one_row_per_frame_fits_in_buffer[],
2614 
2615 		/* output */
2616 		bool UsesMALLForStaticScreen[])
2617 {
2618 	unsigned int k;
2619 	unsigned int SurfaceToAddToMALL;
2620 	bool CanAddAnotherSurfaceToMALL;
2621 	unsigned int TotalSurfaceSizeInMALL;
2622 
2623 	TotalSurfaceSizeInMALL = 0;
2624 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2625 		UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
2626 		if (UsesMALLForStaticScreen[k])
2627 			TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
2628 #ifdef __DML_VBA_DEBUG__
2629 		dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
2630 		dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n",  __func__, k, TotalSurfaceSizeInMALL);
2631 #endif
2632 	}
2633 
2634 	SurfaceToAddToMALL = 0;
2635 	CanAddAnotherSurfaceToMALL = true;
2636 	while (CanAddAnotherSurfaceToMALL) {
2637 		CanAddAnotherSurfaceToMALL = false;
2638 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2639 			if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
2640 					!UsesMALLForStaticScreen[k] &&
2641 					UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable &&
2642 					one_row_per_frame_fits_in_buffer[k] &&
2643 					(!CanAddAnotherSurfaceToMALL ||
2644 					SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
2645 				CanAddAnotherSurfaceToMALL = true;
2646 				SurfaceToAddToMALL = k;
2647 #ifdef __DML_VBA_DEBUG__
2648 				dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
2649 						__func__, k, UseMALLForStaticScreen[k]);
2650 #endif
2651 			}
2652 		}
2653 		if (CanAddAnotherSurfaceToMALL) {
2654 			UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
2655 			TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
2656 
2657 #ifdef __DML_VBA_DEBUG__
2658 			dml_print("DML::%s: SurfaceToAddToMALL       = %d\n",  __func__, SurfaceToAddToMALL);
2659 			dml_print("DML::%s: TotalSurfaceSizeInMALL   = %d\n",  __func__, TotalSurfaceSizeInMALL);
2660 #endif
2661 
2662 		}
2663 	}
2664 }
2665 
dml32_CalculateRowBandwidth(bool GPUVMEnable,enum source_format_class SourcePixelFormat,double VRatio,double VRatioChroma,bool DCCEnable,double LineTime,unsigned int MetaRowByteLuma,unsigned int MetaRowByteChroma,unsigned int meta_row_height_luma,unsigned int meta_row_height_chroma,unsigned int PixelPTEBytesPerRowLuma,unsigned int PixelPTEBytesPerRowChroma,unsigned int dpte_row_height_luma,unsigned int dpte_row_height_chroma,double * meta_row_bw,double * dpte_row_bw)2666 void dml32_CalculateRowBandwidth(
2667 		bool GPUVMEnable,
2668 		enum source_format_class SourcePixelFormat,
2669 		double VRatio,
2670 		double VRatioChroma,
2671 		bool DCCEnable,
2672 		double LineTime,
2673 		unsigned int MetaRowByteLuma,
2674 		unsigned int MetaRowByteChroma,
2675 		unsigned int meta_row_height_luma,
2676 		unsigned int meta_row_height_chroma,
2677 		unsigned int PixelPTEBytesPerRowLuma,
2678 		unsigned int PixelPTEBytesPerRowChroma,
2679 		unsigned int dpte_row_height_luma,
2680 		unsigned int dpte_row_height_chroma,
2681 		/* Output */
2682 		double *meta_row_bw,
2683 		double *dpte_row_bw)
2684 {
2685 	if (DCCEnable != true) {
2686 		*meta_row_bw = 0;
2687 	} else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2688 			SourcePixelFormat == dm_rgbe_alpha) {
2689 		*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
2690 				MetaRowByteChroma / (meta_row_height_chroma * LineTime);
2691 	} else {
2692 		*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
2693 	}
2694 
2695 	if (GPUVMEnable != true) {
2696 		*dpte_row_bw = 0;
2697 	} else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2698 			SourcePixelFormat == dm_rgbe_alpha) {
2699 		*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
2700 				VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
2701 	} else {
2702 		*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
2703 	}
2704 }
2705 
/*
 * Return the urgent latency: the largest of the three latency inputs.  When
 * DoUrgentLatencyAdjustment is set, the result is additionally offset by
 *   UrgentLatencyAdjustmentFabricClockComponent *
 *   (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 */
double dml32_CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool   DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double worst_latency = dml_max3(UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);

	if (DoUrgentLatencyAdjustment != true)
		return worst_latency;

	return worst_latency + UrgentLatencyAdjustmentFabricClockComponent *
			(UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
2724 
dml32_CalculateUrgentBurstFactor(enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,unsigned int swath_width_luma_ub,unsigned int swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double LineTime,double UrgentLatency,double CursorBufferSize,unsigned int CursorWidth,unsigned int CursorBPP,double VRatio,double VRatioC,double BytePerPixelInDETY,double BytePerPixelInDETC,unsigned int DETBufferSizeY,unsigned int DETBufferSizeC,double * UrgentBurstFactorCursor,double * UrgentBurstFactorLuma,double * UrgentBurstFactorChroma,bool * NotEnoughUrgentLatencyHiding)2725 void dml32_CalculateUrgentBurstFactor(
2726 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
2727 		unsigned int    swath_width_luma_ub,
2728 		unsigned int    swath_width_chroma_ub,
2729 		unsigned int SwathHeightY,
2730 		unsigned int SwathHeightC,
2731 		double  LineTime,
2732 		double  UrgentLatency,
2733 		double  CursorBufferSize,
2734 		unsigned int CursorWidth,
2735 		unsigned int CursorBPP,
2736 		double  VRatio,
2737 		double  VRatioC,
2738 		double  BytePerPixelInDETY,
2739 		double  BytePerPixelInDETC,
2740 		unsigned int    DETBufferSizeY,
2741 		unsigned int    DETBufferSizeC,
2742 		/* Output */
2743 		double *UrgentBurstFactorCursor,
2744 		double *UrgentBurstFactorLuma,
2745 		double *UrgentBurstFactorChroma,
2746 		bool   *NotEnoughUrgentLatencyHiding)
2747 {
2748 	double       LinesInDETLuma;
2749 	double       LinesInDETChroma;
2750 	unsigned int LinesInCursorBuffer;
2751 	double       CursorBufferSizeInTime;
2752 	double       DETBufferSizeInTimeLuma;
2753 	double       DETBufferSizeInTimeChroma;
2754 
2755 	*NotEnoughUrgentLatencyHiding = 0;
2756 
2757 	if (CursorWidth > 0) {
2758 		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
2759 				(CursorWidth * CursorBPP / 8.0)), 1.0);
2760 		if (VRatio > 0) {
2761 			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
2762 			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
2763 				*NotEnoughUrgentLatencyHiding = 1;
2764 				*UrgentBurstFactorCursor = 0;
2765 			} else {
2766 				*UrgentBurstFactorCursor = CursorBufferSizeInTime /
2767 						(CursorBufferSizeInTime - UrgentLatency);
2768 			}
2769 		} else {
2770 			*UrgentBurstFactorCursor = 1;
2771 		}
2772 	}
2773 
2774 	LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
2775 			DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
2776 
2777 	if (VRatio > 0) {
2778 		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
2779 		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
2780 			*NotEnoughUrgentLatencyHiding = 1;
2781 			*UrgentBurstFactorLuma = 0;
2782 		} else {
2783 			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
2784 		}
2785 	} else {
2786 		*UrgentBurstFactorLuma = 1;
2787 	}
2788 
2789 	if (BytePerPixelInDETC > 0) {
2790 		LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
2791 					1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
2792 					/ swath_width_chroma_ub;
2793 
2794 		if (VRatio > 0) {
2795 			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
2796 			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
2797 				*NotEnoughUrgentLatencyHiding = 1;
2798 				*UrgentBurstFactorChroma = 0;
2799 			} else {
2800 				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
2801 						/ (DETBufferSizeInTimeChroma - UrgentLatency);
2802 			}
2803 		} else {
2804 			*UrgentBurstFactorChroma = 1;
2805 		}
2806 	}
2807 } // CalculateUrgentBurstFactor
2808 
dml32_CalculateDCFCLKDeepSleep(unsigned int NumberOfActiveSurfaces,unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],double VRatio[],double VRatioChroma[],double SwathWidthY[],double SwathWidthC[],unsigned int DPPPerSurface[],double HRatio[],double HRatioChroma[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double Dppclk[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int ReturnBusWidth,double * DCFClkDeepSleep)2809 void dml32_CalculateDCFCLKDeepSleep(
2810 		unsigned int NumberOfActiveSurfaces,
2811 		unsigned int BytePerPixelY[],
2812 		unsigned int BytePerPixelC[],
2813 		double VRatio[],
2814 		double VRatioChroma[],
2815 		double SwathWidthY[],
2816 		double SwathWidthC[],
2817 		unsigned int DPPPerSurface[],
2818 		double HRatio[],
2819 		double HRatioChroma[],
2820 		double PixelClock[],
2821 		double PSCL_THROUGHPUT[],
2822 		double PSCL_THROUGHPUT_CHROMA[],
2823 		double Dppclk[],
2824 		double ReadBandwidthLuma[],
2825 		double ReadBandwidthChroma[],
2826 		unsigned int ReturnBusWidth,
2827 
2828 		/* Output */
2829 		double *DCFClkDeepSleep)
2830 {
2831 	unsigned int k;
2832 	double   DisplayPipeLineDeliveryTimeLuma;
2833 	double   DisplayPipeLineDeliveryTimeChroma;
2834 	double   DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX];
2835 	double ReadBandwidth = 0.0;
2836 
2837 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2838 
2839 		if (VRatio[k] <= 1) {
2840 			DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
2841 					/ PixelClock[k];
2842 		} else {
2843 			DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
2844 		}
2845 		if (BytePerPixelC[k] == 0) {
2846 			DisplayPipeLineDeliveryTimeChroma = 0;
2847 		} else {
2848 			if (VRatioChroma[k] <= 1) {
2849 				DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] *
2850 						DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
2851 			} else {
2852 				DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
2853 						/ Dppclk[k];
2854 			}
2855 		}
2856 
2857 		if (BytePerPixelC[k] > 0) {
2858 			DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
2859 					BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
2860 					__DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
2861 					32.0 / DisplayPipeLineDeliveryTimeChroma);
2862 		} else {
2863 			DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
2864 					64.0 / DisplayPipeLineDeliveryTimeLuma;
2865 		}
2866 		DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16);
2867 
2868 #ifdef __DML_VBA_DEBUG__
2869 		dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
2870 		dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
2871 #endif
2872 	}
2873 
2874 	for (k = 0; k < NumberOfActiveSurfaces; ++k)
2875 		ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
2876 
2877 	*DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth);
2878 
2879 #ifdef __DML_VBA_DEBUG__
2880 	dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
2881 	dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
2882 	dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
2883 	dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
2884 #endif
2885 
2886 	for (k = 0; k < NumberOfActiveSurfaces; ++k)
2887 		*DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
2888 #ifdef __DML_VBA_DEBUG__
2889 	dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
2890 #endif
2891 } // CalculateDCFCLKDeepSleep
2892 
/*
 * Compute the writeback delay.
 *
 * The result is 0 when the unclamped index of the last output line,
 *   WritebackDestinationHeight - 1 -
 *   ceil((WritebackSourceHeight - VInit) / WritebackVRatio),
 * is negative.  Otherwise it is
 *   <that index> * <line length> + (HTotal - WritebackDestinationWidth) + 80
 * where VInit = (WritebackVRatio + WritebackVTaps + 1) / 2 and the line length
 * is the larger of WritebackDestinationWidth and
 * ceil(WritebackDestinationWidth / 6) * WritebackVTaps.
 *
 * WritebackPixelFormat and WritebackHRatio are not used by the computation.
 */
double dml32_CalculateWriteBackDelay(
		enum source_format_class WritebackPixelFormat,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackVTaps,
		unsigned int         WritebackDestinationWidth,
		unsigned int         WritebackDestinationHeight,
		unsigned int         WritebackSourceHeight,
		unsigned int HTotal)
{
	const double v_init = (WritebackVRatio + WritebackVTaps + 1) / 2;
	const double line_length = dml_max((double) WritebackDestinationWidth,
			dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
	const double last_line_unclamped = WritebackDestinationHeight - 1 -
			dml_ceil(((double)WritebackSourceHeight -
					(double) v_init) / (double)WritebackVRatio, 1.0);

	if (last_line_unclamped < 0)
		return 0;

	return last_line_unclamped * line_length +
			(HTotal - WritebackDestinationWidth) + 80;
}
2922 
dml32_UseMinimumDCFCLK(enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],bool DRRDisplay[],bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,unsigned int MaxInterDCNTileRepeaters,unsigned int MaxPrefetchMode,double DRAMClockChangeLatencyFinal,double FCLKChangeLatency,double SREnterPlusExitTime,unsigned int ReturnBusWidth,unsigned int RoundTripPingLatencyCycles,unsigned int ReorderingBytes,unsigned int PixelChunkSizeInKByte,unsigned int MetaChunkSize,bool GPUVMEnable,unsigned int GPUVMMaxPageTableLevels,bool HostVMEnable,unsigned int NumberOfActiveSurfaces,double HostVMMinPageSize,unsigned int HostVMMaxNonCachedPageTableLevels,bool DynamicMetadataVMEnabled,bool ImmediateFlipRequirement,bool ProgressiveToInterlaceUnitInOPP,double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,unsigned int VTotal[],unsigned int VActive[],unsigned int DynamicMetadataTransmittedBytes[],unsigned int DynamicMetadataLinesBeforeActiveRequired[],bool Interlace[],double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],double RequiredDISPCLK[][2],double UrgLatency[],unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],double ProjectedDCFClkDeepSleep[][2],double MaximumVStartup[][2][DC__NUM_DPP__MAX],unsigned int TotalNumberOfActiveDPP[][2],unsigned int TotalNumberOfDCCActiveDPP[][2],unsigned int dpte_group_bytes[],double PrefetchLinesY[][2][DC__NUM_DPP__MAX],double PrefetchLinesC[][2][DC__NUM_DPP__MAX],unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],unsigned int HTotal[],double PixelClock[],double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],double MetaRowBytes[][2][DC__NUM_DPP__MAX],bool DynamicMetadataEnable[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double DCFCLKPerState[],double 
DCFCLKState[][2])2923 void dml32_UseMinimumDCFCLK(
2924 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
2925 		bool DRRDisplay[],
2926 		bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2927 		unsigned int MaxInterDCNTileRepeaters,
2928 		unsigned int MaxPrefetchMode,
2929 		double DRAMClockChangeLatencyFinal,
2930 		double FCLKChangeLatency,
2931 		double SREnterPlusExitTime,
2932 		unsigned int ReturnBusWidth,
2933 		unsigned int RoundTripPingLatencyCycles,
2934 		unsigned int ReorderingBytes,
2935 		unsigned int PixelChunkSizeInKByte,
2936 		unsigned int MetaChunkSize,
2937 		bool GPUVMEnable,
2938 		unsigned int GPUVMMaxPageTableLevels,
2939 		bool HostVMEnable,
2940 		unsigned int NumberOfActiveSurfaces,
2941 		double HostVMMinPageSize,
2942 		unsigned int HostVMMaxNonCachedPageTableLevels,
2943 		bool DynamicMetadataVMEnabled,
2944 		bool ImmediateFlipRequirement,
2945 		bool ProgressiveToInterlaceUnitInOPP,
2946 		double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
2947 		double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
2948 		unsigned int VTotal[],
2949 		unsigned int VActive[],
2950 		unsigned int DynamicMetadataTransmittedBytes[],
2951 		unsigned int DynamicMetadataLinesBeforeActiveRequired[],
2952 		bool Interlace[],
2953 		double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
2954 		double RequiredDISPCLK[][2],
2955 		double UrgLatency[],
2956 		unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
2957 		double ProjectedDCFClkDeepSleep[][2],
2958 		double MaximumVStartup[][2][DC__NUM_DPP__MAX],
2959 		unsigned int TotalNumberOfActiveDPP[][2],
2960 		unsigned int TotalNumberOfDCCActiveDPP[][2],
2961 		unsigned int dpte_group_bytes[],
2962 		double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
2963 		double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
2964 		unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
2965 		unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
2966 		unsigned int BytePerPixelY[],
2967 		unsigned int BytePerPixelC[],
2968 		unsigned int HTotal[],
2969 		double PixelClock[],
2970 		double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
2971 		double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
2972 		double MetaRowBytes[][2][DC__NUM_DPP__MAX],
2973 		bool DynamicMetadataEnable[],
2974 		double ReadBandwidthLuma[],
2975 		double ReadBandwidthChroma[],
2976 		double DCFCLKPerState[],
2977 		/* Output */
2978 		double DCFCLKState[][2])
2979 {
2980 	unsigned int i, j, k;
2981 	unsigned int     dummy1;
2982 	double dummy2, dummy3;
2983 	double   NormalEfficiency;
2984 	double   TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
2985 
2986 	NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
2987 	for  (i = 0; i < DC__VOLTAGE_STATES; ++i) {
2988 		for  (j = 0; j <= 1; ++j) {
2989 			double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
2990 			double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
2991 			double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
2992 			double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
2993 			double MinimumTWait = 0.0;
2994 			double DPTEBandwidth;
2995 			double DCFCLKRequiredForAverageBandwidth;
2996 			unsigned int ExtraLatencyBytes;
2997 			double ExtraLatencyCycles;
2998 			double DCFCLKRequiredForPeakBandwidth;
2999 			unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
3000 			double MinimumTvmPlus2Tr0;
3001 
3002 			TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
3003 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3004 				TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
3005 						+ NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
3006 								/ (15.75 * HTotal[k] / PixelClock[k]);
3007 			}
3008 
3009 			for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k)
3010 				NoOfDPPState[k] = NoOfDPP[i][j][k];
3011 
3012 			DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
3013 			DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
3014 
3015 			ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
3016 					TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
3017 					TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
3018 					NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
3019 					HostVMMaxNonCachedPageTableLevels);
3020 			ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
3021 					+ ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
3022 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3023 				double DCFCLKCyclesRequiredInPrefetch;
3024 				double PrefetchTime;
3025 
3026 				PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
3027 						* swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
3028 						+ PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
3029 								* BytePerPixelC[k]) / NormalEfficiency
3030 						/ ReturnBusWidth;
3031 				DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
3032 						+ PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
3033 								/ NormalEfficiency / ReturnBusWidth
3034 								* (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
3035 						+ 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
3036 								/ ReturnBusWidth
3037 						+ 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
3038 						+ PixelDCFCLKCyclesRequiredInPrefetch[k];
3039 				PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
3040 						* HTotal[k] / PixelClock[k];
3041 				DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
3042 						DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
3043 						UrgLatency[i] * GPUVMMaxPageTableLevels *
3044 						(HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
3045 
3046 				MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
3047 						UseMALLForPStateChange[k],
3048 						SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3049 						DRRDisplay[k],
3050 						DRAMClockChangeLatencyFinal,
3051 						FCLKChangeLatency,
3052 						UrgLatency[i],
3053 						SREnterPlusExitTime);
3054 
3055 				PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
3056 						MinimumTWait - UrgLatency[i] *
3057 						((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
3058 						GPUVMMaxPageTableLevels - 2) *  (HostVMEnable == true ?
3059 						HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
3060 						DynamicMetadataVMExtraLatency[k];
3061 
3062 				if (PrefetchTime > 0) {
3063 					double ExpectedVRatioPrefetch;
3064 
3065 					ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
3066 							PixelDCFCLKCyclesRequiredInPrefetch[k] /
3067 							DCFCLKCyclesRequiredInPrefetch);
3068 					DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
3069 							PixelDCFCLKCyclesRequiredInPrefetch[k] /
3070 							PrefetchPixelLinesTime[k] *
3071 							dml_max(1.0, ExpectedVRatioPrefetch) *
3072 							dml_max(1.0, ExpectedVRatioPrefetch / 4);
3073 					if (HostVMEnable == true || ImmediateFlipRequirement == true) {
3074 						DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3075 								DCFCLKRequiredForPeakBandwidthPerSurface[k] +
3076 								NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
3077 								NormalEfficiency / ReturnBusWidth;
3078 					}
3079 				} else {
3080 					DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3081 				}
3082 				if (DynamicMetadataEnable[k] == true) {
3083 					double TSetupPipe;
3084 					double TdmbfPipe;
3085 					double TdmsksPipe;
3086 					double TdmecPipe;
3087 					double AllowedTimeForUrgentExtraLatency;
3088 
3089 					dml32_CalculateVUpdateAndDynamicMetadataParameters(
3090 							MaxInterDCNTileRepeaters,
3091 							RequiredDPPCLKPerSurface[i][j][k],
3092 							RequiredDISPCLK[i][j],
3093 							ProjectedDCFClkDeepSleep[i][j],
3094 							PixelClock[k],
3095 							HTotal[k],
3096 							VTotal[k] - VActive[k],
3097 							DynamicMetadataTransmittedBytes[k],
3098 							DynamicMetadataLinesBeforeActiveRequired[k],
3099 							Interlace[k],
3100 							ProgressiveToInterlaceUnitInOPP,
3101 
3102 							/* output */
3103 							&TSetupPipe,
3104 							&TdmbfPipe,
3105 							&TdmecPipe,
3106 							&TdmsksPipe,
3107 							&dummy1,
3108 							&dummy2,
3109 							&dummy3);
3110 					AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
3111 							PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
3112 							TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
3113 					if (AllowedTimeForUrgentExtraLatency > 0)
3114 						DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3115 								dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
3116 								ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
3117 					else
3118 						DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3119 				}
3120 			}
3121 			DCFCLKRequiredForPeakBandwidth = 0;
3122 			for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) {
3123 				DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
3124 						DCFCLKRequiredForPeakBandwidthPerSurface[k];
3125 			}
3126 			MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
3127 					(HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
3128 					(HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
3129 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3130 				double MaximumTvmPlus2Tr0PlusTsw;
3131 
3132 				MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
3133 						PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
3134 				if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
3135 					DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
3136 				} else {
3137 					DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
3138 							2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
3139 								MinimumTvmPlus2Tr0 -
3140 								PrefetchPixelLinesTime[k] / 4),
3141 							(2 * ExtraLatencyCycles +
3142 								PixelDCFCLKCyclesRequiredInPrefetch[k]) /
3143 								(MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
3144 				}
3145 			}
3146 			DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
3147 					dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
3148 		}
3149 	}
3150 }
3151 
/*
 * Return the number of extra latency bytes.
 *
 * The base amount is ReorderingBytes plus 1024 bytes per KByte of pixel and
 * meta chunks across the active DPPs.  When GPUVMEnable is set, each surface
 * additionally contributes
 *   NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels) *
 *   HostVMInefficiencyFactor
 * where "levels" is 0 unless both GPUVMEnable and HostVMEnable are set, in
 * which case it is HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2
 * (not below 0) for HostVMMinPageSize < 2048, < 1048576, or larger.
 *
 * The sum is accumulated as a double and converted to unsigned int on return.
 */
unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
		unsigned int TotalNumberOfActiveDPP,
		unsigned int PixelChunkSizeInKByte,
		unsigned int TotalNumberOfDCCActiveDPP,
		unsigned int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int NumberOfActiveSurfaces,
		unsigned int NumberOfDPP[],
		unsigned int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int host_vm_levels = 0;
	unsigned int group_multiplier;
	unsigned int k;
	double   total_bytes;

	if (GPUVMEnable == true && HostVMEnable == true) {
		if (HostVMMinPageSize < 2048)
			host_vm_levels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			host_vm_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			host_vm_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	total_bytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
			TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	/* Page-table traffic is only added when GPUVM is in use. */
	if (GPUVMEnable != true)
		return total_bytes;

	group_multiplier = 1 + 8 * host_vm_levels;
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		total_bytes = total_bytes + NumberOfDPP[k] * dpte_group_bytes[k] *
				group_multiplier * HostVMInefficiencyFactor;
	}

	return total_bytes;
}
3192 
/*
 * Compute the VUpdate / VReady pixel offsets and the dynamic metadata timing
 * terms.
 *
 * Outputs:
 *   *VUpdateWidthPix  - ceil((14 / DCFClkDeepSleep + 12 / Dppclk +
 *                       repeater delay) * PixelClock)
 *   *VReadyOffsetPix  - ceil(max(150 / Dppclk, repeater delay +
 *                       20 / DCFClkDeepSleep + 10 / Dppclk) * PixelClock)
 *   *VUpdateOffsetPix - ceil(HTotal / 4)
 *   *TSetup           - sum of the three values above divided by PixelClock
 *   *Tdmbf            - DynamicMetadataTransmittedBytes / 4 / Dispclk
 *   *Tdmec            - HTotal / PixelClock
 *   *Tdmsks           - half of VBlank lines' worth of time when
 *                       DynamicMetadataLinesBeforeActiveRequired == 0, else
 *                       that many lines' worth of time; halved again when
 *                       InterlaceEnable == 1 and ProgressiveToInterlaceUnitInOPP
 *                       is false
 * where the repeater delay is
 *   MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk).
 */
void dml32_CalculateVUpdateAndDynamicMetadataParameters(
		unsigned int MaxInterDCNTileRepeaters,
		double Dppclk,
		double Dispclk,
		double DCFClkDeepSleep,
		double PixelClock,
		unsigned int HTotal,
		unsigned int VBlank,
		unsigned int DynamicMetadataTransmittedBytes,
		unsigned int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,

		/* output */
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	*VUpdateWidthPix  =
			dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix  = dml_ceil(dml_max(150.0 / Dppclk,
			TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	else
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		*Tdmsks = *Tdmsks / 2;
#ifdef __DML_VBA_DEBUG__
	/* VUpdateWidthPix and VReadyOffsetPix are doubles, hence %f. */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);

	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
			__func__, DynamicMetadataLinesBeforeActiveRequired);
	dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
	dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
3247 
dml32_CalculateTWait(unsigned int PrefetchMode,enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,bool DRRDisplay,double DRAMClockChangeLatency,double FCLKChangeLatency,double UrgentLatency,double SREnterPlusExitTime)3248 double dml32_CalculateTWait(
3249 		unsigned int PrefetchMode,
3250 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
3251 		bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3252 		bool DRRDisplay,
3253 		double DRAMClockChangeLatency,
3254 		double FCLKChangeLatency,
3255 		double UrgentLatency,
3256 		double SREnterPlusExitTime)
3257 {
3258 	double TWait = 0.0;
3259 
3260 	if (PrefetchMode == 0 &&
3261 			!(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
3262 			!(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
3263 			!(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
3264 			!(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
3265 		TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3266 	} else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3267 		TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3268 	} else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3269 		TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
3270 	} else {
3271 		TWait = UrgentLatency;
3272 	}
3273 
3274 #ifdef __DML_VBA_DEBUG__
3275 	dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
3276 	dml_print("DML::%s: TWait = %f\n", __func__, TWait);
3277 #endif
3278 	return TWait;
3279 } // CalculateTWait
3280 
3281 // Function: get_return_bw_mbps
3282 // Megabyte per second
dml32_get_return_bw_mbps(const soc_bounding_box_st * soc,const int VoltageLevel,const bool HostVMEnable,const double DCFCLK,const double FabricClock,const double DRAMSpeed)3283 double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
3284 		const int VoltageLevel,
3285 		const bool HostVMEnable,
3286 		const double DCFCLK,
3287 		const double FabricClock,
3288 		const double DRAMSpeed)
3289 {
3290 	double ReturnBW = 0.;
3291 	double IdealSDPPortBandwidth    = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
3292 	double IdealFabricBandwidth     = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
3293 	double IdealDRAMBandwidth       = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
3294 	double PixelDataOnlyReturnBW    = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3295 			IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3296 			IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe  :
3297 					soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3298 	double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3299 			IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3300 			IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3301 					soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3302 
3303 	if (HostVMEnable != true)
3304 		ReturnBW = PixelDataOnlyReturnBW;
3305 	else
3306 		ReturnBW = PixelMixedWithVMDataReturnBW;
3307 
3308 #ifdef __DML_VBA_DEBUG__
3309 	dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3310 	dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
3311 	dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3312 	dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3313 	dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3314 	dml_print("DML::%s: IdealSDPPortBandwidth        = %f\n", __func__, IdealSDPPortBandwidth);
3315 	dml_print("DML::%s: IdealFabricBandwidth         = %f\n", __func__, IdealFabricBandwidth);
3316 	dml_print("DML::%s: IdealDRAMBandwidth           = %f\n", __func__, IdealDRAMBandwidth);
3317 	dml_print("DML::%s: PixelDataOnlyReturnBW        = %f\n", __func__, PixelDataOnlyReturnBW);
3318 	dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
3319 	dml_print("DML::%s: ReturnBW                     = %f MBps\n", __func__, ReturnBW);
3320 #endif
3321 	return ReturnBW;
3322 }
3323 
3324 // Function: get_return_bw_mbps_vm_only
3325 // Megabyte per second
dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st * soc,const int VoltageLevel,const double DCFCLK,const double FabricClock,const double DRAMSpeed)3326 double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
3327 		const int VoltageLevel,
3328 		const double DCFCLK,
3329 		const double FabricClock,
3330 		const double DRAMSpeed)
3331 {
3332 	double VMDataOnlyReturnBW = dml_min3(
3333 			soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3334 			FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
3335 					* soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3336 			DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
3337 					* (VoltageLevel < 2 ?
3338 							soc->pct_ideal_dram_bw_after_urgent_strobe :
3339 							soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
3340 #ifdef __DML_VBA_DEBUG__
3341 	dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3342 	dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3343 	dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3344 	dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3345 	dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
3346 #endif
3347 	return VMDataOnlyReturnBW;
3348 }
3349 
dml32_CalculateExtraLatency(unsigned int RoundTripPingLatencyCycles,unsigned int ReorderingBytes,double DCFCLK,unsigned int TotalNumberOfActiveDPP,unsigned int PixelChunkSizeInKByte,unsigned int TotalNumberOfDCCActiveDPP,unsigned int MetaChunkSize,double ReturnBW,bool GPUVMEnable,bool HostVMEnable,unsigned int NumberOfActiveSurfaces,unsigned int NumberOfDPP[],unsigned int dpte_group_bytes[],double HostVMInefficiencyFactor,double HostVMMinPageSize,unsigned int HostVMMaxNonCachedPageTableLevels)3350 double dml32_CalculateExtraLatency(
3351 		unsigned int RoundTripPingLatencyCycles,
3352 		unsigned int ReorderingBytes,
3353 		double DCFCLK,
3354 		unsigned int TotalNumberOfActiveDPP,
3355 		unsigned int PixelChunkSizeInKByte,
3356 		unsigned int TotalNumberOfDCCActiveDPP,
3357 		unsigned int MetaChunkSize,
3358 		double ReturnBW,
3359 		bool GPUVMEnable,
3360 		bool HostVMEnable,
3361 		unsigned int NumberOfActiveSurfaces,
3362 		unsigned int NumberOfDPP[],
3363 		unsigned int dpte_group_bytes[],
3364 		double HostVMInefficiencyFactor,
3365 		double HostVMMinPageSize,
3366 		unsigned int HostVMMaxNonCachedPageTableLevels)
3367 {
3368 	double ExtraLatencyBytes;
3369 	double ExtraLatency;
3370 
3371 	ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
3372 			ReorderingBytes,
3373 			TotalNumberOfActiveDPP,
3374 			PixelChunkSizeInKByte,
3375 			TotalNumberOfDCCActiveDPP,
3376 			MetaChunkSize,
3377 			GPUVMEnable,
3378 			HostVMEnable,
3379 			NumberOfActiveSurfaces,
3380 			NumberOfDPP,
3381 			dpte_group_bytes,
3382 			HostVMInefficiencyFactor,
3383 			HostVMMinPageSize,
3384 			HostVMMaxNonCachedPageTableLevels);
3385 
3386 	ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
3387 
3388 #ifdef __DML_VBA_DEBUG__
3389 	dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
3390 	dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
3391 	dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
3392 	dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
3393 	dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
3394 #endif
3395 
3396 	return ExtraLatency;
3397 } // CalculateExtraLatency
3398 
dml32_CalculatePrefetchSchedule(struct vba_vars_st * v,unsigned int k,double HostVMInefficiencyFactor,DmlPipe * myPipe,unsigned int DSCDelay,unsigned int DPP_RECOUT_WIDTH,unsigned int VStartup,unsigned int MaxVStartup,double UrgentLatency,double UrgentExtraLatency,double TCalc,unsigned int PDEAndMetaPTEBytesFrame,unsigned int MetaRowByte,unsigned int PixelPTEBytesPerRow,double PrefetchSourceLinesY,unsigned int SwathWidthY,unsigned int VInitPreFillY,unsigned int MaxNumSwathY,double PrefetchSourceLinesC,unsigned int SwathWidthC,unsigned int VInitPreFillC,unsigned int MaxNumSwathC,unsigned int swath_width_luma_ub,unsigned int swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double TWait,double TPreReq,bool ExtendPrefetchIfPossible,double * DSTXAfterScaler,double * DSTYAfterScaler,double * DestinationLinesForPrefetch,double * PrefetchBandwidth,double * DestinationLinesToRequestVMInVBlank,double * DestinationLinesToRequestRowInVBlank,double * VRatioPrefetchY,double * VRatioPrefetchC,double * RequiredPrefetchPixDataBWLuma,double * RequiredPrefetchPixDataBWChroma,bool * NotEnoughTimeForDynamicMetadata,double * Tno_bw,double * prefetch_vmrow_bw,double * Tdmdl_vm,double * Tdmdl,double * TSetup,unsigned int * VUpdateOffsetPix,double * VUpdateWidthPix,double * VReadyOffsetPix)3399 bool dml32_CalculatePrefetchSchedule(
3400 		struct vba_vars_st *v,
3401 		unsigned int k,
3402 		double HostVMInefficiencyFactor,
3403 		DmlPipe *myPipe,
3404 		unsigned int DSCDelay,
3405 		unsigned int DPP_RECOUT_WIDTH,
3406 		unsigned int VStartup,
3407 		unsigned int MaxVStartup,
3408 		double UrgentLatency,
3409 		double UrgentExtraLatency,
3410 		double TCalc,
3411 		unsigned int PDEAndMetaPTEBytesFrame,
3412 		unsigned int MetaRowByte,
3413 		unsigned int PixelPTEBytesPerRow,
3414 		double PrefetchSourceLinesY,
3415 		unsigned int SwathWidthY,
3416 		unsigned int VInitPreFillY,
3417 		unsigned int MaxNumSwathY,
3418 		double PrefetchSourceLinesC,
3419 		unsigned int SwathWidthC,
3420 		unsigned int VInitPreFillC,
3421 		unsigned int MaxNumSwathC,
3422 		unsigned int swath_width_luma_ub,
3423 		unsigned int swath_width_chroma_ub,
3424 		unsigned int SwathHeightY,
3425 		unsigned int SwathHeightC,
3426 		double TWait,
3427 		double TPreReq,
3428 		bool ExtendPrefetchIfPossible,
3429 		/* Output */
3430 		double   *DSTXAfterScaler,
3431 		double   *DSTYAfterScaler,
3432 		double *DestinationLinesForPrefetch,
3433 		double *PrefetchBandwidth,
3434 		double *DestinationLinesToRequestVMInVBlank,
3435 		double *DestinationLinesToRequestRowInVBlank,
3436 		double *VRatioPrefetchY,
3437 		double *VRatioPrefetchC,
3438 		double *RequiredPrefetchPixDataBWLuma,
3439 		double *RequiredPrefetchPixDataBWChroma,
3440 		bool   *NotEnoughTimeForDynamicMetadata,
3441 		double *Tno_bw,
3442 		double *prefetch_vmrow_bw,
3443 		double *Tdmdl_vm,
3444 		double *Tdmdl,
3445 		double *TSetup,
3446 		unsigned int   *VUpdateOffsetPix,
3447 		double   *VUpdateWidthPix,
3448 		double   *VReadyOffsetPix)
3449 {
3450 	double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater;
3451 	bool MyError = false;
3452 	unsigned int DPPCycles, DISPCLKCycles;
3453 	double DSTTotalPixelsAfterScaler;
3454 	double LineTime;
3455 	double dst_y_prefetch_equ;
3456 	double prefetch_bw_oto;
3457 	double Tvm_oto;
3458 	double Tr0_oto;
3459 	double Tvm_oto_lines;
3460 	double Tr0_oto_lines;
3461 	double dst_y_prefetch_oto;
3462 	double TimeForFetchingMetaPTE = 0;
3463 	double TimeForFetchingRowInVBlank = 0;
3464 	double LinesToRequestPrefetchPixelData = 0;
3465 	double LinesForPrefetchBandwidth = 0;
3466 	unsigned int HostVMDynamicLevelsTrips;
3467 	double  trip_to_mem;
3468 	double  Tvm_trips;
3469 	double  Tr0_trips;
3470 	double  Tvm_trips_rounded;
3471 	double  Tr0_trips_rounded;
3472 	double  Lsw_oto;
3473 	double  Tpre_rounded;
3474 	double  prefetch_bw_equ;
3475 	double  Tvm_equ;
3476 	double  Tr0_equ;
3477 	double  Tdmbf;
3478 	double  Tdmec;
3479 	double  Tdmsks;
3480 	double  prefetch_sw_bytes;
3481 	double  bytes_pp;
3482 	double  dep_bytes;
3483 	unsigned int max_vratio_pre = v->MaxVRatioPre;
3484 	double  min_Lsw;
3485 	double  Tsw_est1 = 0;
3486 	double  Tsw_est3 = 0;
3487 
3488 	if (v->GPUVMEnable == true && v->HostVMEnable == true)
3489 		HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
3490 	else
3491 		HostVMDynamicLevelsTrips = 0;
3492 #ifdef __DML_VBA_DEBUG__
3493 	dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable);
3494 	dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels);
3495 	dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
3496 	dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
3497 			__func__, v->HostVMEnable, HostVMInefficiencyFactor);
3498 #endif
3499 	dml32_CalculateVUpdateAndDynamicMetadataParameters(
3500 			v->MaxInterDCNTileRepeaters,
3501 			myPipe->Dppclk,
3502 			myPipe->Dispclk,
3503 			myPipe->DCFClkDeepSleep,
3504 			myPipe->PixelClock,
3505 			myPipe->HTotal,
3506 			myPipe->VBlank,
3507 			v->DynamicMetadataTransmittedBytes[k],
3508 			v->DynamicMetadataLinesBeforeActiveRequired[k],
3509 			myPipe->InterlaceEnable,
3510 			myPipe->ProgressiveToInterlaceUnitInOPP,
3511 			TSetup,
3512 
3513 			/* output */
3514 			&Tdmbf,
3515 			&Tdmec,
3516 			&Tdmsks,
3517 			VUpdateOffsetPix,
3518 			VUpdateWidthPix,
3519 			VReadyOffsetPix);
3520 
3521 	LineTime = myPipe->HTotal / myPipe->PixelClock;
3522 	trip_to_mem = UrgentLatency;
3523 	Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
3524 
3525 	if (v->DynamicMetadataVMEnabled == true)
3526 		*Tdmdl = TWait + Tvm_trips + trip_to_mem;
3527 	else
3528 		*Tdmdl = TWait + UrgentExtraLatency;
3529 
3530 #ifdef __DML_VBA_ALLOW_DELTA__
3531 	if (v->DynamicMetadataEnable[k] == false)
3532 		*Tdmdl = 0.0;
3533 #endif
3534 
3535 	if (v->DynamicMetadataEnable[k] == true) {
3536 		if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
3537 			*NotEnoughTimeForDynamicMetadata = true;
3538 #ifdef __DML_VBA_DEBUG__
3539 			dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
3540 			dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
3541 					__func__, Tdmbf);
3542 			dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3543 			dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
3544 					__func__, Tdmsks);
3545 			dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
3546 					__func__, *Tdmdl);
3547 #endif
3548 		} else {
3549 			*NotEnoughTimeForDynamicMetadata = false;
3550 		}
3551 	} else {
3552 		*NotEnoughTimeForDynamicMetadata = false;
3553 	}
3554 
3555 	*Tdmdl_vm =  (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true &&
3556 			v->GPUVMEnable == true ? TWait + Tvm_trips : 0);
3557 
3558 	if (myPipe->ScalerEnabled)
3559 		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL;
3560 	else
3561 		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly;
3562 
3563 	DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor;
3564 
3565 	DISPCLKCycles = v->DISPCLKDelaySubtotal;
3566 
3567 	if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
3568 		return true;
3569 
3570 	*DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
3571 			myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
3572 
3573 	*DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
3574 			+ (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
3575 			+ ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
3576 					myPipe->HActive / 2 : 0)
3577 			+ ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
3578 
3579 #ifdef __DML_VBA_DEBUG__
3580 	dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
3581 	dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
3582 	dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
3583 	dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
3584 	dml_print("DML::%s: DISPCLK: %f\n", __func__,  myPipe->Dispclk);
3585 	dml_print("DML::%s: DSCDelay: %d\n", __func__,  DSCDelay);
3586 	dml_print("DML::%s: ODMMode: %d\n", __func__,  myPipe->ODMMode);
3587 	dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
3588 	dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__,  *DSTXAfterScaler);
3589 #endif
3590 
3591 	if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
3592 		*DSTYAfterScaler = 1;
3593 	else
3594 		*DSTYAfterScaler = 0;
3595 
3596 	DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
3597 	*DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
3598 	*DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
3599 #ifdef __DML_VBA_DEBUG__
3600 	dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__,  *DSTXAfterScaler);
3601 	dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
3602 #endif
3603 
3604 	MyError = false;
3605 
3606 	Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
3607 
3608 	if (v->GPUVMEnable == true) {
3609 		Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
3610 		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3611 		if (v->GPUVMMaxPageTableLevels >= 3) {
3612 			*Tno_bw = UrgentExtraLatency + trip_to_mem *
3613 					(double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
3614 		} else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) {
3615 			Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
3616 					4.0 * LineTime; // VBA_ERROR
3617 			*Tno_bw = UrgentExtraLatency;
3618 		} else {
3619 			*Tno_bw = 0;
3620 		}
3621 	} else if (myPipe->DCCEnable == true) {
3622 		Tvm_trips_rounded = LineTime / 4.0;
3623 		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3624 		*Tno_bw = 0;
3625 	} else {
3626 		Tvm_trips_rounded = LineTime / 4.0;
3627 		Tr0_trips_rounded = LineTime / 2.0;
3628 		*Tno_bw = 0;
3629 	}
3630 	Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
3631 	Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);
3632 
3633 	if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
3634 			|| myPipe->SourcePixelFormat == dm_420_12) {
3635 		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
3636 	} else {
3637 		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
3638 	}
3639 
3640 	prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
3641 			+ PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
3642 	prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
3643 			prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
3644 
3645 	min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
3646 	min_Lsw = dml_max(min_Lsw, 1.0);
3647 	Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;
3648 
3649 	if (v->GPUVMEnable == true) {
3650 		Tvm_oto = dml_max3(
3651 				Tvm_trips,
3652 				*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
3653 				LineTime / 4.0);
3654 	} else
3655 		Tvm_oto = LineTime / 4.0;
3656 
3657 	if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
3658 		Tr0_oto = dml_max4(
3659 				Tr0_trips,
3660 				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
3661 				(LineTime - Tvm_oto)/2.0,
3662 				LineTime / 4.0);
3663 #ifdef __DML_VBA_DEBUG__
3664 		dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
3665 				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
3666 		dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
3667 		dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto);
3668 		dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
3669 #endif
3670 	} else
3671 		Tr0_oto = (LineTime - Tvm_oto) / 2.0;
3672 
3673 	Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
3674 	Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
3675 	dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
3676 
3677 	dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
3678 			(*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
3679 
3680 	dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, __DML_VBA_MAX_DST_Y_PRE__);
3681 #ifdef __DML_VBA_DEBUG__
3682 	dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
3683 	dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
3684 	dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
3685 	dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
3686 	dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
3687 	dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3688 	dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3689 	dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3690 	dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
3691 	dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3692 	dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
3693 	dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
3694 	dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
3695 	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
3696 	dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
3697 	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
3698 	dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
3699 	dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
3700 	dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
3701 	dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
3702 	dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
3703 	dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
3704 	dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
3705 	dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
3706 	dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
3707 	dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
3708 	dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
3709 #endif
3710 
3711 	dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
3712 	Tpre_rounded = dst_y_prefetch_equ * LineTime;
3713 #ifdef __DML_VBA_DEBUG__
3714 	dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
3715 	dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
3716 	dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
3717 	dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
3718 			__func__, VStartup * LineTime);
3719 	dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
3720 	dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
3721 	dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
3722 	dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3723 	dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
3724 	dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
3725 	dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
3726 			__func__, *DSTYAfterScaler);
3727 #endif
3728 	dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
3729 			MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
3730 
3731 	if (prefetch_sw_bytes < dep_bytes)
3732 		prefetch_sw_bytes = 2 * dep_bytes;
3733 
3734 	*PrefetchBandwidth = 0;
3735 	*DestinationLinesToRequestVMInVBlank = 0;
3736 	*DestinationLinesToRequestRowInVBlank = 0;
3737 	*VRatioPrefetchY = 0;
3738 	*VRatioPrefetchC = 0;
3739 	*RequiredPrefetchPixDataBWLuma = 0;
3740 	if (dst_y_prefetch_equ > 1 &&
3741 			(Tpre_rounded >= TPreReq || dst_y_prefetch_equ == __DML_VBA_MAX_DST_Y_PRE__)) {
3742 		double PrefetchBandwidth1;
3743 		double PrefetchBandwidth2;
3744 		double PrefetchBandwidth3;
3745 		double PrefetchBandwidth4;
3746 
3747 		if (Tpre_rounded - *Tno_bw > 0) {
3748 			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3749 					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3750 					+ prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
3751 			Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
3752 		} else
3753 			PrefetchBandwidth1 = 0;
3754 
3755 		if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
3756 				&& Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
3757 			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3758 					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3759 					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
3760 		}
3761 
3762 		if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
3763 			PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
3764 			(Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
3765 		else
3766 			PrefetchBandwidth2 = 0;
3767 
3768 		if (Tpre_rounded - Tvm_trips_rounded > 0) {
3769 			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3770 					+ prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
3771 			Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
3772 		} else
3773 			PrefetchBandwidth3 = 0;
3774 
3775 
3776 		if (VStartup == MaxVStartup &&
3777 				(Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
3778 				LineTime - Tvm_trips_rounded > 0) {
3779 			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3780 					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
3781 		}
3782 
3783 		if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
3784 			PrefetchBandwidth4 = prefetch_sw_bytes /
3785 					(Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
3786 		} else {
3787 			PrefetchBandwidth4 = 0;
3788 		}
3789 
3790 #ifdef __DML_VBA_DEBUG__
3791 		dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
3792 		dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
3793 		dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
3794 		dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
3795 		dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
3796 		dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
3797 		dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
3798 		dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
3799 		dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
3800 #endif
3801 		{
3802 			bool Case1OK;
3803 			bool Case2OK;
3804 			bool Case3OK;
3805 
3806 			if (PrefetchBandwidth1 > 0) {
3807 				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
3808 						>= Tvm_trips_rounded
3809 						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3810 								/ PrefetchBandwidth1 >= Tr0_trips_rounded) {
3811 					Case1OK = true;
3812 				} else {
3813 					Case1OK = false;
3814 				}
3815 			} else {
3816 				Case1OK = false;
3817 			}
3818 
3819 			if (PrefetchBandwidth2 > 0) {
3820 				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
3821 						>= Tvm_trips_rounded
3822 						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3823 						/ PrefetchBandwidth2 < Tr0_trips_rounded) {
3824 					Case2OK = true;
3825 				} else {
3826 					Case2OK = false;
3827 				}
3828 			} else {
3829 				Case2OK = false;
3830 			}
3831 
3832 			if (PrefetchBandwidth3 > 0) {
3833 				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
3834 						Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
3835 								HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
3836 								Tr0_trips_rounded) {
3837 					Case3OK = true;
3838 				} else {
3839 					Case3OK = false;
3840 				}
3841 			} else {
3842 				Case3OK = false;
3843 			}
3844 
3845 			if (Case1OK)
3846 				prefetch_bw_equ = PrefetchBandwidth1;
3847 			else if (Case2OK)
3848 				prefetch_bw_equ = PrefetchBandwidth2;
3849 			else if (Case3OK)
3850 				prefetch_bw_equ = PrefetchBandwidth3;
3851 			else
3852 				prefetch_bw_equ = PrefetchBandwidth4;
3853 
3854 #ifdef __DML_VBA_DEBUG__
3855 			dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
3856 			dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
3857 			dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
3858 			dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
3859 #endif
3860 
3861 			if (prefetch_bw_equ > 0) {
3862 				if (v->GPUVMEnable == true) {
3863 					Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
3864 							HostVMInefficiencyFactor / prefetch_bw_equ,
3865 							Tvm_trips, LineTime / 4);
3866 				} else {
3867 					Tvm_equ = LineTime / 4;
3868 				}
3869 
3870 				if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
3871 					Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
3872 							HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
3873 							(LineTime - Tvm_equ) / 2, LineTime / 4);
3874 				} else {
3875 					Tr0_equ = (LineTime - Tvm_equ) / 2;
3876 				}
3877 			} else {
3878 				Tvm_equ = 0;
3879 				Tr0_equ = 0;
3880 #ifdef __DML_VBA_DEBUG__
3881 				dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
3882 #endif
3883 			}
3884 		}
3885 
3886 		if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
3887 			if (dst_y_prefetch_oto * LineTime < TPreReq) {
3888 				*DestinationLinesForPrefetch = dst_y_prefetch_equ;
3889 			} else {
3890 				*DestinationLinesForPrefetch = dst_y_prefetch_oto;
3891 			}
3892 			TimeForFetchingMetaPTE = Tvm_oto;
3893 			TimeForFetchingRowInVBlank = Tr0_oto;
3894 			*PrefetchBandwidth = prefetch_bw_oto;
3895 			/* Clamp to oto for bandwidth calculation */
3896 			LinesForPrefetchBandwidth = dst_y_prefetch_oto;
3897 		} else {
3898 			/* For mode programming we want to extend the prefetch as much as possible
3899 			 * (up to oto, or as long as we can for equ) if we're not already applying
3900 			 * the 60us prefetch requirement. This is to avoid intermittent underflow
3901 			 * issues during prefetch.
3902 			 *
3903 			 * The prefetch extension is applied under the following scenarios:
3904 			 * 1. We're in prefetch mode > 0 (i.e. we don't support MCLK switch in blank)
3905 			 * 2. We're using subvp or drr methods of p-state switch, in which case we
3906 			 *    we don't care if prefetch takes up more of the blanking time
3907 			 *
3908 			 * Mode programming typically chooses the smallest prefetch time possible
3909 			 * (i.e. highest bandwidth during prefetch) presumably to create margin between
3910 			 * p-states / c-states that happen in vblank and prefetch. Therefore we only
3911 			 * apply this prefetch extension when p-state in vblank is not required (UCLK
3912 			 * p-states take up the most vblank time).
3913 			 */
3914 			if (ExtendPrefetchIfPossible && TPreReq == 0 && VStartup < MaxVStartup) {
3915 				MyError = true;
3916 			} else {
3917 				*DestinationLinesForPrefetch = dst_y_prefetch_equ;
3918 				TimeForFetchingMetaPTE = Tvm_equ;
3919 				TimeForFetchingRowInVBlank = Tr0_equ;
3920 				*PrefetchBandwidth = prefetch_bw_equ;
3921 				/* Clamp to equ for bandwidth calculation */
3922 				LinesForPrefetchBandwidth = dst_y_prefetch_equ;
3923 			}
3924 		}
3925 
3926 		*DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
3927 
3928 		*DestinationLinesToRequestRowInVBlank =
3929 				dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
3930 
3931 		LinesToRequestPrefetchPixelData = LinesForPrefetchBandwidth -
3932 				*DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
3933 
3934 #ifdef __DML_VBA_DEBUG__
3935 		dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
3936 		dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
3937 				__func__, *DestinationLinesToRequestVMInVBlank);
3938 		dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
3939 		dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3940 		dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
3941 				__func__, *DestinationLinesToRequestRowInVBlank);
3942 		dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3943 		dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
3944 #endif
3945 
3946 		if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
3947 			*VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
3948 			*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3949 #ifdef __DML_VBA_DEBUG__
3950 			dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3951 			dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
3952 			dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
3953 #endif
3954 			if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
3955 				if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
3956 					*VRatioPrefetchY =
3957 							dml_max((double) PrefetchSourceLinesY /
3958 									LinesToRequestPrefetchPixelData,
3959 									(double) MaxNumSwathY * SwathHeightY /
3960 									(LinesToRequestPrefetchPixelData -
3961 									(VInitPreFillY - 3.0) / 2.0));
3962 					*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3963 				} else {
3964 					MyError = true;
3965 					*VRatioPrefetchY = 0;
3966 				}
3967 #ifdef __DML_VBA_DEBUG__
3968 				dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3969 				dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3970 				dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
3971 #endif
3972 			}
3973 
3974 			*VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
3975 			*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3976 
3977 #ifdef __DML_VBA_DEBUG__
3978 			dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3979 			dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
3980 			dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
3981 #endif
3982 			if ((SwathHeightC > 4)) {
3983 				if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
3984 					*VRatioPrefetchC =
3985 						dml_max(*VRatioPrefetchC,
3986 							(double) MaxNumSwathC * SwathHeightC /
3987 							(LinesToRequestPrefetchPixelData -
3988 							(VInitPreFillC - 3.0) / 2.0));
3989 					*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3990 				} else {
3991 					MyError = true;
3992 					*VRatioPrefetchC = 0;
3993 				}
3994 #ifdef __DML_VBA_DEBUG__
3995 				dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3996 				dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3997 				dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
3998 #endif
3999 			}
4000 
4001 			*RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
4002 					/ LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
4003 					/ LineTime;
4004 
4005 #ifdef __DML_VBA_DEBUG__
4006 			dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
4007 			dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
4008 			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
4009 			dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
4010 					__func__, *RequiredPrefetchPixDataBWLuma);
4011 #endif
4012 			*RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
4013 					LinesToRequestPrefetchPixelData
4014 					* myPipe->BytePerPixelC
4015 					* swath_width_chroma_ub / LineTime;
4016 		} else {
4017 			MyError = true;
4018 #ifdef __DML_VBA_DEBUG__
4019 			dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
4020 					__func__, LinesToRequestPrefetchPixelData);
4021 #endif
4022 			*VRatioPrefetchY = 0;
4023 			*VRatioPrefetchC = 0;
4024 			*RequiredPrefetchPixDataBWLuma = 0;
4025 			*RequiredPrefetchPixDataBWChroma = 0;
4026 		}
4027 #ifdef __DML_VBA_DEBUG__
4028 		dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
4029 			(double)LinesToRequestPrefetchPixelData * LineTime +
4030 			2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
4031 		dml_print("DML:  Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
4032 		dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
4033 			(*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
4034 		dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
4035 		dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
4036 			TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
4037 			((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
4038 		dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
4039 				PixelPTEBytesPerRow);
4040 #endif
4041 	} else {
4042 		MyError = true;
4043 #ifdef __DML_VBA_DEBUG__
4044 		dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
4045 				__func__, dst_y_prefetch_equ);
4046 #endif
4047 	}
4048 
4049 	{
4050 		double prefetch_vm_bw;
4051 		double prefetch_row_bw;
4052 
4053 		if (PDEAndMetaPTEBytesFrame == 0) {
4054 			prefetch_vm_bw = 0;
4055 		} else if (*DestinationLinesToRequestVMInVBlank > 0) {
4056 #ifdef __DML_VBA_DEBUG__
4057 			dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
4058 			dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
4059 			dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
4060 					__func__, *DestinationLinesToRequestVMInVBlank);
4061 			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
4062 #endif
4063 			prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
4064 					(*DestinationLinesToRequestVMInVBlank * LineTime);
4065 #ifdef __DML_VBA_DEBUG__
4066 			dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
4067 #endif
4068 		} else {
4069 			prefetch_vm_bw = 0;
4070 			MyError = true;
4071 #ifdef __DML_VBA_DEBUG__
4072 			dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
4073 					__func__, *DestinationLinesToRequestVMInVBlank);
4074 #endif
4075 		}
4076 
4077 		if (MetaRowByte + PixelPTEBytesPerRow == 0) {
4078 			prefetch_row_bw = 0;
4079 		} else if (*DestinationLinesToRequestRowInVBlank > 0) {
4080 			prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
4081 					(*DestinationLinesToRequestRowInVBlank * LineTime);
4082 
4083 #ifdef __DML_VBA_DEBUG__
4084 			dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
4085 			dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
4086 			dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
4087 					__func__, *DestinationLinesToRequestRowInVBlank);
4088 			dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
4089 #endif
4090 		} else {
4091 			prefetch_row_bw = 0;
4092 			MyError = true;
4093 #ifdef __DML_VBA_DEBUG__
4094 			dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
4095 					__func__, *DestinationLinesToRequestRowInVBlank);
4096 #endif
4097 		}
4098 
4099 		*prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
4100 	}
4101 
4102 	if (MyError) {
4103 		*PrefetchBandwidth = 0;
4104 		*DestinationLinesToRequestVMInVBlank = 0;
4105 		*DestinationLinesToRequestRowInVBlank = 0;
4106 		*DestinationLinesForPrefetch = 0;
4107 		*VRatioPrefetchY = 0;
4108 		*VRatioPrefetchC = 0;
4109 		*RequiredPrefetchPixDataBWLuma = 0;
4110 		*RequiredPrefetchPixDataBWChroma = 0;
4111 	}
4112 
4113 	return MyError;
4114 } // CalculatePrefetchSchedule
4115 
dml32_CalculateFlipSchedule(double HostVMInefficiencyFactor,double UrgentExtraLatency,double UrgentLatency,unsigned int GPUVMMaxPageTableLevels,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,bool GPUVMEnable,double HostVMMinPageSize,double PDEAndMetaPTEBytesPerFrame,double MetaRowBytes,double DPTEBytesPerRow,double BandwidthAvailableForImmediateFlip,unsigned int TotImmediateFlipBytes,enum source_format_class SourcePixelFormat,double LineTime,double VRatio,double VRatioChroma,double Tno_bw,bool DCCEnable,unsigned int dpte_row_height,unsigned int meta_row_height,unsigned int dpte_row_height_chroma,unsigned int meta_row_height_chroma,bool use_one_row_for_frame_flip,double * DestinationLinesToRequestVMInImmediateFlip,double * DestinationLinesToRequestRowInImmediateFlip,double * final_flip_bw,bool * ImmediateFlipSupportedForPipe)4116 void dml32_CalculateFlipSchedule(
4117 		double HostVMInefficiencyFactor,
4118 		double UrgentExtraLatency,
4119 		double UrgentLatency,
4120 		unsigned int GPUVMMaxPageTableLevels,
4121 		bool HostVMEnable,
4122 		unsigned int HostVMMaxNonCachedPageTableLevels,
4123 		bool GPUVMEnable,
4124 		double HostVMMinPageSize,
4125 		double PDEAndMetaPTEBytesPerFrame,
4126 		double MetaRowBytes,
4127 		double DPTEBytesPerRow,
4128 		double BandwidthAvailableForImmediateFlip,
4129 		unsigned int TotImmediateFlipBytes,
4130 		enum source_format_class SourcePixelFormat,
4131 		double LineTime,
4132 		double VRatio,
4133 		double VRatioChroma,
4134 		double Tno_bw,
4135 		bool DCCEnable,
4136 		unsigned int dpte_row_height,
4137 		unsigned int meta_row_height,
4138 		unsigned int dpte_row_height_chroma,
4139 		unsigned int meta_row_height_chroma,
4140 		bool    use_one_row_for_frame_flip,
4141 
4142 		/* Output */
4143 		double *DestinationLinesToRequestVMInImmediateFlip,
4144 		double *DestinationLinesToRequestRowInImmediateFlip,
4145 		double *final_flip_bw,
4146 		bool *ImmediateFlipSupportedForPipe)
4147 {
4148 	double min_row_time = 0.0;
4149 	unsigned int HostVMDynamicLevelsTrips;
4150 	double TimeForFetchingMetaPTEImmediateFlip;
4151 	double TimeForFetchingRowInVBlankImmediateFlip;
4152 	double ImmediateFlipBW = 1.0;
4153 
4154 	if (GPUVMEnable == true && HostVMEnable == true)
4155 		HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
4156 	else
4157 		HostVMDynamicLevelsTrips = 0;
4158 
4159 #ifdef __DML_VBA_DEBUG__
4160 	dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
4161 	dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
4162 #endif
4163 
4164 	if (TotImmediateFlipBytes > 0) {
4165 		if (use_one_row_for_frame_flip) {
4166 			ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) *
4167 					BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4168 		} else {
4169 			ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) *
4170 					BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4171 		}
4172 		if (GPUVMEnable == true) {
4173 			TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame *
4174 					HostVMInefficiencyFactor / ImmediateFlipBW,
4175 					UrgentExtraLatency + UrgentLatency *
4176 					(GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
4177 					LineTime / 4.0);
4178 		} else {
4179 			TimeForFetchingMetaPTEImmediateFlip = 0;
4180 		}
4181 		if ((GPUVMEnable == true || DCCEnable == true)) {
4182 			TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
4183 					(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
4184 					UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
4185 		} else {
4186 			TimeForFetchingRowInVBlankImmediateFlip = 0;
4187 		}
4188 
4189 		*DestinationLinesToRequestVMInImmediateFlip =
4190 				dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
4191 		*DestinationLinesToRequestRowInImmediateFlip =
4192 				dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
4193 
4194 		if (GPUVMEnable == true) {
4195 			*final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
4196 					(*DestinationLinesToRequestVMInImmediateFlip * LineTime),
4197 					(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4198 					(*DestinationLinesToRequestRowInImmediateFlip * LineTime));
4199 		} else if ((GPUVMEnable == true || DCCEnable == true)) {
4200 			*final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4201 					(*DestinationLinesToRequestRowInImmediateFlip * LineTime);
4202 		} else {
4203 			*final_flip_bw = 0;
4204 		}
4205 	} else {
4206 		TimeForFetchingMetaPTEImmediateFlip = 0;
4207 		TimeForFetchingRowInVBlankImmediateFlip = 0;
4208 		*DestinationLinesToRequestVMInImmediateFlip = 0;
4209 		*DestinationLinesToRequestRowInImmediateFlip = 0;
4210 		*final_flip_bw = 0;
4211 	}
4212 
4213 	if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
4214 		if (GPUVMEnable == true && DCCEnable != true) {
4215 			min_row_time = dml_min(dpte_row_height *
4216 					LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
4217 		} else if (GPUVMEnable != true && DCCEnable == true) {
4218 			min_row_time = dml_min(meta_row_height *
4219 					LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
4220 		} else {
4221 			min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height *
4222 					LineTime / VRatio, dpte_row_height_chroma * LineTime /
4223 					VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
4224 		}
4225 	} else {
4226 		if (GPUVMEnable == true && DCCEnable != true) {
4227 			min_row_time = dpte_row_height * LineTime / VRatio;
4228 		} else if (GPUVMEnable != true && DCCEnable == true) {
4229 			min_row_time = meta_row_height * LineTime / VRatio;
4230 		} else {
4231 			min_row_time =
4232 				dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
4233 		}
4234 	}
4235 
4236 	if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
4237 			|| TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
4238 					> min_row_time) {
4239 		*ImmediateFlipSupportedForPipe = false;
4240 	} else {
4241 		*ImmediateFlipSupportedForPipe = true;
4242 	}
4243 
4244 #ifdef __DML_VBA_DEBUG__
4245 	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
4246 	dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
4247 	dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
4248 			__func__, *DestinationLinesToRequestVMInImmediateFlip);
4249 	dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
4250 			__func__, *DestinationLinesToRequestRowInImmediateFlip);
4251 	dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
4252 	dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
4253 			__func__, TimeForFetchingRowInVBlankImmediateFlip);
4254 	dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
4255 	dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
4256 #endif
4257 } // CalculateFlipSchedule
4258 
dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(struct vba_vars_st * v,unsigned int PrefetchMode,double DCFCLK,double ReturnBW,SOCParametersList mmSOCParameters,double SOCCLK,double DCFClkDeepSleep,unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],unsigned int SwathHeightY[],unsigned int SwathHeightC[],double SwathWidthY[],double SwathWidthC[],unsigned int DPPPerSurface[],double BytePerPixelDETY[],double BytePerPixelDETC[],double DSTXAfterScaler[],double DSTYAfterScaler[],bool UnboundedRequestEnabled,unsigned int CompressedBufferSizeInkByte,enum clock_change_support * DRAMClockChangeSupport,double MaxActiveDRAMClockChangeLatencySupported[],unsigned int SubViewportLinesNeededInMALL[],enum dm_fclock_change_support * FCLKChangeSupport,double * MinActiveFCLKChangeLatencySupported,bool * USRRetrainingSupport,double ActiveDRAMClockChangeLatencyMargin[])4259 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
4260 		struct vba_vars_st *v,
4261 		unsigned int PrefetchMode,
4262 		double DCFCLK,
4263 		double ReturnBW,
4264 		SOCParametersList mmSOCParameters,
4265 		double SOCCLK,
4266 		double DCFClkDeepSleep,
4267 		unsigned int DETBufferSizeY[],
4268 		unsigned int DETBufferSizeC[],
4269 		unsigned int SwathHeightY[],
4270 		unsigned int SwathHeightC[],
4271 		double SwathWidthY[],
4272 		double SwathWidthC[],
4273 		unsigned int DPPPerSurface[],
4274 		double BytePerPixelDETY[],
4275 		double BytePerPixelDETC[],
4276 		double DSTXAfterScaler[],
4277 		double DSTYAfterScaler[],
4278 		bool UnboundedRequestEnabled,
4279 		unsigned int CompressedBufferSizeInkByte,
4280 
4281 		/* Output */
4282 		enum clock_change_support *DRAMClockChangeSupport,
4283 		double MaxActiveDRAMClockChangeLatencySupported[],
4284 		unsigned int SubViewportLinesNeededInMALL[],
4285 		enum dm_fclock_change_support *FCLKChangeSupport,
4286 		double *MinActiveFCLKChangeLatencySupported,
4287 		bool *USRRetrainingSupport,
4288 		double ActiveDRAMClockChangeLatencyMargin[])
4289 {
4290 	unsigned int i, j, k;
4291 	unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
4292 	unsigned int DRAMClockChangeSupportNumber = 0;
4293 	unsigned int LastSurfaceWithoutMargin = 0;
4294 	unsigned int DRAMClockChangeMethod = 0;
4295 	bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
4296 	double MinActiveFCLKChangeMargin = 0.;
4297 	double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
4298 	double ActiveClockChangeLatencyHidingY;
4299 	double ActiveClockChangeLatencyHidingC;
4300 	double ActiveClockChangeLatencyHiding;
4301 	double EffectiveDETBufferSizeY;
4302 	double     ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
4303 	double     USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
4304 	double TotalPixelBW = 0.0;
4305 	bool    SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
4306 	double     EffectiveLBLatencyHidingY;
4307 	double     EffectiveLBLatencyHidingC;
4308 	double     LinesInDETY[DC__NUM_DPP__MAX];
4309 	double     LinesInDETC[DC__NUM_DPP__MAX];
4310 	unsigned int    LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
4311 	unsigned int    LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
4312 	double     FullDETBufferingTimeY;
4313 	double     FullDETBufferingTimeC;
4314 	double     WritebackDRAMClockChangeLatencyMargin;
4315 	double     WritebackFCLKChangeLatencyMargin;
4316 	double     WritebackLatencyHiding;
4317 	bool    SameTimingForFCLKChange;
4318 
4319 	unsigned int    TotalActiveWriteback = 0;
4320 	unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
4321 	unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
4322 
4323 	v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
4324 	v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
4325 			+ mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
4326 	v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark;
4327 	v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark;
4328 	v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
4329 			+ 10 / DCFClkDeepSleep;
4330 	v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
4331 			+ 10 / DCFClkDeepSleep;
4332 	v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
4333 			+ 10 / DCFClkDeepSleep;
4334 	v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
4335 			+ mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;
4336 
4337 #ifdef __DML_VBA_DEBUG__
4338 	dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
4339 	dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
4340 	dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
4341 	dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark);
4342 	dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark);
4343 	dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark);
4344 	dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark);
4345 	dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark);
4346 	dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark);
4347 	dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark);
4348 	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
4349 			__func__, v->Watermark.Z8StutterEnterPlusExitWatermark);
4350 #endif
4351 
4352 
4353 	TotalActiveWriteback = 0;
4354 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4355 		if (v->WritebackEnable[k] == true)
4356 			TotalActiveWriteback = TotalActiveWriteback + 1;
4357 	}
4358 
4359 	if (TotalActiveWriteback <= 1) {
4360 		v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
4361 	} else {
4362 		v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
4363 				+ v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4364 	}
4365 	if (v->USRRetrainingRequiredFinal)
4366 		v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark
4367 				+ mmSOCParameters.USRRetrainingLatency;
4368 
4369 	if (TotalActiveWriteback <= 1) {
4370 		v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4371 				+ mmSOCParameters.WritebackLatency;
4372 		v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4373 				+ mmSOCParameters.WritebackLatency;
4374 	} else {
4375 		v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4376 				+ mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4377 		v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4378 				+ mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024 / 32 / SOCCLK;
4379 	}
4380 
4381 	if (v->USRRetrainingRequiredFinal)
4382 		v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark
4383 				+ mmSOCParameters.USRRetrainingLatency;
4384 
4385 	if (v->USRRetrainingRequiredFinal)
4386 		v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark
4387 				+ mmSOCParameters.USRRetrainingLatency;
4388 
4389 #ifdef __DML_VBA_DEBUG__
4390 	dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
4391 			__func__, v->Watermark.WritebackDRAMClockChangeWatermark);
4392 	dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark);
4393 	dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark);
4394 	dml_print("DML::%s: v->USRRetrainingRequiredFinal = %d\n", __func__, v->USRRetrainingRequiredFinal);
4395 	dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
4396 #endif
4397 
4398 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4399 		TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] +
4400 				SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]);
4401 	}
4402 
4403 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4404 
4405 		LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
4406 		LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
4407 
4408 
4409 #ifdef __DML_VBA_DEBUG__
4410 		dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines);
4411 		dml_print("DML::%s: k=%d, v->LineBufferSizeFinal     = %d\n", __func__, k, v->LineBufferSizeFinal);
4412 		dml_print("DML::%s: k=%d, v->LBBitPerPixel      = %d\n", __func__, k, v->LBBitPerPixel[k]);
4413 		dml_print("DML::%s: k=%d, v->HRatio             = %f\n", __func__, k, v->HRatio[k]);
4414 		dml_print("DML::%s: k=%d, v->vtaps              = %d\n", __func__, k, v->vtaps[k]);
4415 #endif
4416 
4417 		EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
4418 		EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
4419 		EffectiveDETBufferSizeY = DETBufferSizeY[k];
4420 
4421 		if (UnboundedRequestEnabled) {
4422 			EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
4423 					+ CompressedBufferSizeInkByte * 1024
4424 							* (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k])
4425 							/ (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
4426 		}
4427 
4428 		LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
4429 		LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
4430 		FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
4431 
4432 		ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
4433 				- (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k];
4434 
4435 		if (v->NumberOfActiveSurfaces > 1) {
4436 			ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
4437 					- (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k]
4438 							/ v->PixelClock[k] / v->VRatio[k];
4439 		}
4440 
4441 		if (BytePerPixelDETC[k] > 0) {
4442 			LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
4443 			LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]);
4444 			FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k])
4445 					/ v->VRatioChroma[k];
4446 			ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
4447 					- (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k]
4448 							/ v->PixelClock[k];
4449 			if (v->NumberOfActiveSurfaces > 1) {
4450 				ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
4451 						- (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k]
4452 								/ v->PixelClock[k] / v->VRatioChroma[k];
4453 			}
4454 			ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY,
4455 					ActiveClockChangeLatencyHidingC);
4456 		} else {
4457 			ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
4458 		}
4459 
4460 		ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
4461 				- v->Watermark.DRAMClockChangeWatermark;
4462 		ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
4463 				- v->Watermark.FCLKChangeWatermark;
4464 		USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark;
4465 
4466 		if (v->WritebackEnable[k]) {
4467 			WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024
4468 					/ (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
4469 							/ (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
4470 			if (v->WritebackPixelFormat[k] == dm_444_64)
4471 				WritebackLatencyHiding = WritebackLatencyHiding / 2;
4472 
4473 			WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
4474 					- v->Watermark.WritebackDRAMClockChangeWatermark;
4475 
4476 			WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
4477 					- v->Watermark.WritebackFCLKChangeWatermark;
4478 
4479 			ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
4480 					WritebackFCLKChangeLatencyMargin);
4481 			ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k],
4482 					WritebackDRAMClockChangeLatencyMargin);
4483 		}
4484 		MaxActiveDRAMClockChangeLatencySupported[k] =
4485 				(v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
4486 						0 :
4487 						(ActiveDRAMClockChangeLatencyMargin[k]
4488 								+ mmSOCParameters.DRAMClockChangeLatency);
4489 	}
4490 
4491 	for (i = 0; i < v->NumberOfActiveSurfaces; ++i) {
4492 		for (j = 0; j < v->NumberOfActiveSurfaces; ++j) {
4493 			if (i == j ||
4494 					(v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) ||
4495 					(v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) ||
4496 					(v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) ||
4497 					(v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] &&
4498 					v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] &&
4499 					v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
4500 					(v->DRRDisplay[i] || v->DRRDisplay[j]))) {
4501 				SynchronizedSurfaces[i][j] = true;
4502 			} else {
4503 				SynchronizedSurfaces[i][j] = false;
4504 			}
4505 		}
4506 	}
4507 
4508 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4509 		if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4510 				(!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
4511 				ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
4512 			FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
4513 			MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
4514 			SurfaceWithMinActiveFCLKChangeMargin = k;
4515 		}
4516 	}
4517 
4518 	*MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
4519 
4520 	SameTimingForFCLKChange = true;
4521 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4522 		if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
4523 			if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4524 					(SameTimingForFCLKChange ||
4525 					ActiveFCLKChangeLatencyMargin[k] <
4526 					SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
4527 				SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
4528 			}
4529 			SameTimingForFCLKChange = false;
4530 		}
4531 	}
4532 
4533 	if (MinActiveFCLKChangeMargin > 0) {
4534 		*FCLKChangeSupport = dm_fclock_change_vactive;
4535 	} else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
4536 			(PrefetchMode <= 1)) {
4537 		*FCLKChangeSupport = dm_fclock_change_vblank;
4538 	} else {
4539 		*FCLKChangeSupport = dm_fclock_change_unsupported;
4540 	}
4541 
4542 	*USRRetrainingSupport = true;
4543 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4544 		if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4545 				(USRRetrainingLatencyMargin[k] < 0)) {
4546 			*USRRetrainingSupport = false;
4547 		}
4548 	}
4549 
4550 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4551 		if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
4552 				v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
4553 				v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
4554 				ActiveDRAMClockChangeLatencyMargin[k] < 0) {
4555 			if (PrefetchMode > 0) {
4556 				DRAMClockChangeSupportNumber = 2;
4557 			} else if (DRAMClockChangeSupportNumber == 0) {
4558 				DRAMClockChangeSupportNumber = 1;
4559 				LastSurfaceWithoutMargin = k;
4560 			} else if (DRAMClockChangeSupportNumber == 1 &&
4561 					!SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
4562 				DRAMClockChangeSupportNumber = 2;
4563 			}
4564 		}
4565 	}
4566 
4567 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4568 		if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
4569 			DRAMClockChangeMethod = 1;
4570 		else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
4571 			DRAMClockChangeMethod = 2;
4572 	}
4573 
4574 	if (DRAMClockChangeMethod == 0) {
4575 		if (DRAMClockChangeSupportNumber == 0)
4576 			*DRAMClockChangeSupport = dm_dram_clock_change_vactive;
4577 		else if (DRAMClockChangeSupportNumber == 1)
4578 			*DRAMClockChangeSupport = dm_dram_clock_change_vblank;
4579 		else
4580 			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4581 	} else if (DRAMClockChangeMethod == 1) {
4582 		if (DRAMClockChangeSupportNumber == 0)
4583 			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
4584 		else if (DRAMClockChangeSupportNumber == 1)
4585 			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
4586 		else
4587 			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4588 	} else {
4589 		if (DRAMClockChangeSupportNumber == 0)
4590 			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
4591 		else if (DRAMClockChangeSupportNumber == 1)
4592 			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
4593 		else
4594 			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4595 	}
4596 
4597 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4598 		unsigned int dst_y_pstate;
4599 		unsigned int src_y_pstate_l;
4600 		unsigned int src_y_pstate_c;
4601 		unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;
4602 
4603 		dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), 1);
4604 		src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]);
4605 		src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k];
4606 		sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k];
4607 
4608 #ifdef __DML_VBA_DEBUG__
4609 dml_print("DML::%s: k=%d, DETBufferSizeY               = %d\n", __func__, k, DETBufferSizeY[k]);
4610 dml_print("DML::%s: k=%d, BytePerPixelDETY             = %f\n", __func__, k, BytePerPixelDETY[k]);
4611 dml_print("DML::%s: k=%d, SwathWidthY                  = %d\n", __func__, k, SwathWidthY[k]);
4612 dml_print("DML::%s: k=%d, SwathHeightY                 = %d\n", __func__, k, SwathHeightY[k]);
4613 dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY  = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]);
4614 dml_print("DML::%s: k=%d, dst_y_pstate      = %d\n", __func__, k, dst_y_pstate);
4615 dml_print("DML::%s: k=%d, src_y_pstate_l    = %d\n", __func__, k, src_y_pstate_l);
4616 dml_print("DML::%s: k=%d, src_y_ahead_l     = %d\n", __func__, k, src_y_ahead_l);
4617 dml_print("DML::%s: k=%d, v->meta_row_height   = %d\n", __func__, k, v->meta_row_height[k]);
4618 dml_print("DML::%s: k=%d, sub_vp_lines_l    = %d\n", __func__, k, sub_vp_lines_l);
4619 #endif
4620 		SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;
4621 
4622 		if (BytePerPixelDETC[k] > 0) {
4623 			src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]);
4624 			src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k];
4625 			sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k];
4626 			SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);
4627 
4628 #ifdef __DML_VBA_DEBUG__
4629 dml_print("DML::%s: k=%d, src_y_pstate_c            = %d\n", __func__, k, src_y_pstate_c);
4630 dml_print("DML::%s: k=%d, src_y_ahead_c             = %d\n", __func__, k, src_y_ahead_c);
4631 dml_print("DML::%s: k=%d, v->meta_row_height_chroma    = %d\n", __func__, k, v->meta_row_height_chroma[k]);
4632 dml_print("DML::%s: k=%d, sub_vp_lines_c            = %d\n", __func__, k, sub_vp_lines_c);
4633 #endif
4634 		}
4635 	}
4636 #ifdef __DML_VBA_DEBUG__
4637 	dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
4638 	dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
4639 	dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
4640 			__func__, *MinActiveFCLKChangeLatencySupported);
4641 	dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
4642 #endif
4643 } // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
4644 
/*
 * dml32_CalculateWriteBackDISPCLK() - display clock requested for one
 * writeback configuration.
 *
 * Three candidate clocks are derived from PixelClock:
 *   - horizontal taps: ceil(WritebackHTaps / 8) / WritebackHRatio
 *   - vertical taps:   (WritebackVTaps * ceil(WritebackDestinationWidth / 6) + 8)
 *                      / HTotal
 *   - line buffer:     WritebackVTaps * (WritebackDestinationWidth *
 *                      WritebackVTaps - WritebackLineBufferSize / 57) / 6
 *                      / WritebackSourceWidth
 *
 * The largest candidate is handed to dml32_RoundToDFSGranularity() together
 * with the constant 1 and DISPCLKDPPCLKVCOSpeed, and that result is returned.
 *
 * WritebackPixelFormat and WritebackVRatio belong to the signature but are
 * not read by this function.
 */
double dml32_CalculateWriteBackDISPCLK(
		enum source_format_class WritebackPixelFormat,
		double PixelClock,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackHTaps,
		unsigned int WritebackVTaps,
		unsigned int   WritebackSourceWidth,
		unsigned int   WritebackDestinationWidth,
		unsigned int HTotal,
		unsigned int WritebackLineBufferSize,
		double DISPCLKDPPCLKVCOSpeed)
{
	/* Horizontal-tap bound. */
	const double dispclk_h = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;

	/* Vertical-tap bound. */
	const double dispclk_v = PixelClock *
			(WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;

	/*
	 * Line-buffer bound. The width * taps product is formed in unsigned
	 * int arithmetic; only the subtraction promotes it to double.
	 */
	const double line_buffer_term = WritebackDestinationWidth * WritebackVTaps -
			WritebackLineBufferSize / 57.0;
	const double dispclk_hb = PixelClock * WritebackVTaps * line_buffer_term /
			6.0 / WritebackSourceWidth;

	const double worst_case = dml_max3(dispclk_h, dispclk_v, dispclk_hb);

	return dml32_RoundToDFSGranularity(worst_case, 1, DISPCLKDPPCLKVCOSpeed);
}
4666 
dml32_CalculateMinAndMaxPrefetchMode(enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal,unsigned int * MinPrefetchMode,unsigned int * MaxPrefetchMode)4667 void dml32_CalculateMinAndMaxPrefetchMode(
4668 		enum dm_prefetch_modes   AllowForPStateChangeOrStutterInVBlankFinal,
4669 		unsigned int             *MinPrefetchMode,
4670 		unsigned int             *MaxPrefetchMode)
4671 {
4672 	if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) {
4673 		*MinPrefetchMode = 3;
4674 		*MaxPrefetchMode = 3;
4675 	} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) {
4676 		*MinPrefetchMode = 2;
4677 		*MaxPrefetchMode = 2;
4678 	} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) {
4679 		*MinPrefetchMode = 1;
4680 		*MaxPrefetchMode = 1;
4681 	} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
4682 		*MinPrefetchMode = 0;
4683 		*MaxPrefetchMode = 0;
4684 	} else {
4685 		*MinPrefetchMode = 0;
4686 		*MaxPrefetchMode = 3;
4687 	}
4688 } // CalculateMinAndMaxPrefetchMode
4689 
/*
 * dml32_CalculatePixelDeliveryTimes() - fill the per-surface line, request and
 * cursor delivery time arrays for surfaces 0..NumberOfActiveSurfaces-1.
 *
 * Pass 1, line delivery times. For each of luma/chroma, nominal/prefetch:
 *   - when the selecting vertical ratio is <= 1:
 *       swath_width_*_ub * DPPPerSurface / HRatio* / PixelClock
 *   - otherwise:
 *       swath_width_*_ub / PSCL_THROUGHPUT* / Dppclk
 *   The nominal values select on VRatio / VRatioChroma, the prefetch values on
 *   VRatioPrefetchY / VRatioPrefetchC. Chroma outputs are 0 when
 *   BytePerPixelC is 0.
 *
 * Pass 2, request delivery times. Each line delivery time from pass 1 is
 * divided by the number of requests per swath, which is the swath width
 * integer-divided by BlockWidth256Bytes* (BlockHeight256Bytes* when
 * IsVertical(SourceRotation) holds). Chroma outputs are 0 when BytePerPixelC
 * is 0.
 *
 * Pass 3, cursor request delivery times. Only cursor entry [k][0] is
 * consulted. The cursor width is divided by the same horizontal terms as in
 * pass 1 (selected by VRatio for the nominal value, VRatioPrefetchY for the
 * prefetch value) and by ceil(CursorWidth * CursorBPP / 256 / 8). Both outputs
 * are 0 when NumberOfCursors is 0.
 */
void dml32_CalculatePixelDeliveryTimes(
		unsigned int             NumberOfActiveSurfaces,
		double              VRatio[],
		double              VRatioChroma[],
		double              VRatioPrefetchY[],
		double              VRatioPrefetchC[],
		unsigned int             swath_width_luma_ub[],
		unsigned int             swath_width_chroma_ub[],
		unsigned int             DPPPerSurface[],
		double              HRatio[],
		double              HRatioChroma[],
		double              PixelClock[],
		double              PSCL_THROUGHPUT[],
		double              PSCL_THROUGHPUT_CHROMA[],
		double              Dppclk[],
		unsigned int             BytePerPixelC[],
		enum dm_rotation_angle   SourceRotation[],
		unsigned int             NumberOfCursors[],
		unsigned int             CursorWidth[][DC__NUM_CURSOR__MAX],
		unsigned int             CursorBPP[][DC__NUM_CURSOR__MAX],
		unsigned int             BlockWidth256BytesY[],
		unsigned int             BlockHeight256BytesY[],
		unsigned int             BlockWidth256BytesC[],
		unsigned int             BlockHeight256BytesC[],

		/* Output */
		double              DisplayPipeLineDeliveryTimeLuma[],
		double              DisplayPipeLineDeliveryTimeChroma[],
		double              DisplayPipeLineDeliveryTimeLumaPrefetch[],
		double              DisplayPipeLineDeliveryTimeChromaPrefetch[],
		double              DisplayPipeRequestDeliveryTimeLuma[],
		double              DisplayPipeRequestDeliveryTimeChroma[],
		double              DisplayPipeRequestDeliveryTimeLumaPrefetch[],
		double              DisplayPipeRequestDeliveryTimeChromaPrefetch[],
		double              CursorRequestDeliveryTime[],
		double              CursorRequestDeliveryTimePrefetch[])
{
	unsigned int k;

	/* Pass 1: line delivery times. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
		dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
		dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
		dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
		dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
		dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
		dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
		dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
		dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
#endif

		/* Luma, nominal. */
		DisplayPipeLineDeliveryTimeLuma[k] = (VRatio[k] <= 1) ?
				swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k] :
				swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];

		/* Chroma, nominal: only meaningful when a chroma plane exists. */
		if (BytePerPixelC[k] != 0) {
			DisplayPipeLineDeliveryTimeChroma[k] = (VRatioChroma[k] <= 1) ?
					swath_width_chroma_ub[k] * DPPPerSurface[k] /
							HRatioChroma[k] / PixelClock[k] :
					swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
		} else {
			DisplayPipeLineDeliveryTimeChroma[k] = 0;
		}

		/* Luma, prefetch: same formulas, selected by the prefetch ratio. */
		DisplayPipeLineDeliveryTimeLumaPrefetch[k] = (VRatioPrefetchY[k] <= 1) ?
				swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k] :
				swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];

		/* Chroma, prefetch. */
		if (BytePerPixelC[k] != 0) {
			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = (VRatioPrefetchC[k] <= 1) ?
					swath_width_chroma_ub[k] * DPPPerSurface[k] /
							HRatioChroma[k] / PixelClock[k] :
					swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
		} else {
			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
#endif
	}

	/* Pass 2: request delivery times. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		double requests_per_swath;

		/*
		 * The quotient is taken in unsigned integer arithmetic and only
		 * then widened to double.
		 */
		requests_per_swath = swath_width_luma_ub[k] /
				(IsVertical(SourceRotation[k]) ?
						BlockHeight256BytesY[k] : BlockWidth256BytesY[k]);
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, requests_per_swath);
#endif

		DisplayPipeRequestDeliveryTimeLuma[k] =
				DisplayPipeLineDeliveryTimeLuma[k] / requests_per_swath;
		DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
				DisplayPipeLineDeliveryTimeLumaPrefetch[k] / requests_per_swath;

		if (BytePerPixelC[k] != 0) {
			requests_per_swath = swath_width_chroma_ub[k] /
					(IsVertical(SourceRotation[k]) ?
							BlockHeight256BytesC[k] : BlockWidth256BytesC[k]);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, requests_per_swath);
#endif
			DisplayPipeRequestDeliveryTimeChroma[k] =
					DisplayPipeLineDeliveryTimeChroma[k] / requests_per_swath;
			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
					DisplayPipeLineDeliveryTimeChromaPrefetch[k] / requests_per_swath;
		} else {
			DisplayPipeRequestDeliveryTimeChroma[k] = 0;
			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
#endif
	}

	/* Pass 3: cursor request delivery times, based on cursor entry 0 only. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		const double cursor_width = (double) CursorWidth[k][0];
		/* Evaluated even for surfaces without a cursor, as before. */
		const unsigned int cursor_reqs_per_line = dml_ceil(cursor_width * (double) CursorBPP[k][0] /
				256.0 / 8.0, 1.0);

		if (NumberOfCursors[k] == 0) {
			CursorRequestDeliveryTime[k] = 0;
			CursorRequestDeliveryTimePrefetch[k] = 0;
		} else {
			CursorRequestDeliveryTime[k] = (VRatio[k] <= 1) ?
					cursor_width / HRatio[k] / PixelClock[k] / cursor_reqs_per_line :
					cursor_width / PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_reqs_per_line;
			CursorRequestDeliveryTimePrefetch[k] = (VRatioPrefetchY[k] <= 1) ?
					cursor_width / HRatio[k] / PixelClock[k] / cursor_reqs_per_line :
					cursor_width / PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_reqs_per_line;
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
				__func__, k, NumberOfCursors[k]);
		dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
				__func__, k, CursorRequestDeliveryTime[k]);
		dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
				__func__, k, CursorRequestDeliveryTimePrefetch[k]);
#endif
	}
} // CalculatePixelDeliveryTimes
4870 
dml32_CalculateMetaAndPTETimes(bool use_one_row_for_frame[],unsigned int NumberOfActiveSurfaces,bool GPUVMEnable,unsigned int MetaChunkSize,unsigned int MinMetaChunkSizeBytes,unsigned int HTotal[],double VRatio[],double VRatioChroma[],double DestinationLinesToRequestRowInVBlank[],double DestinationLinesToRequestRowInImmediateFlip[],bool DCCEnable[],double PixelClock[],unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],enum dm_rotation_angle SourceRotation[],unsigned int dpte_row_height[],unsigned int dpte_row_height_chroma[],unsigned int meta_row_width[],unsigned int meta_row_width_chroma[],unsigned int meta_row_height[],unsigned int meta_row_height_chroma[],unsigned int meta_req_width[],unsigned int meta_req_width_chroma[],unsigned int meta_req_height[],unsigned int meta_req_height_chroma[],unsigned int dpte_group_bytes[],unsigned int PTERequestSizeY[],unsigned int PTERequestSizeC[],unsigned int PixelPTEReqWidthY[],unsigned int PixelPTEReqHeightY[],unsigned int PixelPTEReqWidthC[],unsigned int PixelPTEReqHeightC[],unsigned int dpte_row_width_luma_ub[],unsigned int dpte_row_width_chroma_ub[],double DST_Y_PER_PTE_ROW_NOM_L[],double DST_Y_PER_PTE_ROW_NOM_C[],double DST_Y_PER_META_ROW_NOM_L[],double DST_Y_PER_META_ROW_NOM_C[],double TimePerMetaChunkNominal[],double TimePerChromaMetaChunkNominal[],double TimePerMetaChunkVBlank[],double TimePerChromaMetaChunkVBlank[],double TimePerMetaChunkFlip[],double TimePerChromaMetaChunkFlip[],double time_per_pte_group_nom_luma[],double time_per_pte_group_vblank_luma[],double time_per_pte_group_flip_luma[],double time_per_pte_group_nom_chroma[],double time_per_pte_group_vblank_chroma[],double time_per_pte_group_flip_chroma[])4871 void dml32_CalculateMetaAndPTETimes(
4872 		bool use_one_row_for_frame[],
4873 		unsigned int NumberOfActiveSurfaces,
4874 		bool GPUVMEnable,
4875 		unsigned int MetaChunkSize,
4876 		unsigned int MinMetaChunkSizeBytes,
4877 		unsigned int    HTotal[],
4878 		double  VRatio[],
4879 		double  VRatioChroma[],
4880 		double  DestinationLinesToRequestRowInVBlank[],
4881 		double  DestinationLinesToRequestRowInImmediateFlip[],
4882 		bool DCCEnable[],
4883 		double  PixelClock[],
4884 		unsigned int BytePerPixelY[],
4885 		unsigned int BytePerPixelC[],
4886 		enum dm_rotation_angle SourceRotation[],
4887 		unsigned int dpte_row_height[],
4888 		unsigned int dpte_row_height_chroma[],
4889 		unsigned int meta_row_width[],
4890 		unsigned int meta_row_width_chroma[],
4891 		unsigned int meta_row_height[],
4892 		unsigned int meta_row_height_chroma[],
4893 		unsigned int meta_req_width[],
4894 		unsigned int meta_req_width_chroma[],
4895 		unsigned int meta_req_height[],
4896 		unsigned int meta_req_height_chroma[],
4897 		unsigned int dpte_group_bytes[],
4898 		unsigned int    PTERequestSizeY[],
4899 		unsigned int    PTERequestSizeC[],
4900 		unsigned int    PixelPTEReqWidthY[],
4901 		unsigned int    PixelPTEReqHeightY[],
4902 		unsigned int    PixelPTEReqWidthC[],
4903 		unsigned int    PixelPTEReqHeightC[],
4904 		unsigned int    dpte_row_width_luma_ub[],
4905 		unsigned int    dpte_row_width_chroma_ub[],
4906 
4907 		/* Output */
4908 		double DST_Y_PER_PTE_ROW_NOM_L[],
4909 		double DST_Y_PER_PTE_ROW_NOM_C[],
4910 		double DST_Y_PER_META_ROW_NOM_L[],
4911 		double DST_Y_PER_META_ROW_NOM_C[],
4912 		double TimePerMetaChunkNominal[],
4913 		double TimePerChromaMetaChunkNominal[],
4914 		double TimePerMetaChunkVBlank[],
4915 		double TimePerChromaMetaChunkVBlank[],
4916 		double TimePerMetaChunkFlip[],
4917 		double TimePerChromaMetaChunkFlip[],
4918 		double time_per_pte_group_nom_luma[],
4919 		double time_per_pte_group_vblank_luma[],
4920 		double time_per_pte_group_flip_luma[],
4921 		double time_per_pte_group_nom_chroma[],
4922 		double time_per_pte_group_vblank_chroma[],
4923 		double time_per_pte_group_flip_chroma[])
4924 {
4925 	unsigned int   meta_chunk_width;
4926 	unsigned int   min_meta_chunk_width;
4927 	unsigned int   meta_chunk_per_row_int;
4928 	unsigned int   meta_row_remainder;
4929 	unsigned int   meta_chunk_threshold;
4930 	unsigned int   meta_chunks_per_row_ub;
4931 	unsigned int   meta_chunk_width_chroma;
4932 	unsigned int   min_meta_chunk_width_chroma;
4933 	unsigned int   meta_chunk_per_row_int_chroma;
4934 	unsigned int   meta_row_remainder_chroma;
4935 	unsigned int   meta_chunk_threshold_chroma;
4936 	unsigned int   meta_chunks_per_row_ub_chroma;
4937 	unsigned int   dpte_group_width_luma;
4938 	unsigned int   dpte_groups_per_row_luma_ub;
4939 	unsigned int   dpte_group_width_chroma;
4940 	unsigned int   dpte_groups_per_row_chroma_ub;
4941 	unsigned int k;
4942 
4943 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4944 		DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
4945 		if (BytePerPixelC[k] == 0)
4946 			DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
4947 		else
4948 			DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
4949 		DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
4950 		if (BytePerPixelC[k] == 0)
4951 			DST_Y_PER_META_ROW_NOM_C[k] = 0;
4952 		else
4953 			DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
4954 	}
4955 
4956 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4957 		if (DCCEnable[k] == true) {
4958 			meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
4959 			min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
4960 			meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
4961 			meta_row_remainder = meta_row_width[k] % meta_chunk_width;
4962 			if (!IsVertical(SourceRotation[k]))
4963 				meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
4964 			else
4965 				meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
4966 
4967 			if (meta_row_remainder <= meta_chunk_threshold)
4968 				meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
4969 			else
4970 				meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
4971 
4972 			TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
4973 					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4974 			TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4975 					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4976 			TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4977 					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4978 			if (BytePerPixelC[k] == 0) {
4979 				TimePerChromaMetaChunkNominal[k] = 0;
4980 				TimePerChromaMetaChunkVBlank[k] = 0;
4981 				TimePerChromaMetaChunkFlip[k] = 0;
4982 			} else {
4983 				meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
4984 						meta_row_height_chroma[k];
4985 				min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
4986 						meta_row_height_chroma[k];
4987 				meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] /
4988 						meta_chunk_width_chroma;
4989 				meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
4990 				if (!IsVertical(SourceRotation[k])) {
4991 					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4992 							meta_req_width_chroma[k];
4993 				} else {
4994 					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4995 							meta_req_height_chroma[k];
4996 				}
4997 				if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma)
4998 					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
4999 				else
5000 					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
5001 
5002 				TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
5003 						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5004 				TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
5005 						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5006 				TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5007 						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5008 			}
5009 		} else {
5010 			TimePerMetaChunkNominal[k] = 0;
5011 			TimePerMetaChunkVBlank[k] = 0;
5012 			TimePerMetaChunkFlip[k] = 0;
5013 			TimePerChromaMetaChunkNominal[k] = 0;
5014 			TimePerChromaMetaChunkVBlank[k] = 0;
5015 			TimePerChromaMetaChunkFlip[k] = 0;
5016 		}
5017 	}
5018 
5019 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5020 		if (GPUVMEnable == true) {
5021 			if (!IsVertical(SourceRotation[k])) {
5022 				dpte_group_width_luma = (double) dpte_group_bytes[k] /
5023 						(double) PTERequestSizeY[k] * PixelPTEReqWidthY[k];
5024 			} else {
5025 				dpte_group_width_luma = (double) dpte_group_bytes[k] /
5026 						(double) PTERequestSizeY[k] * PixelPTEReqHeightY[k];
5027 			}
5028 
5029 			if (use_one_row_for_frame[k]) {
5030 				dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
5031 						(double) dpte_group_width_luma / 2.0, 1.0);
5032 			} else {
5033 				dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
5034 						(double) dpte_group_width_luma, 1.0);
5035 			}
5036 #ifdef __DML_VBA_DEBUG__
5037 			dml_print("DML::%s: k=%0d, use_one_row_for_frame        = %d\n",
5038 					__func__, k, use_one_row_for_frame[k]);
5039 			dml_print("DML::%s: k=%0d, dpte_group_bytes             = %d\n",
5040 					__func__, k, dpte_group_bytes[k]);
5041 			dml_print("DML::%s: k=%0d, PTERequestSizeY              = %d\n",
5042 					__func__, k, PTERequestSizeY[k]);
5043 			dml_print("DML::%s: k=%0d, PixelPTEReqWidthY            = %d\n",
5044 					__func__, k, PixelPTEReqWidthY[k]);
5045 			dml_print("DML::%s: k=%0d, PixelPTEReqHeightY           = %d\n",
5046 					__func__, k, PixelPTEReqHeightY[k]);
5047 			dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub       = %d\n",
5048 					__func__, k, dpte_row_width_luma_ub[k]);
5049 			dml_print("DML::%s: k=%0d, dpte_group_width_luma        = %d\n",
5050 					__func__, k, dpte_group_width_luma);
5051 			dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub  = %d\n",
5052 					__func__, k, dpte_groups_per_row_luma_ub);
5053 #endif
5054 
5055 			time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
5056 					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5057 			time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
5058 					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5059 			time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5060 					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5061 			if (BytePerPixelC[k] == 0) {
5062 				time_per_pte_group_nom_chroma[k] = 0;
5063 				time_per_pte_group_vblank_chroma[k] = 0;
5064 				time_per_pte_group_flip_chroma[k] = 0;
5065 			} else {
5066 				if (!IsVertical(SourceRotation[k])) {
5067 					dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5068 							(double) PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5069 				} else {
5070 					dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5071 							(double) PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5072 				}
5073 
5074 				if (use_one_row_for_frame[k]) {
5075 					dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5076 							(double) dpte_group_width_chroma / 2.0, 1.0);
5077 				} else {
5078 					dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5079 							(double) dpte_group_width_chroma, 1.0);
5080 				}
5081 #ifdef __DML_VBA_DEBUG__
5082 				dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub        = %d\n",
5083 						__func__, k, dpte_row_width_chroma_ub[k]);
5084 				dml_print("DML::%s: k=%0d, dpte_group_width_chroma        = %d\n",
5085 						__func__, k, dpte_group_width_chroma);
5086 				dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub  = %d\n",
5087 						__func__, k, dpte_groups_per_row_chroma_ub);
5088 #endif
5089 				time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
5090 						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5091 				time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
5092 						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5093 				time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5094 						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5095 			}
5096 		} else {
5097 			time_per_pte_group_nom_luma[k] = 0;
5098 			time_per_pte_group_vblank_luma[k] = 0;
5099 			time_per_pte_group_flip_luma[k] = 0;
5100 			time_per_pte_group_nom_chroma[k] = 0;
5101 			time_per_pte_group_vblank_chroma[k] = 0;
5102 			time_per_pte_group_flip_chroma[k] = 0;
5103 		}
5104 #ifdef __DML_VBA_DEBUG__
5105 		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank         = %f\n",
5106 				__func__, k, DestinationLinesToRequestRowInVBlank[k]);
5107 		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip  = %f\n",
5108 				__func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
5109 		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L                      = %f\n",
5110 				__func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
5111 		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C                      = %f\n",
5112 				__func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
5113 		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L                     = %f\n",
5114 				__func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
5115 		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C                     = %f\n",
5116 				__func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
5117 		dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal          = %f\n",
5118 				__func__, k, TimePerMetaChunkNominal[k]);
5119 		dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank           = %f\n",
5120 				__func__, k, TimePerMetaChunkVBlank[k]);
5121 		dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip             = %f\n",
5122 				__func__, k, TimePerMetaChunkFlip[k]);
5123 		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal    = %f\n",
5124 				__func__, k, TimePerChromaMetaChunkNominal[k]);
5125 		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank     = %f\n",
5126 				__func__, k, TimePerChromaMetaChunkVBlank[k]);
5127 		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip       = %f\n",
5128 				__func__, k, TimePerChromaMetaChunkFlip[k]);
5129 		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma      = %f\n",
5130 				__func__, k, time_per_pte_group_nom_luma[k]);
5131 		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma   = %f\n",
5132 				__func__, k, time_per_pte_group_vblank_luma[k]);
5133 		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma     = %f\n",
5134 				__func__, k, time_per_pte_group_flip_luma[k]);
5135 		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma    = %f\n",
5136 				__func__, k, time_per_pte_group_nom_chroma[k]);
5137 		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
5138 				__func__, k, time_per_pte_group_vblank_chroma[k]);
5139 		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma   = %f\n",
5140 				__func__, k, time_per_pte_group_flip_chroma[k]);
5141 #endif
5142 	}
5143 } // CalculateMetaAndPTETimes
5144 
/*
 * dml32_CalculateVMGroupAndRequestTimes() - time available per VM group and
 * per VM request, for every active surface.
 *
 * For each surface k < NumberOfActiveSurfaces the four output arrays are
 * written:
 *
 *   TimePerVMGroupVBlank[k]   = VBlankLines * HTotal[k] / PixelClock[k] / groups
 *   TimePerVMGroupFlip[k]     = FlipLines   * HTotal[k] / PixelClock[k] / groups
 *   TimePerVMRequestVBlank[k] = VBlankLines * HTotal[k] / PixelClock[k] / requests
 *   TimePerVMRequestFlip[k]   = FlipLines   * HTotal[k] / PixelClock[k] / requests
 *
 * with VBlankLines = DestinationLinesToRequestVMInVBlank[k] and
 * FlipLines = DestinationLinesToRequestVMInImmediateFlip[k].
 *
 * "groups" sums dml_ceil(bytes / vm_group_bytes[k], 1) and "requests" sums
 * (bytes / 64, integer division) over these byte counts:
 *   - DCC off:                      dpde0_bytes_per_frame_ub_l/_c
 *   - DCC on, one page-table level: meta_pte_bytes_per_frame_ub_l/_c
 *   - DCC on, otherwise:            both of the above; "groups" additionally
 *                                   gets +2 (with chroma) or +1 (luma only)
 * The chroma (_c) terms are only included when BytePerPixelC[k] > 0.
 *
 * All four results are halved when GPUVMMaxPageTableLevels > 2, and are 0 when
 * GPUVM is disabled or when DCC is off with at most one page-table level.
 *
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are accepted but not
 * read by this function.
 */
void dml32_CalculateVMGroupAndRequestTimes(
		unsigned int     NumberOfActiveSurfaces,
		bool     GPUVMEnable,
		unsigned int     GPUVMMaxPageTableLevels,
		unsigned int     HTotal[],
		unsigned int     BytePerPixelC[],
		double      DestinationLinesToRequestVMInVBlank[],
		double      DestinationLinesToRequestVMInImmediateFlip[],
		bool     DCCEnable[],
		double      PixelClock[],
		unsigned int        dpte_row_width_luma_ub[],
		unsigned int        dpte_row_width_chroma_ub[],
		unsigned int     vm_group_bytes[],
		unsigned int     dpde0_bytes_per_frame_ub_l[],
		unsigned int     dpde0_bytes_per_frame_ub_c[],
		unsigned int        meta_pte_bytes_per_frame_ub_l[],
		unsigned int        meta_pte_bytes_per_frame_ub_c[],

		/* Output */
		double      TimePerVMGroupVBlank[],
		double      TimePerVMGroupFlip[],
		double      TimePerVMRequestVBlank[],
		double      TimePerVMRequestFlip[])
{
	unsigned int k;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
#endif
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		bool has_chroma;
		bool walk_dpde0;
		bool walk_meta_pte;
		double group_total;
		unsigned int num_group_per_lower_vm_stage;
		unsigned int num_req_per_lower_vm_stage;

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
		dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_c[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_c[k]);
#endif

		if (GPUVMEnable != true || (DCCEnable[k] != true && GPUVMMaxPageTableLevels <= 1)) {
			/* Nothing to fetch through the VM path for this surface. */
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
		} else {
			has_chroma = BytePerPixelC[k] > 0;
			/* dpde0 bytes count unless DCC is on with exactly one level. */
			walk_dpde0 = DCCEnable[k] == false || GPUVMMaxPageTableLevels != 1;
			/* meta PTE bytes count whenever DCC is on. */
			walk_meta_pte = DCCEnable[k] != false;

			/*
			 * The group count is accumulated as a double in the fixed
			 * order: constant, dpde0 luma, dpde0 chroma, meta luma,
			 * meta chroma; it is truncated to unsigned once at the end.
			 */
			group_total = 0;
			num_req_per_lower_vm_stage = 0;

			/* Extra groups are only added when both kinds are fetched. */
			if (walk_dpde0 && walk_meta_pte)
				group_total = has_chroma ? 2 : 1;

			if (walk_dpde0) {
				group_total += dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) /
						(double) (vm_group_bytes[k]), 1.0);
				num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_l[k] / 64;
				if (has_chroma) {
					group_total += dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1.0);
					num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_c[k] / 64;
				}
			}

			if (walk_meta_pte) {
				group_total += dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
						(double) (vm_group_bytes[k]), 1.0);
				num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_l[k] / 64;
				if (has_chroma) {
					group_total += dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1.0);
					num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_c[k] / 64;
				}
			}

			num_group_per_lower_vm_stage = group_total;

			TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
			TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
			TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
			TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;

			/* More than two page-table levels: each budget is halved. */
			if (GPUVMMaxPageTableLevels > 2) {
				TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
				TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
				TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
				TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
			}
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
#endif
	}
} // CalculateVMGroupAndRequestTimes
5298 
dml32_CalculateDCCConfiguration(bool DCCEnabled,bool DCCProgrammingAssumesScanDirectionUnknown,enum source_format_class SourcePixelFormat,unsigned int SurfaceWidthLuma,unsigned int SurfaceWidthChroma,unsigned int SurfaceHeightLuma,unsigned int SurfaceHeightChroma,unsigned int nomDETInKByte,unsigned int RequestHeight256ByteLuma,unsigned int RequestHeight256ByteChroma,enum dm_swizzle_mode TilingFormat,unsigned int BytePerPixelY,unsigned int BytePerPixelC,double BytePerPixelDETY,double BytePerPixelDETC,enum dm_rotation_angle SourceRotation,unsigned int * MaxUncompressedBlockLuma,unsigned int * MaxUncompressedBlockChroma,unsigned int * MaxCompressedBlockLuma,unsigned int * MaxCompressedBlockChroma,unsigned int * IndependentBlockLuma,unsigned int * IndependentBlockChroma)5299 void dml32_CalculateDCCConfiguration(
5300 		bool             DCCEnabled,
5301 		bool             DCCProgrammingAssumesScanDirectionUnknown,
5302 		enum source_format_class SourcePixelFormat,
5303 		unsigned int             SurfaceWidthLuma,
5304 		unsigned int             SurfaceWidthChroma,
5305 		unsigned int             SurfaceHeightLuma,
5306 		unsigned int             SurfaceHeightChroma,
5307 		unsigned int                nomDETInKByte,
5308 		unsigned int             RequestHeight256ByteLuma,
5309 		unsigned int             RequestHeight256ByteChroma,
5310 		enum dm_swizzle_mode     TilingFormat,
5311 		unsigned int             BytePerPixelY,
5312 		unsigned int             BytePerPixelC,
5313 		double              BytePerPixelDETY,
5314 		double              BytePerPixelDETC,
5315 		enum dm_rotation_angle   SourceRotation,
5316 		/* Output */
5317 		unsigned int        *MaxUncompressedBlockLuma,
5318 		unsigned int        *MaxUncompressedBlockChroma,
5319 		unsigned int        *MaxCompressedBlockLuma,
5320 		unsigned int        *MaxCompressedBlockChroma,
5321 		unsigned int        *IndependentBlockLuma,
5322 		unsigned int        *IndependentBlockChroma)
5323 {
5324 	typedef enum {
5325 		REQ_256Bytes,
5326 		REQ_128BytesNonContiguous,
5327 		REQ_128BytesContiguous,
5328 		REQ_NA
5329 	} RequestType;
5330 
5331 	RequestType   RequestLuma;
5332 	RequestType   RequestChroma;
5333 
5334 	unsigned int   segment_order_horz_contiguous_luma;
5335 	unsigned int   segment_order_horz_contiguous_chroma;
5336 	unsigned int   segment_order_vert_contiguous_luma;
5337 	unsigned int   segment_order_vert_contiguous_chroma;
5338 	unsigned int req128_horz_wc_l;
5339 	unsigned int req128_horz_wc_c;
5340 	unsigned int req128_vert_wc_l;
5341 	unsigned int req128_vert_wc_c;
5342 	unsigned int MAS_vp_horz_limit;
5343 	unsigned int MAS_vp_vert_limit;
5344 	unsigned int max_vp_horz_width;
5345 	unsigned int max_vp_vert_height;
5346 	unsigned int eff_surf_width_l;
5347 	unsigned int eff_surf_width_c;
5348 	unsigned int eff_surf_height_l;
5349 	unsigned int eff_surf_height_c;
5350 	unsigned int full_swath_bytes_horz_wc_l;
5351 	unsigned int full_swath_bytes_horz_wc_c;
5352 	unsigned int full_swath_bytes_vert_wc_l;
5353 	unsigned int full_swath_bytes_vert_wc_c;
5354 	unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
5355 
5356 	unsigned int   yuv420;
5357 	unsigned int   horz_div_l;
5358 	unsigned int   horz_div_c;
5359 	unsigned int   vert_div_l;
5360 	unsigned int   vert_div_c;
5361 
5362 	unsigned int     swath_buf_size;
5363 	double   detile_buf_vp_horz_limit;
5364 	double   detile_buf_vp_vert_limit;
5365 
5366 	yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
5367 			SourcePixelFormat == dm_420_12) ? 1 : 0);
5368 	horz_div_l = 1;
5369 	horz_div_c = 1;
5370 	vert_div_l = 1;
5371 	vert_div_c = 1;
5372 
5373 	if (BytePerPixelY == 1)
5374 		vert_div_l = 0;
5375 	if (BytePerPixelC == 1)
5376 		vert_div_c = 0;
5377 
5378 	if (BytePerPixelC == 0) {
5379 		swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
5380 		detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5381 				BytePerPixelY / (1 + horz_div_l));
5382 		detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5383 				(1 + vert_div_l));
5384 	} else {
5385 		swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
5386 		detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5387 				BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
5388 				BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
5389 		detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5390 				(1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
5391 				(1 + vert_div_c) / (1 + yuv420));
5392 	}
5393 
5394 	if (SourcePixelFormat == dm_420_10) {
5395 		detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
5396 		detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
5397 	}
5398 
5399 	detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
5400 	detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
5401 
5402 	MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
5403 	MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
5404 	max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
5405 	max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
5406 	eff_surf_width_l =  (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
5407 	eff_surf_width_c =  eff_surf_width_l / (1 + yuv420);
5408 	eff_surf_height_l =  (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
5409 	eff_surf_height_c =  eff_surf_height_l / (1 + yuv420);
5410 
5411 	full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
5412 	full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
5413 	if (BytePerPixelC > 0) {
5414 		full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
5415 		full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
5416 	} else {
5417 		full_swath_bytes_horz_wc_c = 0;
5418 		full_swath_bytes_vert_wc_c = 0;
5419 	}
5420 
5421 	if (SourcePixelFormat == dm_420_10) {
5422 		full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0);
5423 		full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0);
5424 		full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0);
5425 		full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0);
5426 	}
5427 
5428 	if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5429 		req128_horz_wc_l = 0;
5430 		req128_horz_wc_c = 0;
5431 	} else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
5432 			full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5433 		req128_horz_wc_l = 0;
5434 		req128_horz_wc_c = 1;
5435 	} else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
5436 			full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5437 		req128_horz_wc_l = 1;
5438 		req128_horz_wc_c = 0;
5439 	} else {
5440 		req128_horz_wc_l = 1;
5441 		req128_horz_wc_c = 1;
5442 	}
5443 
5444 	if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5445 		req128_vert_wc_l = 0;
5446 		req128_vert_wc_c = 0;
5447 	} else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
5448 			full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5449 		req128_vert_wc_l = 0;
5450 		req128_vert_wc_c = 1;
5451 	} else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
5452 			full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5453 		req128_vert_wc_l = 1;
5454 		req128_vert_wc_c = 0;
5455 	} else {
5456 		req128_vert_wc_l = 1;
5457 		req128_vert_wc_c = 1;
5458 	}
5459 
5460 	if (BytePerPixelY == 2) {
5461 		segment_order_horz_contiguous_luma = 0;
5462 		segment_order_vert_contiguous_luma = 1;
5463 	} else {
5464 		segment_order_horz_contiguous_luma = 1;
5465 		segment_order_vert_contiguous_luma = 0;
5466 	}
5467 
5468 	if (BytePerPixelC == 2) {
5469 		segment_order_horz_contiguous_chroma = 0;
5470 		segment_order_vert_contiguous_chroma = 1;
5471 	} else {
5472 		segment_order_horz_contiguous_chroma = 1;
5473 		segment_order_vert_contiguous_chroma = 0;
5474 	}
5475 #ifdef __DML_VBA_DEBUG__
5476 	dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
5477 	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
5478 	dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC);
5479 	dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l);
5480 	dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c);
5481 	dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l);
5482 	dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c);
5483 	dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma);
5484 	dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
5485 			__func__, segment_order_horz_contiguous_chroma);
5486 #endif
5487 
5488 	if (DCCProgrammingAssumesScanDirectionUnknown == true) {
5489 		if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
5490 			RequestLuma = REQ_256Bytes;
5491 		else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
5492 				(req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
5493 			RequestLuma = REQ_128BytesNonContiguous;
5494 		else
5495 			RequestLuma = REQ_128BytesContiguous;
5496 
5497 		if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
5498 			RequestChroma = REQ_256Bytes;
5499 		else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
5500 				(req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
5501 			RequestChroma = REQ_128BytesNonContiguous;
5502 		else
5503 			RequestChroma = REQ_128BytesContiguous;
5504 
5505 	} else if (!IsVertical(SourceRotation)) {
5506 		if (req128_horz_wc_l == 0)
5507 			RequestLuma = REQ_256Bytes;
5508 		else if (segment_order_horz_contiguous_luma == 0)
5509 			RequestLuma = REQ_128BytesNonContiguous;
5510 		else
5511 			RequestLuma = REQ_128BytesContiguous;
5512 
5513 		if (req128_horz_wc_c == 0)
5514 			RequestChroma = REQ_256Bytes;
5515 		else if (segment_order_horz_contiguous_chroma == 0)
5516 			RequestChroma = REQ_128BytesNonContiguous;
5517 		else
5518 			RequestChroma = REQ_128BytesContiguous;
5519 
5520 	} else {
5521 		if (req128_vert_wc_l == 0)
5522 			RequestLuma = REQ_256Bytes;
5523 		else if (segment_order_vert_contiguous_luma == 0)
5524 			RequestLuma = REQ_128BytesNonContiguous;
5525 		else
5526 			RequestLuma = REQ_128BytesContiguous;
5527 
5528 		if (req128_vert_wc_c == 0)
5529 			RequestChroma = REQ_256Bytes;
5530 		else if (segment_order_vert_contiguous_chroma == 0)
5531 			RequestChroma = REQ_128BytesNonContiguous;
5532 		else
5533 			RequestChroma = REQ_128BytesContiguous;
5534 	}
5535 
5536 	if (RequestLuma == REQ_256Bytes) {
5537 		*MaxUncompressedBlockLuma = 256;
5538 		*MaxCompressedBlockLuma = 256;
5539 		*IndependentBlockLuma = 0;
5540 	} else if (RequestLuma == REQ_128BytesContiguous) {
5541 		*MaxUncompressedBlockLuma = 256;
5542 		*MaxCompressedBlockLuma = 128;
5543 		*IndependentBlockLuma = 128;
5544 	} else {
5545 		*MaxUncompressedBlockLuma = 256;
5546 		*MaxCompressedBlockLuma = 64;
5547 		*IndependentBlockLuma = 64;
5548 	}
5549 
5550 	if (RequestChroma == REQ_256Bytes) {
5551 		*MaxUncompressedBlockChroma = 256;
5552 		*MaxCompressedBlockChroma = 256;
5553 		*IndependentBlockChroma = 0;
5554 	} else if (RequestChroma == REQ_128BytesContiguous) {
5555 		*MaxUncompressedBlockChroma = 256;
5556 		*MaxCompressedBlockChroma = 128;
5557 		*IndependentBlockChroma = 128;
5558 	} else {
5559 		*MaxUncompressedBlockChroma = 256;
5560 		*MaxCompressedBlockChroma = 64;
5561 		*IndependentBlockChroma = 64;
5562 	}
5563 
5564 	if (DCCEnabled != true || BytePerPixelC == 0) {
5565 		*MaxUncompressedBlockChroma = 0;
5566 		*MaxCompressedBlockChroma = 0;
5567 		*IndependentBlockChroma = 0;
5568 	}
5569 
5570 	if (DCCEnabled != true) {
5571 		*MaxUncompressedBlockLuma = 0;
5572 		*MaxCompressedBlockLuma = 0;
5573 		*IndependentBlockLuma = 0;
5574 	}
5575 
5576 #ifdef __DML_VBA_DEBUG__
5577 	dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
5578 	dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
5579 	dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
5580 	dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
5581 	dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
5582 	dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
5583 #endif
5584 
5585 } // CalculateDCCConfiguration
5586 
dml32_CalculateStutterEfficiency(unsigned int CompressedBufferSizeInkByte,enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],bool UnboundedRequestEnabled,unsigned int MetaFIFOSizeInKEntries,unsigned int ZeroSizeBufferEntries,unsigned int PixelChunkSizeInKByte,unsigned int NumberOfActiveSurfaces,unsigned int ROBBufferSizeInKByte,double TotalDataReadBandwidth,double DCFCLK,double ReturnBW,unsigned int CompbufReservedSpace64B,unsigned int CompbufReservedSpaceZs,double SRExitTime,double SRExitZ8Time,bool SynchronizeTimingsFinal,unsigned int BlendingAndTiming[],double StutterEnterPlusExitWatermark,double Z8StutterEnterPlusExitWatermark,bool ProgressiveToInterlaceUnitInOPP,bool Interlace[],double MinTTUVBlank[],unsigned int DPPPerSurface[],unsigned int DETBufferSizeY[],unsigned int BytePerPixelY[],double BytePerPixelDETY[],double SwathWidthY[],unsigned int SwathHeightY[],unsigned int SwathHeightC[],double NetDCCRateLuma[],double NetDCCRateChroma[],double DCCFractionOfZeroSizeRequestsLuma[],double DCCFractionOfZeroSizeRequestsChroma[],unsigned int HTotal[],unsigned int VTotal[],double PixelClock[],double VRatio[],enum dm_rotation_angle SourceRotation[],unsigned int BlockHeight256BytesY[],unsigned int BlockWidth256BytesY[],unsigned int BlockHeight256BytesC[],unsigned int BlockWidth256BytesC[],unsigned int DCCYMaxUncompressedBlock[],unsigned int DCCCMaxUncompressedBlock[],unsigned int VActive[],bool DCCEnable[],bool WritebackEnable[],double ReadBandwidthSurfaceLuma[],double ReadBandwidthSurfaceChroma[],double meta_row_bw[],double dpte_row_bw[],double * StutterEfficiencyNotIncludingVBlank,double * StutterEfficiency,unsigned int * NumberOfStutterBurstsPerFrame,double * Z8StutterEfficiencyNotIncludingVBlank,double * Z8StutterEfficiency,unsigned int * Z8NumberOfStutterBurstsPerFrame,double * StutterPeriod,bool * DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)5587 void dml32_CalculateStutterEfficiency(
5588 		unsigned int      CompressedBufferSizeInkByte,
5589 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
5590 		bool   UnboundedRequestEnabled,
5591 		unsigned int      MetaFIFOSizeInKEntries,
5592 		unsigned int      ZeroSizeBufferEntries,
5593 		unsigned int      PixelChunkSizeInKByte,
5594 		unsigned int   NumberOfActiveSurfaces,
5595 		unsigned int      ROBBufferSizeInKByte,
5596 		double    TotalDataReadBandwidth,
5597 		double    DCFCLK,
5598 		double    ReturnBW,
5599 		unsigned int      CompbufReservedSpace64B,
5600 		unsigned int      CompbufReservedSpaceZs,
5601 		double    SRExitTime,
5602 		double    SRExitZ8Time,
5603 		bool   SynchronizeTimingsFinal,
5604 		unsigned int   BlendingAndTiming[],
5605 		double    StutterEnterPlusExitWatermark,
5606 		double    Z8StutterEnterPlusExitWatermark,
5607 		bool   ProgressiveToInterlaceUnitInOPP,
5608 		bool   Interlace[],
5609 		double    MinTTUVBlank[],
5610 		unsigned int   DPPPerSurface[],
5611 		unsigned int      DETBufferSizeY[],
5612 		unsigned int   BytePerPixelY[],
5613 		double    BytePerPixelDETY[],
5614 		double      SwathWidthY[],
5615 		unsigned int   SwathHeightY[],
5616 		unsigned int   SwathHeightC[],
5617 		double    NetDCCRateLuma[],
5618 		double    NetDCCRateChroma[],
5619 		double    DCCFractionOfZeroSizeRequestsLuma[],
5620 		double    DCCFractionOfZeroSizeRequestsChroma[],
5621 		unsigned int      HTotal[],
5622 		unsigned int      VTotal[],
5623 		double    PixelClock[],
5624 		double    VRatio[],
5625 		enum dm_rotation_angle SourceRotation[],
5626 		unsigned int   BlockHeight256BytesY[],
5627 		unsigned int   BlockWidth256BytesY[],
5628 		unsigned int   BlockHeight256BytesC[],
5629 		unsigned int   BlockWidth256BytesC[],
5630 		unsigned int   DCCYMaxUncompressedBlock[],
5631 		unsigned int   DCCCMaxUncompressedBlock[],
5632 		unsigned int      VActive[],
5633 		bool   DCCEnable[],
5634 		bool   WritebackEnable[],
5635 		double    ReadBandwidthSurfaceLuma[],
5636 		double    ReadBandwidthSurfaceChroma[],
5637 		double    meta_row_bw[],
5638 		double    dpte_row_bw[],
5639 
5640 		/* Output */
5641 		double   *StutterEfficiencyNotIncludingVBlank,
5642 		double   *StutterEfficiency,
5643 		unsigned int     *NumberOfStutterBurstsPerFrame,
5644 		double   *Z8StutterEfficiencyNotIncludingVBlank,
5645 		double   *Z8StutterEfficiency,
5646 		unsigned int     *Z8NumberOfStutterBurstsPerFrame,
5647 		double   *StutterPeriod,
5648 		bool  *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
5649 {
5650 
5651 	bool FoundCriticalSurface = false;
5652 	unsigned int SwathSizeCriticalSurface = 0;
5653 	unsigned int LastChunkOfSwathSize;
5654 	unsigned int MissingPartOfLastSwathOfDETSize;
5655 	double LastZ8StutterPeriod = 0.0;
5656 	double LastStutterPeriod = 0.0;
5657 	unsigned int TotalNumberOfActiveOTG = 0;
5658 	double doublePixelClock = 0;
5659 	unsigned int doubleHTotal = 0;
5660 	unsigned int doubleVTotal = 0;
5661 	bool SameTiming = true;
5662 	double DETBufferingTimeY;
5663 	double SwathWidthYCriticalSurface = 0.0;
5664 	double SwathHeightYCriticalSurface = 0.0;
5665 	double VActiveTimeCriticalSurface = 0.0;
5666 	double FrameTimeCriticalSurface = 0.0;
5667 	unsigned int BytePerPixelYCriticalSurface = 0;
5668 	double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
5669 	unsigned int DETBufferSizeYCriticalSurface = 0;
5670 	double MinTTUVBlankCriticalSurface = 0.0;
5671 	unsigned int BlockWidth256BytesYCriticalSurface = 0;
5672 	bool doublePlaneCriticalSurface = 0;
5673 	bool doublePipeCriticalSurface = 0;
5674 	double TotalCompressedReadBandwidth;
5675 	double TotalRowReadBandwidth;
5676 	double AverageDCCCompressionRate;
5677 	double EffectiveCompressedBufferSize;
5678 	double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
5679 	double StutterBurstTime;
5680 	unsigned int TotalActiveWriteback;
5681 	double LinesInDETY;
5682 	double LinesInDETYRoundedDownToSwath;
5683 	double MaximumEffectiveCompressionLuma;
5684 	double MaximumEffectiveCompressionChroma;
5685 	double TotalZeroSizeRequestReadBandwidth;
5686 	double TotalZeroSizeCompressedReadBandwidth;
5687 	double AverageDCCZeroSizeFraction;
5688 	double AverageZeroSizeCompressionRate;
5689 	unsigned int k;
5690 
5691 	TotalZeroSizeRequestReadBandwidth = 0;
5692 	TotalZeroSizeCompressedReadBandwidth = 0;
5693 	TotalRowReadBandwidth = 0;
5694 	TotalCompressedReadBandwidth = 0;
5695 
5696 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5697 		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5698 			if (DCCEnable[k] == true) {
5699 				if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
5700 						|| (!IsVertical(SourceRotation[k])
5701 								&& BlockHeight256BytesY[k] > SwathHeightY[k])
5702 						|| DCCYMaxUncompressedBlock[k] < 256) {
5703 					MaximumEffectiveCompressionLuma = 2;
5704 				} else {
5705 					MaximumEffectiveCompressionLuma = 4;
5706 				}
5707 				TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5708 						+ ReadBandwidthSurfaceLuma[k]
5709 								/ dml_min(NetDCCRateLuma[k],
5710 										MaximumEffectiveCompressionLuma);
5711 #ifdef __DML_VBA_DEBUG__
5712 				dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5713 						__func__, k, ReadBandwidthSurfaceLuma[k]);
5714 				dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
5715 						__func__, k, NetDCCRateLuma[k]);
5716 				dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
5717 						__func__, k, MaximumEffectiveCompressionLuma);
5718 #endif
5719 				TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5720 						+ ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
5721 				TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5722 						+ ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
5723 								/ MaximumEffectiveCompressionLuma;
5724 
5725 				if (ReadBandwidthSurfaceChroma[k] > 0) {
5726 					if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
5727 							|| (!IsVertical(SourceRotation[k])
5728 									&& BlockHeight256BytesC[k] > SwathHeightC[k])
5729 							|| DCCCMaxUncompressedBlock[k] < 256) {
5730 						MaximumEffectiveCompressionChroma = 2;
5731 					} else {
5732 						MaximumEffectiveCompressionChroma = 4;
5733 					}
5734 					TotalCompressedReadBandwidth =
5735 							TotalCompressedReadBandwidth
5736 							+ ReadBandwidthSurfaceChroma[k]
5737 							/ dml_min(NetDCCRateChroma[k],
5738 							MaximumEffectiveCompressionChroma);
5739 #ifdef __DML_VBA_DEBUG__
5740 					dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
5741 							__func__, k, ReadBandwidthSurfaceChroma[k]);
5742 					dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
5743 							__func__, k, NetDCCRateChroma[k]);
5744 					dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
5745 							__func__, k, MaximumEffectiveCompressionChroma);
5746 #endif
5747 					TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5748 							+ ReadBandwidthSurfaceChroma[k]
5749 									* DCCFractionOfZeroSizeRequestsChroma[k];
5750 					TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5751 							+ ReadBandwidthSurfaceChroma[k]
5752 									* DCCFractionOfZeroSizeRequestsChroma[k]
5753 									/ MaximumEffectiveCompressionChroma;
5754 				}
5755 			} else {
5756 				TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5757 						+ ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
5758 			}
5759 			TotalRowReadBandwidth = TotalRowReadBandwidth
5760 					+ DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
5761 		}
5762 	}
5763 
5764 	AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
5765 	AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
5766 
5767 #ifdef __DML_VBA_DEBUG__
5768 	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
5769 	dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
5770 	dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
5771 	dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
5772 			__func__, TotalZeroSizeCompressedReadBandwidth);
5773 	dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
5774 	dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
5775 	dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5776 	dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
5777 	dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
5778 	dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
5779 	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
5780 #endif
5781 	if (AverageDCCZeroSizeFraction == 1) {
5782 		AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5783 				/ TotalZeroSizeCompressedReadBandwidth;
5784 		EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
5785 				* AverageZeroSizeCompressionRate
5786 				+ ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5787 						* AverageZeroSizeCompressionRate;
5788 	} else if (AverageDCCZeroSizeFraction > 0) {
5789 		AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5790 				/ TotalZeroSizeCompressedReadBandwidth;
5791 		EffectiveCompressedBufferSize = dml_min(
5792 				(double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5793 				(double) MetaFIFOSizeInKEntries * 1024 * 64
5794 					/ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
5795 					+ 1 / AverageDCCCompressionRate))
5796 					+ dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5797 					* AverageDCCCompressionRate,
5798 					((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5799 					/ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5800 
5801 #ifdef __DML_VBA_DEBUG__
5802 		dml_print("DML::%s: min 1 = %f\n", __func__,
5803 				CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5804 		dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
5805 				(AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
5806 						AverageDCCCompressionRate));
5807 		dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
5808 				CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
5809 		dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
5810 				(AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5811 #endif
5812 	} else {
5813 		EffectiveCompressedBufferSize = dml_min(
5814 				(double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5815 				(double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
5816 				+ ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5817 						* AverageDCCCompressionRate;
5818 
5819 #ifdef __DML_VBA_DEBUG__
5820 		dml_print("DML::%s: min 1 = %f\n", __func__,
5821 				CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5822 		dml_print("DML::%s: min 2 = %f\n", __func__,
5823 				MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
5824 #endif
5825 	}
5826 
5827 #ifdef __DML_VBA_DEBUG__
5828 	dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
5829 	dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
5830 	dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5831 #endif
5832 
5833 	*StutterPeriod = 0;
5834 
5835 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5836 		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5837 			LinesInDETY = ((double) DETBufferSizeY[k]
5838 					+ (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
5839 							* ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
5840 					/ BytePerPixelDETY[k] / SwathWidthY[k];
5841 			LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
5842 			DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
5843 					/ VRatio[k];
5844 #ifdef __DML_VBA_DEBUG__
5845 			dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
5846 			dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
5847 			dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
5848 			dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5849 					__func__, k, ReadBandwidthSurfaceLuma[k]);
5850 			dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
5851 			dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
5852 			dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
5853 					__func__, k, LinesInDETYRoundedDownToSwath);
5854 			dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
5855 			dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5856 			dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
5857 			dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
5858 			dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5859 #endif
5860 
5861 			if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
5862 				bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
5863 
5864 				FoundCriticalSurface = true;
5865 				*StutterPeriod = DETBufferingTimeY;
5866 				FrameTimeCriticalSurface = (
5867 						isInterlaceTiming ?
5868 								dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k])
5869 						* (double) HTotal[k] / PixelClock[k];
5870 				VActiveTimeCriticalSurface = (
5871 						isInterlaceTiming ?
5872 								dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k])
5873 						* (double) HTotal[k] / PixelClock[k];
5874 				BytePerPixelYCriticalSurface = BytePerPixelY[k];
5875 				SwathWidthYCriticalSurface = SwathWidthY[k];
5876 				SwathHeightYCriticalSurface = SwathHeightY[k];
5877 				BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
5878 				LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
5879 						- (LinesInDETY - LinesInDETYRoundedDownToSwath);
5880 				DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
5881 				MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
5882 				doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
5883 				doublePipeCriticalSurface = (DPPPerSurface[k] == 1);
5884 
5885 #ifdef __DML_VBA_DEBUG__
5886 				dml_print("DML::%s: k=%0d, FoundCriticalSurface                = %d\n",
5887 						__func__, k, FoundCriticalSurface);
5888 				dml_print("DML::%s: k=%0d, StutterPeriod                       = %f\n",
5889 						__func__, k, *StutterPeriod);
5890 				dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface         = %f\n",
5891 						__func__, k, MinTTUVBlankCriticalSurface);
5892 				dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface            = %f\n",
5893 						__func__, k, FrameTimeCriticalSurface);
5894 				dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface          = %f\n",
5895 						__func__, k, VActiveTimeCriticalSurface);
5896 				dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface        = %d\n",
5897 						__func__, k, BytePerPixelYCriticalSurface);
5898 				dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface          = %f\n",
5899 						__func__, k, SwathWidthYCriticalSurface);
5900 				dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface         = %f\n",
5901 						__func__, k, SwathHeightYCriticalSurface);
5902 				dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface  = %d\n",
5903 						__func__, k, BlockWidth256BytesYCriticalSurface);
5904 				dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface          = %d\n",
5905 						__func__, k, doublePlaneCriticalSurface);
5906 				dml_print("DML::%s: k=%0d, doublePipeCriticalSurface           = %d\n",
5907 						__func__, k, doublePipeCriticalSurface);
5908 				dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
5909 						__func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
5910 #endif
5911 			}
5912 		}
5913 	}
5914 
5915 	PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth,
5916 			EffectiveCompressedBufferSize);
5917 #ifdef __DML_VBA_DEBUG__
5918 	dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
5919 	dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5920 	dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5921 			__func__, *StutterPeriod * TotalDataReadBandwidth);
5922 	dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5923 	dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
5924 			PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
5925 	dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
5926 	dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
5927 	dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
5928 	dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5929 #endif
5930 
5931 	StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
5932 			/ ReturnBW
5933 			+ (*StutterPeriod * TotalDataReadBandwidth
5934 					- PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
5935 			+ *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
5936 #ifdef __DML_VBA_DEBUG__
5937 	dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
5938 			AverageDCCCompressionRate / ReturnBW);
5939 	dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5940 			__func__, (*StutterPeriod * TotalDataReadBandwidth));
5941 	dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
5942 			PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
5943 	dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
5944 	dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5945 #endif
5946 	StutterBurstTime = dml_max(StutterBurstTime,
5947 			LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
5948 					* SwathWidthYCriticalSurface / ReturnBW);
5949 
5950 #ifdef __DML_VBA_DEBUG__
5951 	dml_print("DML::%s: Time to finish residue swath=%f\n",
5952 			__func__,
5953 			LinesToFinishSwathTransferStutterCriticalSurface *
5954 			BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
5955 #endif
5956 
5957 	TotalActiveWriteback = 0;
5958 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5959 		if (WritebackEnable[k])
5960 			TotalActiveWriteback = TotalActiveWriteback + 1;
5961 	}
5962 
5963 	if (TotalActiveWriteback == 0) {
5964 #ifdef __DML_VBA_DEBUG__
5965 		dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
5966 		dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
5967 		dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
5968 		dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5969 #endif
5970 		*StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5971 				1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
5972 		*Z8StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5973 				1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
5974 		*NumberOfStutterBurstsPerFrame = (
5975 				*StutterEfficiencyNotIncludingVBlank > 0 ?
5976 						dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5977 		*Z8NumberOfStutterBurstsPerFrame = (
5978 				*Z8StutterEfficiencyNotIncludingVBlank > 0 ?
5979 						dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5980 	} else {
5981 		*StutterEfficiencyNotIncludingVBlank = 0.;
5982 		*Z8StutterEfficiencyNotIncludingVBlank = 0.;
5983 		*NumberOfStutterBurstsPerFrame = 0;
5984 		*Z8NumberOfStutterBurstsPerFrame = 0;
5985 	}
5986 #ifdef __DML_VBA_DEBUG__
5987 	dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
5988 	dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5989 			__func__, *StutterEfficiencyNotIncludingVBlank);
5990 	dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
5991 			__func__, *Z8StutterEfficiencyNotIncludingVBlank);
5992 	dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
5993 	dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5994 #endif
5995 
5996 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5997 		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5998 			if (BlendingAndTiming[k] == k) {
5999 				if (TotalNumberOfActiveOTG == 0) {
6000 					doublePixelClock = PixelClock[k];
6001 					doubleHTotal = HTotal[k];
6002 					doubleVTotal = VTotal[k];
6003 				} else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
6004 						|| doubleVTotal != VTotal[k]) {
6005 					SameTiming = false;
6006 				}
6007 				TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6008 			}
6009 		}
6010 	}
6011 
6012 	if (*StutterEfficiencyNotIncludingVBlank > 0) {
6013 		LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6014 
6015 		if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
6016 				&& LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
6017 			*StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
6018 						+ StutterBurstTime * VActiveTimeCriticalSurface
6019 						/ *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6020 		} else {
6021 			*StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6022 		}
6023 	} else {
6024 		*StutterEfficiency = 0;
6025 	}
6026 
6027 	if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6028 		LastZ8StutterPeriod = VActiveTimeCriticalSurface
6029 				- (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6030 		if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
6031 				MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
6032 			*Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
6033 				* VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6034 		} else {
6035 			*Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6036 		}
6037 	} else {
6038 		*Z8StutterEfficiency = 0.;
6039 	}
6040 
6041 #ifdef __DML_VBA_DEBUG__
6042 	dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6043 	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6044 	dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6045 	dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6046 	dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6047 	dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6048 	dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
6049 			__func__, *StutterEfficiencyNotIncludingVBlank);
6050 	dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6051 #endif
6052 
6053 	SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
6054 			* dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface);
6055 	LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
6056 	MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface)
6057 			- DETBufferSizeYCriticalSurface;
6058 
6059 	*DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
6060 			&& doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
6061 			&& (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
6062 			&& (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
6063 
6064 #ifdef __DML_VBA_DEBUG__
6065 	dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
6066 	dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
6067 	dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
6068 	dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
6069 #endif
6070 } // CalculateStutterEfficiency
6071 
/*
 * Derive the DET budget figures from the return buffer and ROB sizes.
 *
 * Outputs (all written through the pointers, all in KByte per their names):
 *   *MaxTotalDETInKByte             - 4/5 of (ConfigReturnBufferSizeInKByte +
 *                                     ROBBufferSizeInKByte), passed through
 *                                     dml_ceil() with a granularity of 64.
 *   *nomDETInKByte                  - *MaxTotalDETInKByte divided by MaxNumDPP,
 *                                     passed through dml_floor() with a
 *                                     granularity of 64; replaced by
 *                                     nomDETInKByteOverrideValue when
 *                                     nomDETInKByteOverrideEnable is set.
 *   *MinCompressedBufferSizeInKByte - ConfigReturnBufferSizeInKByte minus
 *                                     *MaxTotalDETInKByte (unsigned arithmetic).
 */
void dml32_CalculateMaxDETAndMinCompressedBufferSize(
		unsigned int    ConfigReturnBufferSizeInKByte,
		unsigned int    ROBBufferSizeInKByte,
		unsigned int MaxNumDPP,
		bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
		unsigned int nomDETInKByteOverrideValue,  // VBA_DELTA

		/* Output */
		unsigned int *MaxTotalDETInKByte,
		unsigned int *nomDETInKByte,
		unsigned int *MinCompressedBufferSizeInKByte)
{
	double combined_kbyte = (double) ConfigReturnBufferSizeInKByte
			+ (double) ROBBufferSizeInKByte;

	*MaxTotalDETInKByte = dml_ceil(combined_kbyte * 4.0 / 5.0, 64);
	*nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64);
	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte);
	dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte);
	dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP);
	dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte);
	dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte);
	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte);
#endif

	/* Without an override the computed nominal DET size stands. */
	if (!nomDETInKByteOverrideEnable)
		return;

	*nomDETInKByte = nomDETInKByteOverrideValue;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
#endif
} // CalculateMaxDETAndMinCompressedBufferSize
6108 
/*
 * Decide whether the active-display bandwidth demand is supported.
 *
 * For each of the NumberOfActiveSurfaces surfaces the demand is the sum of
 * luma, chroma and cursor read bandwidth (each scaled by its urgent burst
 * factor) plus NumberOfDPP[k] times the meta row and dpte row bandwidths.
 *
 * Returns true only when the summed demand does not exceed ReturnBW and no
 * surface has its NotUrgentLatencyHiding flag set.
 */
bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[])
{
	unsigned int surf;
	bool latency_hiding_short = false;
	bool supported;
	double total_bw = 0;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		if (NotUrgentLatencyHiding[surf])
			latency_hiding_short = true;

		/* Terms are added strictly left to right onto the running total. */
		total_bw = total_bw
				+ ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf]
				+ NumberOfDPP[surf] * meta_row_bandwidth[surf]
				+ NumberOfDPP[surf] * dpte_row_bandwidth[surf];
	}

	supported = !latency_hiding_short && (total_bw <= ReturnBW);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NotEnoughUrgentLatencyHiding        = %d\n", __func__, latency_hiding_short);
	dml_print("DML::%s: VActiveBandwith                     = %f\n", __func__, total_bw);
	dml_print("DML::%s: ReturnBW                            = %f\n", __func__, ReturnBW);
	dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, supported);
#endif
	return supported;
}
6147 
dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,double ReturnBW,bool NotUrgentLatencyHiding[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double PrefetchBandwidthLuma[],double PrefetchBandwidthChroma[],double cursor_bw[],double meta_row_bandwidth[],double dpte_row_bandwidth[],double cursor_bw_pre[],double prefetch_vmrow_bw[],unsigned int NumberOfDPP[],double UrgentBurstFactorLuma[],double UrgentBurstFactorChroma[],double UrgentBurstFactorCursor[],double UrgentBurstFactorLumaPre[],double UrgentBurstFactorChromaPre[],double UrgentBurstFactorCursorPre[],double PrefetchBW[],double VRatio[],double MaxVRatioPre,double * MaxPrefetchBandwidth,double * FractionOfUrgentBandwidth,bool * PrefetchBandwidthSupport)6148 void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
6149 		double ReturnBW,
6150 		bool NotUrgentLatencyHiding[],
6151 		double ReadBandwidthLuma[],
6152 		double ReadBandwidthChroma[],
6153 		double PrefetchBandwidthLuma[],
6154 		double PrefetchBandwidthChroma[],
6155 		double cursor_bw[],
6156 		double meta_row_bandwidth[],
6157 		double dpte_row_bandwidth[],
6158 		double cursor_bw_pre[],
6159 		double prefetch_vmrow_bw[],
6160 		unsigned int NumberOfDPP[],
6161 		double UrgentBurstFactorLuma[],
6162 		double UrgentBurstFactorChroma[],
6163 		double UrgentBurstFactorCursor[],
6164 		double UrgentBurstFactorLumaPre[],
6165 		double UrgentBurstFactorChromaPre[],
6166 		double UrgentBurstFactorCursorPre[],
6167 		double PrefetchBW[],
6168 		double VRatio[],
6169 		double MaxVRatioPre,
6170 
6171 		/* output */
6172 		double  *MaxPrefetchBandwidth,
6173 		double  *FractionOfUrgentBandwidth,
6174 		bool *PrefetchBandwidthSupport)
6175 {
6176 	unsigned int k;
6177 	double ActiveBandwidthPerSurface;
6178 	bool NotEnoughUrgentLatencyHiding = false;
6179 	double TotalActiveBandwidth = 0;
6180 	double TotalPrefetchBandwidth = 0;
6181 
6182 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6183 		if (NotUrgentLatencyHiding[k]) {
6184 			NotEnoughUrgentLatencyHiding = true;
6185 		}
6186 	}
6187 
6188 	*MaxPrefetchBandwidth = 0;
6189 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6190 		ActiveBandwidthPerSurface = ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]);
6191 
6192 		TotalActiveBandwidth += ActiveBandwidthPerSurface;
6193 
6194 		TotalPrefetchBandwidth = TotalPrefetchBandwidth + PrefetchBW[k] * VRatio[k];
6195 
6196 		*MaxPrefetchBandwidth = *MaxPrefetchBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6197 				ActiveBandwidthPerSurface,
6198 				NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6199 	}
6200 
6201 	if (MaxVRatioPre == __DML_MAX_VRATIO_PRE__)
6202 		*PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && (TotalPrefetchBandwidth <= TotalActiveBandwidth * __DML_MAX_BW_RATIO_PRE__) && !NotEnoughUrgentLatencyHiding;
6203 	else
6204 		*PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding;
6205 
6206 	*FractionOfUrgentBandwidth = *MaxPrefetchBandwidth / ReturnBW;
6207 }
6208 
/*
 * Compute the bandwidth left over after every surface's demand has been
 * subtracted from ReturnBW.
 *
 * Each surface subtracts the larger (dml_max) of:
 *   - its active demand: luma, chroma and cursor read bandwidth, each scaled
 *     by the matching urgent burst factor
 *   - its prefetch demand: NumberOfDPP[k] * (prefetch luma/chroma scaled by
 *     their "Pre" burst factors) plus the scaled prefetch cursor bandwidth
 *
 * Returns the remaining bandwidth; the result is not clamped.
 */
double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double cursor_bw_pre[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[])
{
	unsigned int surf;
	double active_bw;
	double prefetch_bw;
	double remaining_bw = ReturnBW;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		active_bw = ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf];

		prefetch_bw = NumberOfDPP[surf]
						* (PrefetchBandwidthLuma[surf] * UrgentBurstFactorLumaPre[surf]
						+ PrefetchBandwidthChroma[surf] * UrgentBurstFactorChromaPre[surf])
				+ cursor_bw_pre[surf] * UrgentBurstFactorCursorPre[surf];

		remaining_bw -= dml_max(active_bw, prefetch_bw);
	}

	return remaining_bw;
}
6235 
dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,double ReturnBW,enum immediate_flip_requirement ImmediateFlipRequirement[],double final_flip_bw[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double PrefetchBandwidthLuma[],double PrefetchBandwidthChroma[],double cursor_bw[],double meta_row_bandwidth[],double dpte_row_bandwidth[],double cursor_bw_pre[],double prefetch_vmrow_bw[],unsigned int NumberOfDPP[],double UrgentBurstFactorLuma[],double UrgentBurstFactorChroma[],double UrgentBurstFactorCursor[],double UrgentBurstFactorLumaPre[],double UrgentBurstFactorChromaPre[],double UrgentBurstFactorCursorPre[],double * TotalBandwidth,double * FractionOfUrgentBandwidth,bool * ImmediateFlipBandwidthSupport)6236 void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
6237 		double ReturnBW,
6238 		enum immediate_flip_requirement ImmediateFlipRequirement[],
6239 		double final_flip_bw[],
6240 		double ReadBandwidthLuma[],
6241 		double ReadBandwidthChroma[],
6242 		double PrefetchBandwidthLuma[],
6243 		double PrefetchBandwidthChroma[],
6244 		double cursor_bw[],
6245 		double meta_row_bandwidth[],
6246 		double dpte_row_bandwidth[],
6247 		double cursor_bw_pre[],
6248 		double prefetch_vmrow_bw[],
6249 		unsigned int NumberOfDPP[],
6250 		double UrgentBurstFactorLuma[],
6251 		double UrgentBurstFactorChroma[],
6252 		double UrgentBurstFactorCursor[],
6253 		double UrgentBurstFactorLumaPre[],
6254 		double UrgentBurstFactorChromaPre[],
6255 		double UrgentBurstFactorCursorPre[],
6256 
6257 		/* output */
6258 		double  *TotalBandwidth,
6259 		double  *FractionOfUrgentBandwidth,
6260 		bool *ImmediateFlipBandwidthSupport)
6261 {
6262 	unsigned int k;
6263 	*TotalBandwidth = 0;
6264 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6265 		if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
6266 			*TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6267 					NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6268 					NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6269 		} else {
6270 			*TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6271 					NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6272 					NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6273 		}
6274 	}
6275 	*ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
6276 	*FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
6277 }
6278 
dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,double ReturnBW,double UrgentLatency,unsigned int SwathHeightY[],unsigned int SwathHeightC[],unsigned int SwathWidthY[],unsigned int SwathWidthC[],double BytePerPixelInDETY[],double BytePerPixelInDETC[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],unsigned int NumOfDPP[],unsigned int HTotal[],double PixelClock[],double VRatioY[],double VRatioC[],enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],enum unbounded_requesting_policy UseUnboundedRequesting)6279 bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,
6280 		double ReturnBW,
6281 		double UrgentLatency,
6282 		unsigned int SwathHeightY[],
6283 		unsigned int SwathHeightC[],
6284 		unsigned int SwathWidthY[],
6285 		unsigned int SwathWidthC[],
6286 		double  BytePerPixelInDETY[],
6287 		double  BytePerPixelInDETC[],
6288 		unsigned int    DETBufferSizeY[],
6289 		unsigned int    DETBufferSizeC[],
6290 		unsigned int	NumOfDPP[],
6291 		unsigned int	HTotal[],
6292 		double	PixelClock[],
6293 		double	VRatioY[],
6294 		double	VRatioC[],
6295 		enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],
6296 		enum unbounded_requesting_policy UseUnboundedRequesting)
6297 {
6298 	int k;
6299 	double SwathSizeAllSurfaces = 0;
6300 	double SwathSizeAllSurfacesInFetchTimeUs;
6301 	double DETSwathLatencyHidingUs;
6302 	double DETSwathLatencyHidingYUs;
6303 	double DETSwathLatencyHidingCUs;
6304 	double SwathSizePerSurfaceY[DC__NUM_DPP__MAX];
6305 	double SwathSizePerSurfaceC[DC__NUM_DPP__MAX];
6306 	bool NotEnoughDETSwathFillLatencyHiding = false;
6307 
6308 	if (UseUnboundedRequesting == dm_unbounded_requesting)
6309 		return false;
6310 
6311 	/* calculate sum of single swath size for all pipes in bytes */
6312 	for (k = 0; k < NumberOfActiveSurfaces; k++) {
6313 		SwathSizePerSurfaceY[k] = SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k];
6314 
6315 		if (SwathHeightC[k] != 0)
6316 			SwathSizePerSurfaceC[k] = SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k];
6317 		else
6318 			SwathSizePerSurfaceC[k] = 0;
6319 
6320 		SwathSizeAllSurfaces += SwathSizePerSurfaceY[k] + SwathSizePerSurfaceC[k];
6321 	}
6322 
6323 	SwathSizeAllSurfacesInFetchTimeUs = SwathSizeAllSurfaces / ReturnBW + UrgentLatency;
6324 
6325 	/* ensure all DET - 1 swath can hide a fetch for all surfaces */
6326 	for (k = 0; k < NumberOfActiveSurfaces; k++) {
6327 		double LineTime = HTotal[k] / PixelClock[k];
6328 
6329 		/* only care if surface is not phantom */
6330 		if (UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
6331 			DETSwathLatencyHidingYUs = (dml_floor(DETBufferSizeY[k] / BytePerPixelInDETY[k] / SwathWidthY[k], 1.0) - SwathHeightY[k]) / VRatioY[k] * LineTime;
6332 
6333 			if (SwathHeightC[k] != 0) {
6334 				DETSwathLatencyHidingCUs = (dml_floor(DETBufferSizeC[k] / BytePerPixelInDETC[k] / SwathWidthC[k], 1.0) - SwathHeightC[k]) / VRatioC[k] * LineTime;
6335 
6336 				DETSwathLatencyHidingUs = dml_min(DETSwathLatencyHidingYUs, DETSwathLatencyHidingCUs);
6337 			} else {
6338 				DETSwathLatencyHidingUs = DETSwathLatencyHidingYUs;
6339 			}
6340 
6341 			/* DET must be able to hide time to fetch 1 swath for each surface */
6342 			if (DETSwathLatencyHidingUs < SwathSizeAllSurfacesInFetchTimeUs) {
6343 				NotEnoughDETSwathFillLatencyHiding = true;
6344 				break;
6345 			}
6346 		}
6347 	}
6348 
6349 	return NotEnoughDETSwathFillLatencyHiding;
6350 }
6351