xref: /linux/drivers/media/platform/qcom/iris/iris_buffer.c (revision d639d9fa162aadec1ae9980c4dcf6e50bd2f8290)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
4  */
5 
6 #include <media/v4l2-event.h>
7 #include <media/v4l2-mem2mem.h>
8 
9 #include "iris_buffer.h"
10 #include "iris_instance.h"
11 #include "iris_power.h"
12 #include "iris_vpu_buffer.h"
13 
/* Alignment applied to the plane sizes and total buffer sizes computed below. */
#define PIXELS_4K 4096
/* Frame dimensions from which NUM_MBS_4K is derived. */
#define MAX_WIDTH 4096
#define MAX_HEIGHT 2304
/* Stride alignments for the Y and UV planes (the *_P010 values are also used for QC10C). */
#define Y_STRIDE_ALIGN 128
#define Y_STRIDE_ALIGN_P010 256
#define UV_STRIDE_ALIGN 128
#define UV_STRIDE_ALIGN_P010 256
/* Scanline-count alignments for the Y and UV planes. */
#define Y_SCANLINE_ALIGN 32
#define Y_SCANLINE_ALIGN_QC10C 16
#define UV_SCANLINE_ALIGN 16
#define UV_SCANLINE_ALIGN_QC08C 32
/* Stride and scanline-count alignments for the QC08C/QC10C meta planes. */
#define META_STRIDE_ALIGNED 64
#define META_SCANLINE_ALIGNED 16
/* Number of 16x16 blocks covering a MAX_WIDTH x MAX_HEIGHT frame. */
#define NUM_MBS_4K (DIV_ROUND_UP(MAX_WIDTH, 16) * DIV_ROUND_UP(MAX_HEIGHT, 16))
28 
29 /*
30  * NV12:
31  * YUV 4:2:0 image with a plane of 8 bit Y samples followed
32  * by an interleaved U/V plane containing 8 bit 2x2 subsampled
33  * colour difference samples.
34  *
35  * <-Y/UV_Stride (aligned to 128)->
36  * <------- Width ------->
37  * Y Y Y Y Y Y Y Y Y Y Y Y . . . .  ^           ^
38  * Y Y Y Y Y Y Y Y Y Y Y Y . . . .  |           |
39  * Y Y Y Y Y Y Y Y Y Y Y Y . . . .  Height      |
40  * Y Y Y Y Y Y Y Y Y Y Y Y . . . .  |          y_scanlines (aligned to 32)
41  * Y Y Y Y Y Y Y Y Y Y Y Y . . . .  |           |
42  * Y Y Y Y Y Y Y Y Y Y Y Y . . . .  |           |
43  * Y Y Y Y Y Y Y Y Y Y Y Y . . . .  |           |
44  * Y Y Y Y Y Y Y Y Y Y Y Y . . . .  V           |
45  * . . . . . . . . . . . . . . . .              |
46  * . . . . . . . . . . . . . . . .              |
47  * . . . . . . . . . . . . . . . .              |
48  * . . . . . . . . . . . . . . . .              V
49  * U V U V U V U V U V U V . . . .  ^
50  * U V U V U V U V U V U V . . . .  |
51  * U V U V U V U V U V U V . . . .  |
52  * U V U V U V U V U V U V . . . .  uv_scanlines (aligned to 16)
53  * . . . . . . . . . . . . . . . .  |
54  * . . . . . . . . . . . . . . . .  V
55  * . . . . . . . . . . . . . . . .  --> Buffer size aligned to 4K
56  *
57  * y_stride : Width aligned to 128
58  * uv_stride : Width aligned to 128
59  * y_scanlines: Height aligned to 32
60  * uv_scanlines: Height/2 aligned to 16
 * Total size = align(y_stride * y_scanlines
 *          + uv_stride * uv_scanlines, 4096)
63  *
64  * Note: All the alignments are hardware requirements.
65  */
66 static u32 iris_yuv_buffer_size_nv12(struct iris_inst *inst)
67 {
68 	u32 y_plane, uv_plane, y_stride, uv_stride, y_scanlines, uv_scanlines;
69 	struct v4l2_format *f;
70 
71 	if (inst->domain == DECODER)
72 		f = inst->fmt_dst;
73 	else
74 		f = inst->fmt_src;
75 
76 	y_stride = ALIGN(f->fmt.pix_mp.width, Y_STRIDE_ALIGN);
77 	uv_stride = ALIGN(f->fmt.pix_mp.width, UV_STRIDE_ALIGN);
78 	y_scanlines = ALIGN(f->fmt.pix_mp.height, Y_SCANLINE_ALIGN);
79 	uv_scanlines = ALIGN((f->fmt.pix_mp.height + 1) >> 1, UV_SCANLINE_ALIGN);
80 	y_plane = y_stride * y_scanlines;
81 	uv_plane = uv_stride * uv_scanlines;
82 
83 	return ALIGN(y_plane + uv_plane, PIXELS_4K);
84 }
85 
86 /*
87  * P010:
88  * YUV 4:2:0 image with a plane of 10 bit Y samples followed
89  * by an interleaved U/V plane containing 10 bit 2x2 subsampled
90  * colour difference samples.
91  *
92  * <-Y/UV_Stride (aligned to 256)->
93  * <----- Width*2 ------->
94  * Y Y Y Y Y Y Y Y Y Y Y Y . . . .  ^           ^
95  * Y Y Y Y Y Y Y Y Y Y Y Y . . . .  |           |
96  * Y Y Y Y Y Y Y Y Y Y Y Y . . . .  Height      |
97  * Y Y Y Y Y Y Y Y Y Y Y Y . . . .  |          y_scanlines (aligned to 32)
98  * Y Y Y Y Y Y Y Y Y Y Y Y . . . .  |           |
99  * Y Y Y Y Y Y Y Y Y Y Y Y . . . .  |           |
100  * Y Y Y Y Y Y Y Y Y Y Y Y . . . .  |           |
101  * Y Y Y Y Y Y Y Y Y Y Y Y . . . .  V           |
102  * . . . . . . . . . . . . . . . .              |
103  * . . . . . . . . . . . . . . . .              |
104  * . . . . . . . . . . . . . . . .              |
105  * . . . . . . . . . . . . . . . .              V
106  * U V U V U V U V U V U V . . . .  ^
107  * U V U V U V U V U V U V . . . .  |
108  * U V U V U V U V U V U V . . . .  |
109  * U V U V U V U V U V U V . . . .  uv_scanlines (aligned to 16)
110  * . . . . . . . . . . . . . . . .  |
111  * . . . . . . . . . . . . . . . .  V
112  * . . . . . . . . . . . . . . . .  --> Buffer size aligned to 4K
113  *
114  * y_stride : Width*2 aligned to 256
115  * uv_stride : Width*2 aligned to 256
116  * y_scanlines: Height aligned to 32
117  * uv_scanlines: Height/2 aligned to 16
 * Total size = align(y_stride * y_scanlines
 *          + uv_stride * uv_scanlines, 4096)
120  *
121  * Note: All the alignments are hardware requirements.
122  */
123 static u32 iris_yuv_buffer_size_p010(struct iris_inst *inst)
124 {
125 	u32 y_plane, uv_plane, y_stride, uv_stride, y_scanlines, uv_scanlines;
126 	struct v4l2_format *f;
127 
128 	if (inst->domain == DECODER)
129 		f = inst->fmt_dst;
130 	else
131 		f = inst->fmt_src;
132 
133 	y_stride = ALIGN(f->fmt.pix_mp.width * 2, Y_STRIDE_ALIGN_P010);
134 	uv_stride = ALIGN(f->fmt.pix_mp.width * 2, UV_STRIDE_ALIGN_P010);
135 	y_scanlines = ALIGN(f->fmt.pix_mp.height, Y_SCANLINE_ALIGN);
136 	uv_scanlines = ALIGN((f->fmt.pix_mp.height + 1) >> 1, UV_SCANLINE_ALIGN);
137 	y_plane = y_stride * y_scanlines;
138 	uv_plane = uv_stride * uv_scanlines;
139 
140 	return ALIGN(y_plane + uv_plane, PIXELS_4K);
141 }
142 
143 /*
144  * QC08C:
145  * Compressed Macro-tile format for NV12.
146  * Contains 4 planes in the following order -
147  * (A) Y_Meta_Plane
148  * (B) Y_UBWC_Plane
149  * (C) UV_Meta_Plane
150  * (D) UV_UBWC_Plane
151  *
152  * Y_Meta_Plane consists of meta information to decode compressed
153  * tile data in Y_UBWC_Plane.
154  * Y_UBWC_Plane consists of Y data in compressed macro-tile format.
155  * UBWC decoder block will use the Y_Meta_Plane data together with
156  * Y_UBWC_Plane data to produce loss-less uncompressed 8 bit Y samples.
157  *
158  * UV_Meta_Plane consists of meta information to decode compressed
159  * tile data in UV_UBWC_Plane.
160  * UV_UBWC_Plane consists of UV data in compressed macro-tile format.
161  * UBWC decoder block will use UV_Meta_Plane data together with
162  * UV_UBWC_Plane data to produce loss-less uncompressed 8 bit 2x2
163  * subsampled color difference samples.
164  *
165  * Each tile in Y_UBWC_Plane/UV_UBWC_Plane is independently decodable
166  * and randomly accessible. There is no dependency between tiles.
167  *
168  * <----- y_meta_stride ----> (aligned to 64)
169  * <-------- Width ------>
170  * M M M M M M M M M M M M . .      ^           ^
171  * M M M M M M M M M M M M . .      |           |
172  * M M M M M M M M M M M M . .      Height      |
173  * M M M M M M M M M M M M . .      |         y_meta_scanlines  (aligned to 16)
174  * M M M M M M M M M M M M . .      |           |
175  * M M M M M M M M M M M M . .      |           |
176  * M M M M M M M M M M M M . .      |           |
177  * M M M M M M M M M M M M . .      V           |
178  * . . . . . . . . . . . . . .                  |
179  * . . . . . . . . . . . . . .                  |
180  * . . . . . . . . . . . . . .      -------> Buffer size aligned to 4k
181  * . . . . . . . . . . . . . .                  V
182  * <--Compressed tile y_stride---> (aligned to 128)
183  * <------- Width ------->
184  * Y* Y* Y* Y* Y* Y* Y* Y* . . . .  ^           ^
185  * Y* Y* Y* Y* Y* Y* Y* Y* . . . .  |           |
186  * Y* Y* Y* Y* Y* Y* Y* Y* . . . .  Height      |
187  * Y* Y* Y* Y* Y* Y* Y* Y* . . . .  |        Macro_tile y_scanlines (aligned to 32)
188  * Y* Y* Y* Y* Y* Y* Y* Y* . . . .  |           |
189  * Y* Y* Y* Y* Y* Y* Y* Y* . . . .  |           |
190  * Y* Y* Y* Y* Y* Y* Y* Y* . . . .  |           |
191  * Y* Y* Y* Y* Y* Y* Y* Y* . . . .  V           |
192  * . . . . . . . . . . . . . . . .              |
193  * . . . . . . . . . . . . . . . .              |
194  * . . . . . . . . . . . . . . . .  -------> Buffer size aligned to 4k
195  * . . . . . . . . . . . . . . . .              V
196  * <----- uv_meta_stride ---->  (aligned to 64)
197  * M M M M M M M M M M M M . .      ^
198  * M M M M M M M M M M M M . .      |
199  * M M M M M M M M M M M M . .      |
200  * M M M M M M M M M M M M . .      uv_meta_scanlines (aligned to 16)
201  * . . . . . . . . . . . . . .      |
202  * . . . . . . . . . . . . . .      V
203  * . . . . . . . . . . . . . .      -------> Buffer size aligned to 4k
204  * <--Compressed tile uv_stride---> (aligned to 128)
205  * U* V* U* V* U* V* U* V* . . . .  ^
206  * U* V* U* V* U* V* U* V* . . . .  |
207  * U* V* U* V* U* V* U* V* . . . .  |
208  * U* V* U* V* U* V* U* V* . . . .  uv_scanlines (aligned to 32)
209  * . . . . . . . . . . . . . . . .  |
210  * . . . . . . . . . . . . . . . .  V
211  * . . . . . . . . . . . . . . . .  -------> Buffer size aligned to 4k
212  *
 * y_stride: width aligned to 128
 * uv_stride: width aligned to 128
 * y_scanlines: height aligned to 32
 * uv_scanlines: height/2 aligned to 32
 * y_plane: y_stride * y_scanlines, aligned to 4096
 * uv_plane: uv_stride * uv_scanlines, aligned to 4096
 * y_meta_stride: DIV_ROUND_UP(width, 32) aligned to 64
 * y_meta_scanlines: DIV_ROUND_UP(height, 8) aligned to 16
 * y_meta_plane: y_meta_stride * y_meta_scanlines, aligned to 4096
 * uv_meta_stride: DIV_ROUND_UP(width/2, 16) aligned to 64
 * uv_meta_scanlines: DIV_ROUND_UP(height/2, 8) aligned to 16
 * uv_meta_plane: uv_meta_stride * uv_meta_scanlines, aligned to 4096
225  *
226  * Total size = align( y_plane + uv_plane +
227  *           y_meta_plane + uv_meta_plane, 4096)
228  *
229  * Note: All the alignments are hardware requirements.
230  */
231 static u32 iris_yuv_buffer_size_qc08c(struct iris_inst *inst)
232 {
233 	u32 y_plane, uv_plane, y_stride, uv_stride;
234 	u32 uv_meta_stride, uv_meta_plane;
235 	u32 y_meta_stride, y_meta_plane;
236 	struct v4l2_format *f = NULL;
237 
238 	if (inst->domain == DECODER)
239 		f = inst->fmt_dst;
240 	else
241 		f = inst->fmt_src;
242 
243 	y_meta_stride = ALIGN(DIV_ROUND_UP(f->fmt.pix_mp.width, META_STRIDE_ALIGNED >> 1),
244 			      META_STRIDE_ALIGNED);
245 	y_meta_plane = y_meta_stride * ALIGN(DIV_ROUND_UP(f->fmt.pix_mp.height,
246 							  META_SCANLINE_ALIGNED >> 1),
247 					     META_SCANLINE_ALIGNED);
248 	y_meta_plane = ALIGN(y_meta_plane, PIXELS_4K);
249 
250 	y_stride = ALIGN(f->fmt.pix_mp.width, Y_STRIDE_ALIGN);
251 	y_plane = ALIGN(y_stride * ALIGN(f->fmt.pix_mp.height, Y_SCANLINE_ALIGN), PIXELS_4K);
252 
253 	uv_meta_stride = ALIGN(DIV_ROUND_UP(f->fmt.pix_mp.width / 2, META_STRIDE_ALIGNED >> 2),
254 			       META_STRIDE_ALIGNED);
255 	uv_meta_plane = uv_meta_stride * ALIGN(DIV_ROUND_UP(f->fmt.pix_mp.height / 2,
256 							    META_SCANLINE_ALIGNED >> 1),
257 					       META_SCANLINE_ALIGNED);
258 	uv_meta_plane = ALIGN(uv_meta_plane, PIXELS_4K);
259 
260 	uv_stride = ALIGN(f->fmt.pix_mp.width, UV_STRIDE_ALIGN);
261 	uv_plane = ALIGN(uv_stride * ALIGN(f->fmt.pix_mp.height / 2, UV_SCANLINE_ALIGN_QC08C),
262 			 PIXELS_4K);
263 
264 	return ALIGN(y_meta_plane + y_plane + uv_meta_plane + uv_plane, PIXELS_4K);
265 }
266 
267 /*
268  * QC10C:
269  * UBWC-compressed format for P010.
270  * Contains 4 planes in the following order -
271  * (A) Y_Meta_Plane
272  * (B) Y_UBWC_Plane
273  * (C) UV_Meta_Plane
274  * (D) UV_UBWC_Plane
275  *
276  * Y_Meta_Plane consists of meta information to decode compressed
277  * tile data in Y_UBWC_Plane.
278  * Y_UBWC_Plane consists of Y data in compressed macro-tile format.
279  * UBWC decoder block will use the Y_Meta_Plane data together with
280  * Y_UBWC_Plane data to produce loss-less uncompressed 10 bit Y samples.
281  *
282  * UV_Meta_Plane consists of meta information to decode compressed
283  * tile data in UV_UBWC_Plane.
284  * UV_UBWC_Plane consists of UV data in compressed macro-tile format.
285  * UBWC decoder block will use UV_Meta_Plane data together with
286  * UV_UBWC_Plane data to produce loss-less uncompressed 10 bit 2x2
287  * subsampled color difference samples.
288  *
289  * Each tile in Y_UBWC_Plane/UV_UBWC_Plane is independently decodable
290  * and randomly accessible. There is no dependency between tiles.
291  *
292  * <----- Y Meta stride -----> (aligned to 64)
293  * <-------- Width ----------> (aligned to 48)
294  * M M M M M M M M M M M M . .      ^           ^
295  * M M M M M M M M M M M M . .      |           |
296  * M M M M M M M M M M M M . .      Height      |
297  * M M M M M M M M M M M M . .      |         Meta_Y_Scanlines (aligned to 16)
298  * M M M M M M M M M M M M . .      |           |
299  * M M M M M M M M M M M M . .      |           |
300  * M M M M M M M M M M M M . .      |           |
301  * M M M M M M M M M M M M . .      V           |
302  * . . . . . . . . . . . . . .                  |
303  * . . . . . . . . . . . . . .                  |
304  * . . . . . . . . . . . . . .      -------> Buffer size aligned to 4k
305  * . . . . . . . . . . . . . .                  V
306  * <--Compressed tile Y stride --> (aligned to 256)
307  * <------- Width * 4/3 ---------> (aligned to 48)
308  * Y* Y* Y* Y* Y* Y* Y* Y* . . . .  ^           ^
309  * Y* Y* Y* Y* Y* Y* Y* Y* . . . .  |           |
310  * Y* Y* Y* Y* Y* Y* Y* Y* . . . .  Height      |
311  * Y* Y* Y* Y* Y* Y* Y* Y* . . . .  |        Macro_tile_Y_Scanlines (aligned to 16)
312  * Y* Y* Y* Y* Y* Y* Y* Y* . . . .  |           |
313  * Y* Y* Y* Y* Y* Y* Y* Y* . . . .  |           |
314  * Y* Y* Y* Y* Y* Y* Y* Y* . . . .  |           |
315  * Y* Y* Y* Y* Y* Y* Y* Y* . . . .  V           |
316  * . . . . . . . . . . . . . . . .              |
317  * . . . . . . . . . . . . . . . .              |
318  * . . . . . . . . . . . . . . . .  -------> Buffer size aligned to 4k
319  * . . . . . . . . . . . . . . . .              V
320  * <---- UV Meta stride ----> (aligned to 64)
321  * <----- Width / 2 --------> (aligned to 24)
322  * M M M M M M M M M M M M . .    ^           ^
323  * M M M M M M M M M M M M . .    |           |
324  * M M M M M M M M M M M M . .    Height/2    |
325  * M M M M M M M M M M M M . .    V           M_UV_Scanlines (aligned to 16)
326  * . . . . . . . . . . . . . .                |
327  * . . . . . . . . . . . . . .                V
328  * . . . . . . . . . . . . . .      -------> Buffer size aligned to 4k
329  * <--Compressed tile UV stride--> (aligned to 256)
330  * <------- Width * 4/3 ---------> (aligned to 48)
331  * U* V* U* V* U* V* U* V* . . . .  ^
332  * U* V* U* V* U* V* U* V* . . . .  |
333  * U* V* U* V* U* V* U* V* . . . .  |
334  * U* V* U* V* U* V* U* V* . . . .  UV_Scanlines (aligned to 16)
335  * . . . . . . . . . . . . . . . .  |
336  * . . . . . . . . . . . . . . . .  V
337  * . . . . . . . . . . . . . . . .  -------> Buffer size aligned to 4k
338  *
 * y_stride: width * 4/3 aligned to 256
 * uv_stride: width * 4/3 aligned to 256
 * y_scanlines: height aligned to 16
 * uv_scanlines: (height + 1)/2 aligned to 16
 * y_plane: y_stride * y_scanlines, aligned to 4096
 * uv_plane: uv_stride * uv_scanlines, aligned to 4096
 * y_meta_stride: DIV_ROUND_UP(width, 48) aligned to 64
 * y_meta_scanlines: DIV_ROUND_UP(height, 4) aligned to 16
 * y_meta_plane: y_meta_stride * y_meta_scanlines, aligned to 4096
 * uv_meta_stride: DIV_ROUND_UP((width + 1)/2, 24) aligned to 64
 * uv_meta_scanlines: DIV_ROUND_UP((height + 1)/2, 4) aligned to 16
 * uv_meta_plane: uv_meta_stride * uv_meta_scanlines, aligned to 4096
351  *
352  * Total size = align( y_plane + uv_plane +
353  *           y_meta_plane + uv_meta_plane, 4096)
354  *
355  * Note: All the alignments are hardware requirements.
356  */
357 static u32 iris_yuv_buffer_size_qc10c(struct iris_inst *inst)
358 {
359 	u32 y_plane, uv_plane, y_stride, uv_stride;
360 	u32 uv_meta_stride, uv_meta_plane;
361 	u32 y_meta_stride, y_meta_plane;
362 	struct v4l2_format *f;
363 
364 	if (inst->domain == DECODER)
365 		f = inst->fmt_dst;
366 	else
367 		f = inst->fmt_src;
368 
369 	y_meta_stride = ALIGN(DIV_ROUND_UP(f->fmt.pix_mp.width, 48),
370 			      META_STRIDE_ALIGNED);
371 	y_meta_plane = y_meta_stride * ALIGN(DIV_ROUND_UP(f->fmt.pix_mp.height, 4),
372 					     META_SCANLINE_ALIGNED);
373 	y_meta_plane = ALIGN(y_meta_plane, PIXELS_4K);
374 
375 	y_stride = ALIGN(f->fmt.pix_mp.width * 4 / 3, Y_STRIDE_ALIGN_P010);
376 	y_plane = ALIGN(y_stride * ALIGN(f->fmt.pix_mp.height, Y_SCANLINE_ALIGN_QC10C),
377 			PIXELS_4K);
378 
379 	uv_meta_stride = ALIGN(DIV_ROUND_UP((f->fmt.pix_mp.width + 1) >> 1, 24),
380 			       META_STRIDE_ALIGNED);
381 	uv_meta_plane = uv_meta_stride *
382 			ALIGN(DIV_ROUND_UP((f->fmt.pix_mp.height + 1) >> 1, 4),
383 			      META_SCANLINE_ALIGNED);
384 	uv_meta_plane = ALIGN(uv_meta_plane, PIXELS_4K);
385 
386 	uv_stride = ALIGN(f->fmt.pix_mp.width * 4 / 3, UV_STRIDE_ALIGN_P010);
387 	uv_plane = ALIGN(uv_stride * ALIGN((f->fmt.pix_mp.height + 1) >> 1, UV_SCANLINE_ALIGN),
388 			 PIXELS_4K);
389 
390 	return ALIGN(y_meta_plane + y_plane + uv_meta_plane + uv_plane, PIXELS_4K);
391 }
392 
393 static u32 iris_dec_bitstream_buffer_size(struct iris_inst *inst)
394 {
395 	struct platform_inst_caps *caps = inst->core->iris_platform_data->inst_caps;
396 	u32 base_res_mbs = NUM_MBS_4K;
397 	u32 frame_size, num_mbs;
398 	u32 div_factor = 2;
399 
400 	num_mbs = iris_get_mbpf(inst);
401 	if (num_mbs > NUM_MBS_4K) {
402 		div_factor = 4;
403 		base_res_mbs = caps->max_mbpf;
404 	} else {
405 		if (inst->codec == V4L2_PIX_FMT_VP9)
406 			div_factor = 1;
407 	}
408 
409 	/*
410 	 * frame_size = YUVsize / div_factor
411 	 * where YUVsize = resolution_in_MBs * MBs_in_pixel * 3 / 2
412 	 */
413 	frame_size = base_res_mbs * (16 * 16) * 3 / 2 / div_factor;
414 
415 	return ALIGN(frame_size, PIXELS_4K);
416 }
417 
418 static u32 iris_enc_bitstream_buffer_size(struct iris_inst *inst)
419 {
420 	u32 aligned_width, aligned_height, bitstream_size, yuv_size;
421 	int bitrate_mode, frame_rc;
422 	struct v4l2_format *f;
423 
424 	f = inst->fmt_dst;
425 
426 	bitrate_mode = inst->fw_caps[BITRATE_MODE].value;
427 	frame_rc = inst->fw_caps[FRAME_RC_ENABLE].value;
428 
429 	aligned_width = ALIGN(f->fmt.pix_mp.width, 32);
430 	aligned_height = ALIGN(f->fmt.pix_mp.height, 32);
431 	bitstream_size = aligned_width * aligned_height * 3;
432 	yuv_size = (aligned_width * aligned_height * 3) >> 1;
433 	if (aligned_width * aligned_height > (4096 * 2176))
434 		/* bitstream_size = 0.25 * yuv_size; */
435 		bitstream_size = (bitstream_size >> 3);
436 	else if (aligned_width * aligned_height > (1280 * 720))
437 		/* bitstream_size = 0.5 * yuv_size; */
438 		bitstream_size = (bitstream_size >> 2);
439 
440 	if ((!frame_rc || bitrate_mode == V4L2_MPEG_VIDEO_BITRATE_MODE_CQ) &&
441 	    bitstream_size < yuv_size)
442 		bitstream_size = (bitstream_size << 1);
443 
444 	return ALIGN(bitstream_size, 4096);
445 }
446 
447 int iris_get_buffer_size(struct iris_inst *inst,
448 			 enum iris_buffer_type buffer_type)
449 {
450 	if (inst->domain == DECODER) {
451 		switch (buffer_type) {
452 		case BUF_INPUT:
453 			return iris_dec_bitstream_buffer_size(inst);
454 		case BUF_OUTPUT:
455 			if (inst->fmt_dst->fmt.pix_mp.pixelformat == V4L2_PIX_FMT_QC08C)
456 				return iris_yuv_buffer_size_qc08c(inst);
457 			else if (inst->fmt_dst->fmt.pix_mp.pixelformat == V4L2_PIX_FMT_QC10C)
458 				return iris_yuv_buffer_size_qc10c(inst);
459 			else if (inst->fmt_dst->fmt.pix_mp.pixelformat == V4L2_PIX_FMT_P010)
460 				return iris_yuv_buffer_size_p010(inst);
461 			else
462 				return iris_yuv_buffer_size_nv12(inst);
463 		case BUF_DPB:
464 			if (iris_fmt_is_10bit(inst->fmt_dst->fmt.pix_mp.pixelformat))
465 				return iris_yuv_buffer_size_qc10c(inst);
466 			else
467 				return iris_yuv_buffer_size_qc08c(inst);
468 		default:
469 			return 0;
470 		}
471 	} else {
472 		switch (buffer_type) {
473 		case BUF_INPUT:
474 			if (inst->fmt_src->fmt.pix_mp.pixelformat == V4L2_PIX_FMT_QC08C)
475 				return iris_yuv_buffer_size_qc08c(inst);
476 			else
477 				return iris_yuv_buffer_size_nv12(inst);
478 		case BUF_OUTPUT:
479 			return iris_enc_bitstream_buffer_size(inst);
480 		default:
481 			return 0;
482 		}
483 	}
484 }
485 
486 static void iris_fill_internal_buf_info(struct iris_inst *inst,
487 					enum iris_buffer_type buffer_type)
488 {
489 	struct iris_buffers *buffers = &inst->buffers[buffer_type];
490 
491 	buffers->size = inst->core->iris_firmware_desc->get_vpu_buffer_size(inst, buffer_type);
492 	buffers->min_count = iris_vpu_buf_count(inst, buffer_type);
493 }
494 
495 void iris_get_internal_buffers(struct iris_inst *inst, u32 plane)
496 {
497 	const struct iris_firmware_data *firmware_data = inst->core->iris_firmware_data;
498 	const u32 *internal_buf_type;
499 	u32 internal_buffer_count, i;
500 
501 	if (inst->domain == DECODER) {
502 		if (V4L2_TYPE_IS_OUTPUT(plane)) {
503 			internal_buf_type = firmware_data->dec_ip_int_buf_tbl;
504 			internal_buffer_count = firmware_data->dec_ip_int_buf_tbl_size;
505 			for (i = 0; i < internal_buffer_count; i++)
506 				iris_fill_internal_buf_info(inst, internal_buf_type[i]);
507 		} else {
508 			internal_buf_type = firmware_data->dec_op_int_buf_tbl;
509 			internal_buffer_count = firmware_data->dec_op_int_buf_tbl_size;
510 			for (i = 0; i < internal_buffer_count; i++)
511 				iris_fill_internal_buf_info(inst, internal_buf_type[i]);
512 		}
513 	} else {
514 		if (V4L2_TYPE_IS_OUTPUT(plane)) {
515 			internal_buf_type = firmware_data->enc_ip_int_buf_tbl;
516 			internal_buffer_count = firmware_data->enc_ip_int_buf_tbl_size;
517 			for (i = 0; i < internal_buffer_count; i++)
518 				iris_fill_internal_buf_info(inst, internal_buf_type[i]);
519 		} else {
520 			internal_buf_type = firmware_data->enc_op_int_buf_tbl;
521 			internal_buffer_count = firmware_data->enc_op_int_buf_tbl_size;
522 			for (i = 0; i < internal_buffer_count; i++)
523 				iris_fill_internal_buf_info(inst, internal_buf_type[i]);
524 		}
525 	}
526 }
527 
528 static int iris_create_internal_buffer(struct iris_inst *inst,
529 				       enum iris_buffer_type buffer_type, u32 index)
530 {
531 	struct iris_buffers *buffers = &inst->buffers[buffer_type];
532 	struct iris_core *core = inst->core;
533 	struct iris_buffer *buffer;
534 
535 	if (!buffers->size)
536 		return 0;
537 
538 	buffer = kzalloc_obj(*buffer);
539 	if (!buffer)
540 		return -ENOMEM;
541 
542 	INIT_LIST_HEAD(&buffer->list);
543 	buffer->type = buffer_type;
544 	buffer->index = index;
545 	buffer->buffer_size = buffers->size;
546 	buffer->dma_attrs = DMA_ATTR_WRITE_COMBINE | DMA_ATTR_NO_KERNEL_MAPPING;
547 
548 	buffer->kvaddr = dma_alloc_attrs(core->dev, buffer->buffer_size,
549 					 &buffer->device_addr, GFP_KERNEL, buffer->dma_attrs);
550 	if (!buffer->kvaddr) {
551 		kfree(buffer);
552 		return -ENOMEM;
553 	}
554 
555 	list_add_tail(&buffer->list, &buffers->list);
556 
557 	return 0;
558 }
559 
560 int iris_create_internal_buffers(struct iris_inst *inst, u32 plane)
561 {
562 	const struct iris_firmware_data *firmware_data = inst->core->iris_firmware_data;
563 	u32 internal_buffer_count, i, j;
564 	struct iris_buffers *buffers;
565 	const u32 *internal_buf_type;
566 	int ret;
567 
568 	if (inst->domain == DECODER) {
569 		if (V4L2_TYPE_IS_OUTPUT(plane)) {
570 			internal_buf_type = firmware_data->dec_ip_int_buf_tbl;
571 			internal_buffer_count = firmware_data->dec_ip_int_buf_tbl_size;
572 		} else {
573 			internal_buf_type = firmware_data->dec_op_int_buf_tbl;
574 			internal_buffer_count = firmware_data->dec_op_int_buf_tbl_size;
575 		}
576 	} else {
577 		if (V4L2_TYPE_IS_OUTPUT(plane)) {
578 			internal_buf_type = firmware_data->enc_ip_int_buf_tbl;
579 			internal_buffer_count = firmware_data->enc_ip_int_buf_tbl_size;
580 		} else {
581 			internal_buf_type = firmware_data->enc_op_int_buf_tbl;
582 			internal_buffer_count = firmware_data->enc_op_int_buf_tbl_size;
583 		}
584 	}
585 
586 	for (i = 0; i < internal_buffer_count; i++) {
587 		buffers = &inst->buffers[internal_buf_type[i]];
588 		for (j = 0; j < buffers->min_count; j++) {
589 			ret = iris_create_internal_buffer(inst, internal_buf_type[i], j);
590 			if (ret)
591 				return ret;
592 		}
593 	}
594 
595 	return 0;
596 }
597 
598 int iris_queue_buffer(struct iris_inst *inst, struct iris_buffer *buf)
599 {
600 	const struct iris_hfi_session_ops *hfi_ops = inst->hfi_session_ops;
601 	int ret;
602 
603 	ret = hfi_ops->session_queue_buf(inst, buf);
604 	if (ret)
605 		return ret;
606 
607 	buf->attr &= ~BUF_ATTR_DEFERRED;
608 	buf->attr |= BUF_ATTR_QUEUED;
609 
610 	return 0;
611 }
612 
613 int iris_queue_internal_deferred_buffers(struct iris_inst *inst, enum iris_buffer_type buffer_type)
614 {
615 	struct iris_buffer *buffer, *next;
616 	struct iris_buffers *buffers;
617 	int ret = 0;
618 
619 	buffers = &inst->buffers[buffer_type];
620 	list_for_each_entry_safe(buffer, next, &buffers->list, list) {
621 		if (buffer->attr & BUF_ATTR_PENDING_RELEASE)
622 			continue;
623 		if (buffer->attr & BUF_ATTR_QUEUED)
624 			continue;
625 
626 		if (buffer->attr & BUF_ATTR_DEFERRED) {
627 			ret = iris_queue_buffer(inst, buffer);
628 			if (ret)
629 				return ret;
630 		}
631 	}
632 
633 	return ret;
634 }
635 
636 int iris_queue_internal_buffers(struct iris_inst *inst, u32 plane)
637 {
638 	const struct iris_firmware_data *firmware_data = inst->core->iris_firmware_data;
639 	struct iris_buffer *buffer, *next;
640 	struct iris_buffers *buffers;
641 	const u32 *internal_buf_type;
642 	u32 internal_buffer_count, i;
643 	int ret;
644 
645 	if (inst->domain == DECODER) {
646 		if (V4L2_TYPE_IS_OUTPUT(plane)) {
647 			internal_buf_type = firmware_data->dec_ip_int_buf_tbl;
648 			internal_buffer_count = firmware_data->dec_ip_int_buf_tbl_size;
649 		} else {
650 			internal_buf_type = firmware_data->dec_op_int_buf_tbl;
651 			internal_buffer_count = firmware_data->dec_op_int_buf_tbl_size;
652 		}
653 	} else {
654 		if (V4L2_TYPE_IS_OUTPUT(plane)) {
655 			internal_buf_type = firmware_data->enc_ip_int_buf_tbl;
656 			internal_buffer_count = firmware_data->enc_ip_int_buf_tbl_size;
657 		} else {
658 			internal_buf_type = firmware_data->enc_op_int_buf_tbl;
659 			internal_buffer_count = firmware_data->enc_op_int_buf_tbl_size;
660 		}
661 	}
662 
663 	for (i = 0; i < internal_buffer_count; i++) {
664 		buffers = &inst->buffers[internal_buf_type[i]];
665 		list_for_each_entry_safe(buffer, next, &buffers->list, list) {
666 			if (buffer->attr & BUF_ATTR_PENDING_RELEASE)
667 				continue;
668 			if (buffer->attr & BUF_ATTR_QUEUED)
669 				continue;
670 			if (buffer->type == BUF_DPB && inst->state != IRIS_INST_STREAMING) {
671 				buffer->attr |= BUF_ATTR_DEFERRED;
672 				continue;
673 			}
674 			ret = iris_queue_buffer(inst, buffer);
675 			if (ret)
676 				return ret;
677 		}
678 	}
679 
680 	return 0;
681 }
682 
683 int iris_destroy_internal_buffer(struct iris_inst *inst, struct iris_buffer *buffer)
684 {
685 	struct iris_core *core = inst->core;
686 
687 	list_del(&buffer->list);
688 	dma_free_attrs(core->dev, buffer->buffer_size, buffer->kvaddr,
689 		       buffer->device_addr, buffer->dma_attrs);
690 	kfree(buffer);
691 
692 	return 0;
693 }
694 
695 static int iris_destroy_internal_buffers(struct iris_inst *inst, u32 plane, bool force)
696 {
697 	const struct iris_firmware_data *firmware_data = inst->core->iris_firmware_data;
698 	struct iris_buffer *buf, *next;
699 	struct iris_buffers *buffers;
700 	const u32 *internal_buf_type;
701 	u32 i, len;
702 	int ret;
703 
704 	if (inst->domain == DECODER) {
705 		if (V4L2_TYPE_IS_OUTPUT(plane)) {
706 			internal_buf_type = firmware_data->dec_ip_int_buf_tbl;
707 			len = firmware_data->dec_ip_int_buf_tbl_size;
708 		} else {
709 			internal_buf_type = firmware_data->dec_op_int_buf_tbl;
710 			len = firmware_data->dec_op_int_buf_tbl_size;
711 		}
712 	} else {
713 		if (V4L2_TYPE_IS_OUTPUT(plane)) {
714 			internal_buf_type = firmware_data->enc_ip_int_buf_tbl;
715 			len = firmware_data->enc_ip_int_buf_tbl_size;
716 		} else {
717 			internal_buf_type = firmware_data->enc_op_int_buf_tbl;
718 			len = firmware_data->enc_op_int_buf_tbl_size;
719 		}
720 	}
721 
722 	for (i = 0; i < len; i++) {
723 		buffers = &inst->buffers[internal_buf_type[i]];
724 		list_for_each_entry_safe(buf, next, &buffers->list, list) {
725 			/*
726 			 * during stream on, skip destroying internal(DPB) buffer
727 			 * if firmware did not return it.
728 			 * during close, destroy all buffers irrespectively.
729 			 */
730 			if (!force && buf->attr & BUF_ATTR_QUEUED)
731 				continue;
732 
733 			ret = iris_destroy_internal_buffer(inst, buf);
734 			if (ret)
735 				return ret;
736 		}
737 	}
738 
739 	if (force) {
740 		if (inst->domain == DECODER)
741 			buffers = &inst->buffers[BUF_PERSIST];
742 		else
743 			buffers = &inst->buffers[BUF_ARP];
744 
745 		list_for_each_entry_safe(buf, next, &buffers->list, list) {
746 			ret = iris_destroy_internal_buffer(inst, buf);
747 			if (ret)
748 				return ret;
749 		}
750 	}
751 
752 	return 0;
753 }
754 
755 int iris_destroy_all_internal_buffers(struct iris_inst *inst, u32 plane)
756 {
757 	return iris_destroy_internal_buffers(inst, plane, true);
758 }
759 
760 int iris_destroy_dequeued_internal_buffers(struct iris_inst *inst, u32 plane)
761 {
762 	return iris_destroy_internal_buffers(inst, plane, false);
763 }
764 
765 static int iris_release_internal_buffers(struct iris_inst *inst,
766 					 enum iris_buffer_type buffer_type)
767 {
768 	const struct iris_hfi_session_ops *hfi_ops = inst->hfi_session_ops;
769 	struct iris_buffers *buffers = &inst->buffers[buffer_type];
770 	struct iris_buffer *buffer, *next;
771 	int ret;
772 
773 	list_for_each_entry_safe(buffer, next, &buffers->list, list) {
774 		if (buffer->attr & BUF_ATTR_PENDING_RELEASE)
775 			continue;
776 		if (!(buffer->attr & BUF_ATTR_QUEUED))
777 			continue;
778 		buffer->attr |= BUF_ATTR_PENDING_RELEASE;
779 		ret = hfi_ops->session_release_buf(inst, buffer);
780 		if (ret) {
781 			buffer->attr &= ~BUF_ATTR_PENDING_RELEASE;
782 			return ret;
783 		}
784 	}
785 
786 	return 0;
787 }
788 
789 static int iris_release_input_internal_buffers(struct iris_inst *inst)
790 {
791 	const struct iris_firmware_data *firmware_data = inst->core->iris_firmware_data;
792 	const u32 *internal_buf_type;
793 	u32 internal_buffer_count, i;
794 	int ret;
795 
796 	if (inst->domain == DECODER) {
797 		internal_buf_type = firmware_data->dec_ip_int_buf_tbl;
798 		internal_buffer_count = firmware_data->dec_ip_int_buf_tbl_size;
799 	} else {
800 		internal_buf_type = firmware_data->enc_ip_int_buf_tbl;
801 		internal_buffer_count = firmware_data->enc_ip_int_buf_tbl_size;
802 	}
803 
804 	for (i = 0; i < internal_buffer_count; i++) {
805 		ret = iris_release_internal_buffers(inst, internal_buf_type[i]);
806 		if (ret)
807 			return ret;
808 	}
809 
810 	return 0;
811 }
812 
813 int iris_alloc_and_queue_persist_bufs(struct iris_inst *inst, enum iris_buffer_type buffer_type)
814 {
815 	struct iris_buffers *buffers = &inst->buffers[buffer_type];
816 	struct iris_buffer *buffer, *next;
817 	int ret;
818 	u32 i;
819 
820 	if (!list_empty(&buffers->list))
821 		return 0;
822 
823 	iris_fill_internal_buf_info(inst, buffer_type);
824 
825 	for (i = 0; i < buffers->min_count; i++) {
826 		ret = iris_create_internal_buffer(inst, buffer_type, i);
827 		if (ret)
828 			return ret;
829 	}
830 
831 	list_for_each_entry_safe(buffer, next, &buffers->list, list) {
832 		if (buffer->attr & BUF_ATTR_PENDING_RELEASE)
833 			continue;
834 		if (buffer->attr & BUF_ATTR_QUEUED)
835 			continue;
836 		ret = iris_queue_buffer(inst, buffer);
837 		if (ret)
838 			return ret;
839 	}
840 
841 	return 0;
842 }
843 
844 int iris_alloc_and_queue_input_int_bufs(struct iris_inst *inst)
845 {
846 	int ret;
847 
848 	iris_get_internal_buffers(inst, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE);
849 
850 	ret = iris_release_input_internal_buffers(inst);
851 	if (ret)
852 		return ret;
853 
854 	ret = iris_create_internal_buffers(inst, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE);
855 	if (ret)
856 		return ret;
857 
858 	return iris_queue_internal_buffers(inst, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE);
859 }
860 
861 int iris_queue_deferred_buffers(struct iris_inst *inst, enum iris_buffer_type buf_type)
862 {
863 	struct v4l2_m2m_ctx *m2m_ctx = inst->m2m_ctx;
864 	struct v4l2_m2m_buffer *buffer, *n;
865 	struct iris_buffer *buf;
866 	int ret;
867 
868 	iris_scale_power(inst);
869 
870 	if (buf_type == BUF_INPUT) {
871 		v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buffer, n) {
872 			buf = to_iris_buffer(&buffer->vb);
873 			if (!(buf->attr & BUF_ATTR_DEFERRED))
874 				continue;
875 			ret = iris_queue_buffer(inst, buf);
876 			if (ret)
877 				return ret;
878 		}
879 	} else {
880 		v4l2_m2m_for_each_dst_buf_safe(m2m_ctx, buffer, n) {
881 			buf = to_iris_buffer(&buffer->vb);
882 			if (!(buf->attr & BUF_ATTR_DEFERRED))
883 				continue;
884 			ret = iris_queue_buffer(inst, buf);
885 			if (ret)
886 				return ret;
887 		}
888 	}
889 
890 	return 0;
891 }
892 
893 void iris_vb2_queue_error(struct iris_inst *inst)
894 {
895 	struct v4l2_m2m_ctx *m2m_ctx = inst->m2m_ctx;
896 	struct vb2_queue *q;
897 
898 	q = v4l2_m2m_get_src_vq(m2m_ctx);
899 	vb2_queue_error(q);
900 	q = v4l2_m2m_get_dst_vq(m2m_ctx);
901 	vb2_queue_error(q);
902 }
903 
904 static struct vb2_v4l2_buffer *
905 iris_helper_find_buf(struct iris_inst *inst, u32 type, u32 idx)
906 {
907 	struct v4l2_m2m_ctx *m2m_ctx = inst->m2m_ctx;
908 
909 	if (V4L2_TYPE_IS_OUTPUT(type))
910 		return v4l2_m2m_src_buf_remove_by_idx(m2m_ctx, idx);
911 	else
912 		return v4l2_m2m_dst_buf_remove_by_idx(m2m_ctx, idx);
913 }
914 
915 static void iris_get_ts_metadata(struct iris_inst *inst, u64 timestamp_ns,
916 				 struct vb2_v4l2_buffer *vbuf)
917 {
918 	u32 mask = V4L2_BUF_FLAG_TIMECODE | V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
919 	u32 i;
920 
921 	for (i = 0; i < ARRAY_SIZE(inst->tss); ++i) {
922 		if (inst->tss[i].ts_ns != timestamp_ns)
923 			continue;
924 
925 		vbuf->flags &= ~mask;
926 		vbuf->flags |= inst->tss[i].flags;
927 		vbuf->timecode = inst->tss[i].tc;
928 		return;
929 	}
930 
931 	vbuf->flags &= ~mask;
932 	vbuf->flags |= inst->tss[inst->metadata_idx].flags;
933 	vbuf->timecode = inst->tss[inst->metadata_idx].tc;
934 }
935 
936 int iris_vb2_buffer_done(struct iris_inst *inst, struct iris_buffer *buf)
937 {
938 	struct v4l2_m2m_ctx *m2m_ctx = inst->m2m_ctx;
939 	struct vb2_v4l2_buffer *vbuf;
940 	struct vb2_buffer *vb2;
941 	u32 type, state;
942 
943 	switch (buf->type) {
944 	case BUF_INPUT:
945 		type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
946 		break;
947 	case BUF_OUTPUT:
948 		type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
949 		break;
950 	default:
951 		return 0; /* Internal DPB Buffers */
952 	}
953 
954 	vbuf = iris_helper_find_buf(inst, type, buf->index);
955 	if (!vbuf)
956 		return -EINVAL;
957 
958 	vb2 = &vbuf->vb2_buf;
959 
960 	vbuf->flags |= buf->flags;
961 
962 	if (buf->flags & V4L2_BUF_FLAG_ERROR) {
963 		state = VB2_BUF_STATE_ERROR;
964 		vb2_set_plane_payload(vb2, 0, 0);
965 		vb2->timestamp = 0;
966 		v4l2_m2m_buf_done(vbuf, state);
967 		return 0;
968 	}
969 
970 	if (V4L2_TYPE_IS_CAPTURE(type)) {
971 		vb2_set_plane_payload(vb2, 0, buf->data_size);
972 		vbuf->sequence = inst->sequence_cap++;
973 		iris_get_ts_metadata(inst, buf->timestamp, vbuf);
974 	} else {
975 		vbuf->sequence = inst->sequence_out++;
976 	}
977 
978 	if (vbuf->flags & V4L2_BUF_FLAG_LAST) {
979 		if (!v4l2_m2m_has_stopped(m2m_ctx)) {
980 			const struct v4l2_event ev = { .type = V4L2_EVENT_EOS };
981 
982 			v4l2_event_queue_fh(&inst->fh, &ev);
983 			v4l2_m2m_mark_stopped(m2m_ctx);
984 		}
985 		inst->last_buffer_dequeued = true;
986 	}
987 
988 	state = VB2_BUF_STATE_DONE;
989 	vb2->timestamp = buf->timestamp;
990 	v4l2_m2m_buf_done(vbuf, state);
991 
992 	return 0;
993 }
994