xref: /linux/drivers/staging/media/sunxi/cedrus/cedrus_h264.c (revision 10358ea986c3c85516d1c8206486464f79d36e76)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Cedrus VPU driver
4  *
5  * Copyright (c) 2013 Jens Kuske <jenskuske@gmail.com>
6  * Copyright (c) 2018 Bootlin
7  */
8 
9 #include <linux/delay.h>
10 #include <linux/types.h>
11 
12 #include <media/videobuf2-dma-contig.h>
13 
14 #include "cedrus.h"
15 #include "cedrus_hw.h"
16 #include "cedrus_regs.h"
17 
/*
 * Offsets of the H.264 tables inside the VE SRAM.
 *
 * cedrus_h264_write_sram() shifts the value left by two bits before
 * programming it into VE_AVC_SRAM_PORT_OFFSET.
 */
enum cedrus_h264_sram_off {
	/* Explicit prediction weights and offsets. */
	CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE = 0x000,

	/* Array of struct cedrus_h264_sram_ref_pic entries. */
	CEDRUS_SRAM_H264_FRAMEBUFFER_LIST = 0x100,

	/* Reference picture lists, one byte per reference. */
	CEDRUS_SRAM_H264_REF_LIST_0 = 0x190,
	CEDRUS_SRAM_H264_REF_LIST_1 = 0x199,

	/* Scaling matrices. */
	CEDRUS_SRAM_H264_SCALING_LIST_8x8_0 = 0x200,
	CEDRUS_SRAM_H264_SCALING_LIST_8x8_1 = 0x210,
	CEDRUS_SRAM_H264_SCALING_LIST_4x4 = 0x220,
};
27 
28 struct cedrus_h264_sram_ref_pic {
29 	__le32	top_field_order_cnt;
30 	__le32	bottom_field_order_cnt;
31 	__le32	frame_info;
32 	__le32	luma_ptr;
33 	__le32	chroma_ptr;
34 	__le32	mv_col_top_ptr;
35 	__le32	mv_col_bot_ptr;
36 	__le32	reserved;
37 } __packed;
38 
/* Number of entries in the frame buffer list written to the SRAM. */
#define CEDRUS_H264_FRAME_NUM		18

/* Size of the neighbor info buffer allocated in cedrus_h264_start(). */
#define CEDRUS_NEIGHBOR_INFO_BUF_SIZE	(32 * SZ_1K)
/* Lower bound applied to the computed picture info buffer size. */
#define CEDRUS_MIN_PIC_INFO_BUF_SIZE	(130 * SZ_1K)
43 
44 static void cedrus_h264_write_sram(struct cedrus_dev *dev,
45 				   enum cedrus_h264_sram_off off,
46 				   const void *data, size_t len)
47 {
48 	const u32 *buffer = data;
49 	size_t count = DIV_ROUND_UP(len, 4);
50 
51 	cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET, off << 2);
52 
53 	while (count--)
54 		cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, *buffer++);
55 }
56 
57 static dma_addr_t cedrus_h264_mv_col_buf_addr(struct cedrus_buffer *buf,
58 					      unsigned int field)
59 {
60 	dma_addr_t addr = buf->codec.h264.mv_col_buf_dma;
61 
62 	/* Adjust for the field */
63 	addr += field * buf->codec.h264.mv_col_buf_size / 2;
64 
65 	return addr;
66 }
67 
68 static void cedrus_fill_ref_pic(struct cedrus_ctx *ctx,
69 				struct cedrus_buffer *buf,
70 				unsigned int top_field_order_cnt,
71 				unsigned int bottom_field_order_cnt,
72 				struct cedrus_h264_sram_ref_pic *pic)
73 {
74 	struct vb2_buffer *vbuf = &buf->m2m_buf.vb.vb2_buf;
75 
76 	pic->top_field_order_cnt = cpu_to_le32(top_field_order_cnt);
77 	pic->bottom_field_order_cnt = cpu_to_le32(bottom_field_order_cnt);
78 	pic->frame_info = cpu_to_le32(buf->codec.h264.pic_type << 8);
79 
80 	pic->luma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 0));
81 	pic->chroma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 1));
82 	pic->mv_col_top_ptr = cpu_to_le32(cedrus_h264_mv_col_buf_addr(buf, 0));
83 	pic->mv_col_bot_ptr = cpu_to_le32(cedrus_h264_mv_col_buf_addr(buf, 1));
84 }
85 
86 static int cedrus_write_frame_list(struct cedrus_ctx *ctx,
87 				   struct cedrus_run *run)
88 {
89 	struct cedrus_h264_sram_ref_pic pic_list[CEDRUS_H264_FRAME_NUM];
90 	const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
91 	const struct v4l2_ctrl_h264_sps *sps = run->h264.sps;
92 	struct vb2_queue *cap_q;
93 	struct cedrus_buffer *output_buf;
94 	struct cedrus_dev *dev = ctx->dev;
95 	unsigned long used_dpbs = 0;
96 	unsigned int position;
97 	int output = -1;
98 	unsigned int i;
99 
100 	cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
101 
102 	memset(pic_list, 0, sizeof(pic_list));
103 
104 	for (i = 0; i < ARRAY_SIZE(decode->dpb); i++) {
105 		const struct v4l2_h264_dpb_entry *dpb = &decode->dpb[i];
106 		struct cedrus_buffer *cedrus_buf;
107 		struct vb2_buffer *buf;
108 
109 		if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_VALID))
110 			continue;
111 
112 		buf = vb2_find_buffer(cap_q, dpb->reference_ts);
113 		if (!buf)
114 			continue;
115 
116 		cedrus_buf = vb2_to_cedrus_buffer(buf);
117 		position = cedrus_buf->codec.h264.position;
118 		used_dpbs |= BIT(position);
119 
120 		if (run->dst->vb2_buf.timestamp == dpb->reference_ts) {
121 			output = position;
122 			continue;
123 		}
124 
125 		if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
126 			continue;
127 
128 		cedrus_fill_ref_pic(ctx, cedrus_buf,
129 				    dpb->top_field_order_cnt,
130 				    dpb->bottom_field_order_cnt,
131 				    &pic_list[position]);
132 	}
133 
134 	if (output >= 0)
135 		position = output;
136 	else
137 		position = find_first_zero_bit(&used_dpbs, CEDRUS_H264_FRAME_NUM);
138 
139 	output_buf = vb2_to_cedrus_buffer(&run->dst->vb2_buf);
140 	output_buf->codec.h264.position = position;
141 
142 	if (!output_buf->codec.h264.mv_col_buf_size) {
143 		const struct v4l2_ctrl_h264_sps *sps = run->h264.sps;
144 		unsigned int field_size;
145 
146 		field_size = DIV_ROUND_UP(ctx->src_fmt.width, 16) *
147 			DIV_ROUND_UP(ctx->src_fmt.height, 16) * 16;
148 		if (!(sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE))
149 			field_size = field_size * 2;
150 		if (!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY))
151 			field_size = field_size * 2;
152 
153 		output_buf->codec.h264.mv_col_buf_size = field_size * 2;
154 		/* Buffer is never accessed by CPU, so we can skip kernel mapping. */
155 		output_buf->codec.h264.mv_col_buf =
156 			dma_alloc_attrs(dev->dev,
157 					output_buf->codec.h264.mv_col_buf_size,
158 					&output_buf->codec.h264.mv_col_buf_dma,
159 					GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
160 
161 		if (!output_buf->codec.h264.mv_col_buf) {
162 			output_buf->codec.h264.mv_col_buf_size = 0;
163 			return -ENOMEM;
164 		}
165 	}
166 
167 	if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC)
168 		output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FIELD;
169 	else if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)
170 		output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_MBAFF;
171 	else
172 		output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FRAME;
173 
174 	cedrus_fill_ref_pic(ctx, output_buf,
175 			    decode->top_field_order_cnt,
176 			    decode->bottom_field_order_cnt,
177 			    &pic_list[position]);
178 
179 	cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_FRAMEBUFFER_LIST,
180 			       pic_list, sizeof(pic_list));
181 
182 	cedrus_write(dev, VE_H264_OUTPUT_FRAME_IDX, position);
183 
184 	return 0;
185 }
186 
187 #define CEDRUS_MAX_REF_IDX	32
188 
189 static void _cedrus_write_ref_list(struct cedrus_ctx *ctx,
190 				   struct cedrus_run *run,
191 				   const struct v4l2_h264_reference *ref_list,
192 				   u8 num_ref, enum cedrus_h264_sram_off sram)
193 {
194 	const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
195 	struct vb2_queue *cap_q;
196 	struct cedrus_dev *dev = ctx->dev;
197 	u8 sram_array[CEDRUS_MAX_REF_IDX];
198 	unsigned int i;
199 	size_t size;
200 
201 	cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
202 
203 	memset(sram_array, 0, sizeof(sram_array));
204 
205 	for (i = 0; i < num_ref; i++) {
206 		const struct v4l2_h264_dpb_entry *dpb;
207 		const struct cedrus_buffer *cedrus_buf;
208 		unsigned int position;
209 		struct vb2_buffer *buf;
210 		u8 dpb_idx;
211 
212 		dpb_idx = ref_list[i].index;
213 		if (dpb_idx >= V4L2_H264_NUM_DPB_ENTRIES)
214 			continue;
215 
216 		dpb = &decode->dpb[dpb_idx];
217 
218 		if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
219 			continue;
220 
221 		buf = vb2_find_buffer(cap_q, dpb->reference_ts);
222 		if (!buf)
223 			continue;
224 
225 		cedrus_buf = vb2_to_cedrus_buffer(buf);
226 		position = cedrus_buf->codec.h264.position;
227 
228 		sram_array[i] |= position << 1;
229 		if (ref_list[i].fields == V4L2_H264_BOTTOM_FIELD_REF)
230 			sram_array[i] |= BIT(0);
231 	}
232 
233 	size = min_t(size_t, ALIGN(num_ref, 4), sizeof(sram_array));
234 	cedrus_h264_write_sram(dev, sram, &sram_array, size);
235 }
236 
237 static void cedrus_write_ref_list0(struct cedrus_ctx *ctx,
238 				   struct cedrus_run *run)
239 {
240 	const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
241 
242 	_cedrus_write_ref_list(ctx, run,
243 			       slice->ref_pic_list0,
244 			       slice->num_ref_idx_l0_active_minus1 + 1,
245 			       CEDRUS_SRAM_H264_REF_LIST_0);
246 }
247 
248 static void cedrus_write_ref_list1(struct cedrus_ctx *ctx,
249 				   struct cedrus_run *run)
250 {
251 	const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
252 
253 	_cedrus_write_ref_list(ctx, run,
254 			       slice->ref_pic_list1,
255 			       slice->num_ref_idx_l1_active_minus1 + 1,
256 			       CEDRUS_SRAM_H264_REF_LIST_1);
257 }
258 
259 static void cedrus_write_scaling_lists(struct cedrus_ctx *ctx,
260 				       struct cedrus_run *run)
261 {
262 	const struct v4l2_ctrl_h264_scaling_matrix *scaling =
263 		run->h264.scaling_matrix;
264 	const struct v4l2_ctrl_h264_pps *pps = run->h264.pps;
265 	struct cedrus_dev *dev = ctx->dev;
266 
267 	if (!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT))
268 		return;
269 
270 	cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_0,
271 			       scaling->scaling_list_8x8[0],
272 			       sizeof(scaling->scaling_list_8x8[0]));
273 
274 	cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_1,
275 			       scaling->scaling_list_8x8[1],
276 			       sizeof(scaling->scaling_list_8x8[1]));
277 
278 	cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_4x4,
279 			       scaling->scaling_list_4x4,
280 			       sizeof(scaling->scaling_list_4x4));
281 }
282 
283 static void cedrus_write_pred_weight_table(struct cedrus_ctx *ctx,
284 					   struct cedrus_run *run)
285 {
286 	const struct v4l2_ctrl_h264_pred_weights *pred_weight =
287 		run->h264.pred_weights;
288 	struct cedrus_dev *dev = ctx->dev;
289 	int i, j, k;
290 
291 	cedrus_write(dev, VE_H264_SHS_WP,
292 		     ((pred_weight->chroma_log2_weight_denom & 0x7) << 4) |
293 		     ((pred_weight->luma_log2_weight_denom & 0x7) << 0));
294 
295 	cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET,
296 		     CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE << 2);
297 
298 	for (i = 0; i < ARRAY_SIZE(pred_weight->weight_factors); i++) {
299 		const struct v4l2_h264_weight_factors *factors =
300 			&pred_weight->weight_factors[i];
301 
302 		for (j = 0; j < ARRAY_SIZE(factors->luma_weight); j++) {
303 			u32 val;
304 
305 			val = (((u32)factors->luma_offset[j] & 0x1ff) << 16) |
306 				(factors->luma_weight[j] & 0x1ff);
307 			cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, val);
308 		}
309 
310 		for (j = 0; j < ARRAY_SIZE(factors->chroma_weight); j++) {
311 			for (k = 0; k < ARRAY_SIZE(factors->chroma_weight[0]); k++) {
312 				u32 val;
313 
314 				val = (((u32)factors->chroma_offset[j][k] & 0x1ff) << 16) |
315 					(factors->chroma_weight[j][k] & 0x1ff);
316 				cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, val);
317 			}
318 		}
319 	}
320 }
321 
322 /*
323  * It turns out that using VE_H264_VLD_OFFSET to skip bits is not reliable. In
324  * rare cases frame is not decoded correctly. However, setting offset to 0 and
325  * skipping appropriate amount of bits with flush bits trigger always works.
326  */
327 static void cedrus_skip_bits(struct cedrus_dev *dev, int num)
328 {
329 	int count = 0;
330 
331 	while (count < num) {
332 		int tmp = min(num - count, 32);
333 
334 		cedrus_write(dev, VE_H264_TRIGGER_TYPE,
335 			     VE_H264_TRIGGER_TYPE_FLUSH_BITS |
336 			     VE_H264_TRIGGER_TYPE_N_BITS(tmp));
337 		while (cedrus_read(dev, VE_H264_STATUS) & VE_H264_STATUS_VLD_BUSY)
338 			udelay(1);
339 
340 		count += tmp;
341 	}
342 }
343 
344 static void cedrus_set_params(struct cedrus_ctx *ctx,
345 			      struct cedrus_run *run)
346 {
347 	const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
348 	const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
349 	const struct v4l2_ctrl_h264_pps *pps = run->h264.pps;
350 	const struct v4l2_ctrl_h264_sps *sps = run->h264.sps;
351 	struct vb2_buffer *src_buf = &run->src->vb2_buf;
352 	struct cedrus_dev *dev = ctx->dev;
353 	dma_addr_t src_buf_addr;
354 	size_t slice_bytes = vb2_get_plane_payload(src_buf, 0);
355 	unsigned int pic_width_in_mbs;
356 	bool mbaff_pic;
357 	u32 reg;
358 
359 	cedrus_write(dev, VE_H264_VLD_LEN, slice_bytes * 8);
360 	cedrus_write(dev, VE_H264_VLD_OFFSET, 0);
361 
362 	src_buf_addr = vb2_dma_contig_plane_dma_addr(src_buf, 0);
363 	cedrus_write(dev, VE_H264_VLD_END, src_buf_addr + slice_bytes);
364 	cedrus_write(dev, VE_H264_VLD_ADDR,
365 		     VE_H264_VLD_ADDR_VAL(src_buf_addr) |
366 		     VE_H264_VLD_ADDR_FIRST | VE_H264_VLD_ADDR_VALID |
367 		     VE_H264_VLD_ADDR_LAST);
368 
369 	if (ctx->src_fmt.width > 2048) {
370 		cedrus_write(dev, VE_BUF_CTRL,
371 			     VE_BUF_CTRL_INTRAPRED_MIXED_RAM |
372 			     VE_BUF_CTRL_DBLK_MIXED_RAM);
373 		cedrus_write(dev, VE_DBLK_DRAM_BUF_ADDR,
374 			     ctx->codec.h264.deblk_buf_dma);
375 		cedrus_write(dev, VE_INTRAPRED_DRAM_BUF_ADDR,
376 			     ctx->codec.h264.intra_pred_buf_dma);
377 	} else {
378 		cedrus_write(dev, VE_BUF_CTRL,
379 			     VE_BUF_CTRL_INTRAPRED_INT_SRAM |
380 			     VE_BUF_CTRL_DBLK_INT_SRAM);
381 	}
382 
383 	/*
384 	 * FIXME: Since the bitstream parsing is done in software, and
385 	 * in userspace, this shouldn't be needed anymore. But it
386 	 * turns out that removing it breaks the decoding process,
387 	 * without any clear indication why.
388 	 */
389 	cedrus_write(dev, VE_H264_TRIGGER_TYPE,
390 		     VE_H264_TRIGGER_TYPE_INIT_SWDEC);
391 
392 	cedrus_skip_bits(dev, slice->header_bit_size);
393 
394 	if (V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED(pps, slice))
395 		cedrus_write_pred_weight_table(ctx, run);
396 
397 	if ((slice->slice_type == V4L2_H264_SLICE_TYPE_P) ||
398 	    (slice->slice_type == V4L2_H264_SLICE_TYPE_SP) ||
399 	    (slice->slice_type == V4L2_H264_SLICE_TYPE_B))
400 		cedrus_write_ref_list0(ctx, run);
401 
402 	if (slice->slice_type == V4L2_H264_SLICE_TYPE_B)
403 		cedrus_write_ref_list1(ctx, run);
404 
405 	// picture parameters
406 	reg = 0;
407 	/*
408 	 * FIXME: the kernel headers are allowing the default value to
409 	 * be passed, but the libva doesn't give us that.
410 	 */
411 	reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 10;
412 	reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 5;
413 	reg |= (pps->weighted_bipred_idc & 0x3) << 2;
414 	if (pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE)
415 		reg |= VE_H264_PPS_ENTROPY_CODING_MODE;
416 	if (pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED)
417 		reg |= VE_H264_PPS_WEIGHTED_PRED;
418 	if (pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED)
419 		reg |= VE_H264_PPS_CONSTRAINED_INTRA_PRED;
420 	if (pps->flags & V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE)
421 		reg |= VE_H264_PPS_TRANSFORM_8X8_MODE;
422 	cedrus_write(dev, VE_H264_PPS, reg);
423 
424 	// sequence parameters
425 	reg = 0;
426 	reg |= (sps->chroma_format_idc & 0x7) << 19;
427 	reg |= (sps->pic_width_in_mbs_minus1 & 0xff) << 8;
428 	reg |= sps->pic_height_in_map_units_minus1 & 0xff;
429 	if (sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY)
430 		reg |= VE_H264_SPS_MBS_ONLY;
431 	if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)
432 		reg |= VE_H264_SPS_MB_ADAPTIVE_FRAME_FIELD;
433 	if (sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE)
434 		reg |= VE_H264_SPS_DIRECT_8X8_INFERENCE;
435 	cedrus_write(dev, VE_H264_SPS, reg);
436 
437 	mbaff_pic = !(decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) &&
438 		    (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD);
439 	pic_width_in_mbs = sps->pic_width_in_mbs_minus1 + 1;
440 
441 	// slice parameters
442 	reg = 0;
443 	reg |= ((slice->first_mb_in_slice % pic_width_in_mbs) & 0xff) << 24;
444 	reg |= (((slice->first_mb_in_slice / pic_width_in_mbs) *
445 		 (mbaff_pic + 1)) & 0xff) << 16;
446 	reg |= decode->nal_ref_idc ? BIT(12) : 0;
447 	reg |= (slice->slice_type & 0xf) << 8;
448 	reg |= slice->cabac_init_idc & 0x3;
449 	if (ctx->fh.m2m_ctx->new_frame)
450 		reg |= VE_H264_SHS_FIRST_SLICE_IN_PIC;
451 	if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC)
452 		reg |= VE_H264_SHS_FIELD_PIC;
453 	if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD)
454 		reg |= VE_H264_SHS_BOTTOM_FIELD;
455 	if (slice->flags & V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED)
456 		reg |= VE_H264_SHS_DIRECT_SPATIAL_MV_PRED;
457 	cedrus_write(dev, VE_H264_SHS, reg);
458 
459 	reg = 0;
460 	reg |= VE_H264_SHS2_NUM_REF_IDX_ACTIVE_OVRD;
461 	reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 24;
462 	reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 16;
463 	reg |= (slice->disable_deblocking_filter_idc & 0x3) << 8;
464 	reg |= (slice->slice_alpha_c0_offset_div2 & 0xf) << 4;
465 	reg |= slice->slice_beta_offset_div2 & 0xf;
466 	cedrus_write(dev, VE_H264_SHS2, reg);
467 
468 	reg = 0;
469 	reg |= (pps->second_chroma_qp_index_offset & 0x3f) << 16;
470 	reg |= (pps->chroma_qp_index_offset & 0x3f) << 8;
471 	reg |= (pps->pic_init_qp_minus26 + 26 + slice->slice_qp_delta) & 0x3f;
472 	if (!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT))
473 		reg |= VE_H264_SHS_QP_SCALING_MATRIX_DEFAULT;
474 	cedrus_write(dev, VE_H264_SHS_QP, reg);
475 
476 	// clear status flags
477 	cedrus_write(dev, VE_H264_STATUS, cedrus_read(dev, VE_H264_STATUS));
478 
479 	// enable int
480 	cedrus_write(dev, VE_H264_CTRL,
481 		     VE_H264_CTRL_SLICE_DECODE_INT |
482 		     VE_H264_CTRL_DECODE_ERR_INT |
483 		     VE_H264_CTRL_VLD_DATA_REQ_INT);
484 }
485 
486 static enum cedrus_irq_status
487 cedrus_h264_irq_status(struct cedrus_ctx *ctx)
488 {
489 	struct cedrus_dev *dev = ctx->dev;
490 	u32 reg = cedrus_read(dev, VE_H264_STATUS);
491 
492 	if (reg & (VE_H264_STATUS_DECODE_ERR_INT |
493 		   VE_H264_STATUS_VLD_DATA_REQ_INT))
494 		return CEDRUS_IRQ_ERROR;
495 
496 	if (reg & VE_H264_CTRL_SLICE_DECODE_INT)
497 		return CEDRUS_IRQ_OK;
498 
499 	return CEDRUS_IRQ_NONE;
500 }
501 
502 static void cedrus_h264_irq_clear(struct cedrus_ctx *ctx)
503 {
504 	struct cedrus_dev *dev = ctx->dev;
505 
506 	cedrus_write(dev, VE_H264_STATUS,
507 		     VE_H264_STATUS_INT_MASK);
508 }
509 
510 static void cedrus_h264_irq_disable(struct cedrus_ctx *ctx)
511 {
512 	struct cedrus_dev *dev = ctx->dev;
513 	u32 reg = cedrus_read(dev, VE_H264_CTRL);
514 
515 	cedrus_write(dev, VE_H264_CTRL,
516 		     reg & ~VE_H264_CTRL_INT_MASK);
517 }
518 
519 static int cedrus_h264_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
520 {
521 	struct cedrus_dev *dev = ctx->dev;
522 	int ret;
523 
524 	cedrus_engine_enable(ctx);
525 
526 	cedrus_write(dev, VE_H264_SDROT_CTRL, 0);
527 	cedrus_write(dev, VE_H264_EXTRA_BUFFER1,
528 		     ctx->codec.h264.pic_info_buf_dma);
529 	cedrus_write(dev, VE_H264_EXTRA_BUFFER2,
530 		     ctx->codec.h264.neighbor_info_buf_dma);
531 
532 	cedrus_write_scaling_lists(ctx, run);
533 	ret = cedrus_write_frame_list(ctx, run);
534 	if (ret)
535 		return ret;
536 
537 	cedrus_set_params(ctx, run);
538 
539 	return 0;
540 }
541 
542 static int cedrus_h264_start(struct cedrus_ctx *ctx)
543 {
544 	struct cedrus_dev *dev = ctx->dev;
545 	unsigned int pic_info_size;
546 	int ret;
547 
548 	/*
549 	 * NOTE: All buffers allocated here are only used by HW, so we
550 	 * can add DMA_ATTR_NO_KERNEL_MAPPING flag when allocating them.
551 	 */
552 
553 	/* Formula for picture buffer size is taken from CedarX source. */
554 
555 	if (ctx->src_fmt.width > 2048)
556 		pic_info_size = CEDRUS_H264_FRAME_NUM * 0x4000;
557 	else
558 		pic_info_size = CEDRUS_H264_FRAME_NUM * 0x1000;
559 
560 	/*
561 	 * FIXME: If V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY is set,
562 	 * there is no need to multiply by 2.
563 	 */
564 	pic_info_size += ctx->src_fmt.height * 2 * 64;
565 
566 	if (pic_info_size < CEDRUS_MIN_PIC_INFO_BUF_SIZE)
567 		pic_info_size = CEDRUS_MIN_PIC_INFO_BUF_SIZE;
568 
569 	ctx->codec.h264.pic_info_buf_size = pic_info_size;
570 	ctx->codec.h264.pic_info_buf =
571 		dma_alloc_attrs(dev->dev, ctx->codec.h264.pic_info_buf_size,
572 				&ctx->codec.h264.pic_info_buf_dma,
573 				GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
574 	if (!ctx->codec.h264.pic_info_buf)
575 		return -ENOMEM;
576 
577 	/*
578 	 * That buffer is supposed to be 16kiB in size, and be aligned
579 	 * on 16kiB as well. However, dma_alloc_attrs provides the
580 	 * guarantee that we'll have a DMA address aligned on the
581 	 * smallest page order that is greater to the requested size,
582 	 * so we don't have to overallocate.
583 	 */
584 	ctx->codec.h264.neighbor_info_buf =
585 		dma_alloc_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
586 				&ctx->codec.h264.neighbor_info_buf_dma,
587 				GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
588 	if (!ctx->codec.h264.neighbor_info_buf) {
589 		ret = -ENOMEM;
590 		goto err_pic_buf;
591 	}
592 
593 	if (ctx->src_fmt.width > 2048) {
594 		/*
595 		 * Formulas for deblock and intra prediction buffer sizes
596 		 * are taken from CedarX source.
597 		 */
598 
599 		ctx->codec.h264.deblk_buf_size =
600 			ALIGN(ctx->src_fmt.width, 32) * 12;
601 		ctx->codec.h264.deblk_buf =
602 			dma_alloc_attrs(dev->dev,
603 					ctx->codec.h264.deblk_buf_size,
604 					&ctx->codec.h264.deblk_buf_dma,
605 					GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
606 		if (!ctx->codec.h264.deblk_buf) {
607 			ret = -ENOMEM;
608 			goto err_neighbor_buf;
609 		}
610 
611 		/*
612 		 * NOTE: Multiplying by two deviates from CedarX logic, but it
613 		 * is for some unknown reason needed for H264 4K decoding on H6.
614 		 */
615 		ctx->codec.h264.intra_pred_buf_size =
616 			ALIGN(ctx->src_fmt.width, 64) * 5 * 2;
617 		ctx->codec.h264.intra_pred_buf =
618 			dma_alloc_attrs(dev->dev,
619 					ctx->codec.h264.intra_pred_buf_size,
620 					&ctx->codec.h264.intra_pred_buf_dma,
621 					GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
622 		if (!ctx->codec.h264.intra_pred_buf) {
623 			ret = -ENOMEM;
624 			goto err_deblk_buf;
625 		}
626 	}
627 
628 	return 0;
629 
630 err_deblk_buf:
631 	dma_free_attrs(dev->dev, ctx->codec.h264.deblk_buf_size,
632 		       ctx->codec.h264.deblk_buf,
633 		       ctx->codec.h264.deblk_buf_dma,
634 		       DMA_ATTR_NO_KERNEL_MAPPING);
635 
636 err_neighbor_buf:
637 	dma_free_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
638 		       ctx->codec.h264.neighbor_info_buf,
639 		       ctx->codec.h264.neighbor_info_buf_dma,
640 		       DMA_ATTR_NO_KERNEL_MAPPING);
641 
642 err_pic_buf:
643 	dma_free_attrs(dev->dev, ctx->codec.h264.pic_info_buf_size,
644 		       ctx->codec.h264.pic_info_buf,
645 		       ctx->codec.h264.pic_info_buf_dma,
646 		       DMA_ATTR_NO_KERNEL_MAPPING);
647 	return ret;
648 }
649 
650 static void cedrus_h264_stop(struct cedrus_ctx *ctx)
651 {
652 	struct cedrus_dev *dev = ctx->dev;
653 	struct cedrus_buffer *buf;
654 	struct vb2_queue *vq;
655 	unsigned int i;
656 
657 	vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
658 
659 	for (i = 0; i < vb2_get_num_buffers(vq); i++) {
660 		struct vb2_buffer *vb = vb2_get_buffer(vq, i);
661 
662 		if (!vb)
663 			continue;
664 
665 		buf = vb2_to_cedrus_buffer(vb);
666 
667 		if (buf->codec.h264.mv_col_buf_size > 0) {
668 			dma_free_attrs(dev->dev,
669 				       buf->codec.h264.mv_col_buf_size,
670 				       buf->codec.h264.mv_col_buf,
671 				       buf->codec.h264.mv_col_buf_dma,
672 				       DMA_ATTR_NO_KERNEL_MAPPING);
673 
674 			buf->codec.h264.mv_col_buf_size = 0;
675 		}
676 	}
677 
678 	dma_free_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
679 		       ctx->codec.h264.neighbor_info_buf,
680 		       ctx->codec.h264.neighbor_info_buf_dma,
681 		       DMA_ATTR_NO_KERNEL_MAPPING);
682 	dma_free_attrs(dev->dev, ctx->codec.h264.pic_info_buf_size,
683 		       ctx->codec.h264.pic_info_buf,
684 		       ctx->codec.h264.pic_info_buf_dma,
685 		       DMA_ATTR_NO_KERNEL_MAPPING);
686 	if (ctx->codec.h264.deblk_buf_size)
687 		dma_free_attrs(dev->dev, ctx->codec.h264.deblk_buf_size,
688 			       ctx->codec.h264.deblk_buf,
689 			       ctx->codec.h264.deblk_buf_dma,
690 			       DMA_ATTR_NO_KERNEL_MAPPING);
691 	if (ctx->codec.h264.intra_pred_buf_size)
692 		dma_free_attrs(dev->dev, ctx->codec.h264.intra_pred_buf_size,
693 			       ctx->codec.h264.intra_pred_buf,
694 			       ctx->codec.h264.intra_pred_buf_dma,
695 			       DMA_ATTR_NO_KERNEL_MAPPING);
696 }
697 
698 static void cedrus_h264_trigger(struct cedrus_ctx *ctx)
699 {
700 	struct cedrus_dev *dev = ctx->dev;
701 
702 	cedrus_write(dev, VE_H264_TRIGGER_TYPE,
703 		     VE_H264_TRIGGER_TYPE_AVC_SLICE_DECODE);
704 }
705 
706 struct cedrus_dec_ops cedrus_dec_ops_h264 = {
707 	.irq_clear	= cedrus_h264_irq_clear,
708 	.irq_disable	= cedrus_h264_irq_disable,
709 	.irq_status	= cedrus_h264_irq_status,
710 	.setup		= cedrus_h264_setup,
711 	.start		= cedrus_h264_start,
712 	.stop		= cedrus_h264_stop,
713 	.trigger	= cedrus_h264_trigger,
714 };
715