xref: /linux/drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.c (revision bf4afc53b77aeaa48b5409da5c8da6bb4eff7f43)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Rockchip Video Decoder VDPU383 H264 backend
4  *
5  * Copyright (C) 2024 Collabora, Ltd.
6  *  Detlev Casanova <detlev.casanova@collabora.com>
7  */
8 
9 #include <media/v4l2-h264.h>
10 #include <media/v4l2-mem2mem.h>
11 
12 #include <linux/iopoll.h>
13 
14 #include "rkvdec-rcb.h"
15 #include "rkvdec-cabac.h"
16 #include "rkvdec-vdpu383-regs.h"
17 #include "rkvdec-h264-common.h"
18 
19 struct rkvdec_sps {
20 	u16 seq_parameter_set_id:			4;
21 	u16 profile_idc:				8;
22 	u16 constraint_set3_flag:			1;
23 	u16 chroma_format_idc:				2;
24 	u16 bit_depth_luma:				3;
25 	u16 bit_depth_chroma:				3;
26 	u16 qpprime_y_zero_transform_bypass_flag:	1;
27 	u16 log2_max_frame_num_minus4:			4;
28 	u16 max_num_ref_frames:				5;
29 	u16 pic_order_cnt_type:				2;
30 	u16 log2_max_pic_order_cnt_lsb_minus4:		4;
31 	u16 delta_pic_order_always_zero_flag:		1;
32 
33 	u16 pic_width_in_mbs:				16;
34 	u16 pic_height_in_mbs:				16;
35 
36 	u16 frame_mbs_only_flag:			1;
37 	u16 mb_adaptive_frame_field_flag:		1;
38 	u16 direct_8x8_inference_flag:			1;
39 	u16 mvc_extension_enable:			1;
40 	u16 num_views:					2;
41 	u16 view_id0:                                   10;
42 	u16 view_id1:                                   10;
43 } __packed;
44 
45 struct rkvdec_pps {
46 	u32 pic_parameter_set_id:				8;
47 	u32 pps_seq_parameter_set_id:				5;
48 	u32 entropy_coding_mode_flag:				1;
49 	u32 bottom_field_pic_order_in_frame_present_flag:	1;
50 	u32 num_ref_idx_l0_default_active_minus1:		5;
51 	u32 num_ref_idx_l1_default_active_minus1:		5;
52 	u32 weighted_pred_flag:					1;
53 	u32 weighted_bipred_idc:				2;
54 	u32 pic_init_qp_minus26:				7;
55 	u32 pic_init_qs_minus26:				6;
56 	u32 chroma_qp_index_offset:				5;
57 	u32 deblocking_filter_control_present_flag:		1;
58 	u32 constrained_intra_pred_flag:			1;
59 	u32 redundant_pic_cnt_present:				1;
60 	u32 transform_8x8_mode_flag:				1;
61 	u32 second_chroma_qp_index_offset:			5;
62 	u32 scaling_list_enable_flag:				1;
63 	u32 is_longterm:					16;
64 	u32 voidx:						16;
65 
66 	// dpb
67 	u32 pic_field_flag:                                     1;
68 	u32 pic_associated_flag:                                1;
69 	u32 cur_top_field:					32;
70 	u32 cur_bot_field:					32;
71 
72 	u32 top_field_order_cnt0:				32;
73 	u32 bot_field_order_cnt0:				32;
74 	u32 top_field_order_cnt1:				32;
75 	u32 bot_field_order_cnt1:				32;
76 	u32 top_field_order_cnt2:				32;
77 	u32 bot_field_order_cnt2:				32;
78 	u32 top_field_order_cnt3:				32;
79 	u32 bot_field_order_cnt3:				32;
80 	u32 top_field_order_cnt4:				32;
81 	u32 bot_field_order_cnt4:				32;
82 	u32 top_field_order_cnt5:				32;
83 	u32 bot_field_order_cnt5:				32;
84 	u32 top_field_order_cnt6:				32;
85 	u32 bot_field_order_cnt6:				32;
86 	u32 top_field_order_cnt7:				32;
87 	u32 bot_field_order_cnt7:				32;
88 	u32 top_field_order_cnt8:				32;
89 	u32 bot_field_order_cnt8:				32;
90 	u32 top_field_order_cnt9:				32;
91 	u32 bot_field_order_cnt9:				32;
92 	u32 top_field_order_cnt10:				32;
93 	u32 bot_field_order_cnt10:				32;
94 	u32 top_field_order_cnt11:				32;
95 	u32 bot_field_order_cnt11:				32;
96 	u32 top_field_order_cnt12:				32;
97 	u32 bot_field_order_cnt12:				32;
98 	u32 top_field_order_cnt13:				32;
99 	u32 bot_field_order_cnt13:				32;
100 	u32 top_field_order_cnt14:				32;
101 	u32 bot_field_order_cnt14:				32;
102 	u32 top_field_order_cnt15:				32;
103 	u32 bot_field_order_cnt15:				32;
104 
105 	u32 ref_field_flags:					16;
106 	u32 ref_topfield_used:					16;
107 	u32 ref_botfield_used:					16;
108 	u32 ref_colmv_use_flag:					16;
109 
110 	u32 reserved0:						30;
111 	u32 reserved[3];
112 } __packed;
113 
114 struct rkvdec_sps_pps {
115 	struct rkvdec_sps sps;
116 	struct rkvdec_pps pps;
117 } __packed;
118 
119 /* Data structure describing auxiliary buffer format. */
120 struct rkvdec_h264_priv_tbl {
121 	s8 cabac_table[4][464][2];
122 	struct rkvdec_h264_scaling_list scaling_list;
123 	struct rkvdec_sps_pps param_set[256];
124 	struct rkvdec_rps rps;
125 } __packed;
126 
127 struct rkvdec_h264_ctx {
128 	struct rkvdec_aux_buf priv_tbl;
129 	struct rkvdec_h264_reflists reflists;
130 	struct vdpu383_regs_h26x regs;
131 };
132 
set_field_order_cnt(struct rkvdec_pps * pps,const struct v4l2_h264_dpb_entry * dpb)133 static void set_field_order_cnt(struct rkvdec_pps *pps, const struct v4l2_h264_dpb_entry *dpb)
134 {
135 	pps->top_field_order_cnt0 = dpb[0].top_field_order_cnt;
136 	pps->bot_field_order_cnt0 = dpb[0].bottom_field_order_cnt;
137 	pps->top_field_order_cnt1 = dpb[1].top_field_order_cnt;
138 	pps->bot_field_order_cnt1 = dpb[1].bottom_field_order_cnt;
139 	pps->top_field_order_cnt2 = dpb[2].top_field_order_cnt;
140 	pps->bot_field_order_cnt2 = dpb[2].bottom_field_order_cnt;
141 	pps->top_field_order_cnt3 = dpb[3].top_field_order_cnt;
142 	pps->bot_field_order_cnt3 = dpb[3].bottom_field_order_cnt;
143 	pps->top_field_order_cnt4 = dpb[4].top_field_order_cnt;
144 	pps->bot_field_order_cnt4 = dpb[4].bottom_field_order_cnt;
145 	pps->top_field_order_cnt5 = dpb[5].top_field_order_cnt;
146 	pps->bot_field_order_cnt5 = dpb[5].bottom_field_order_cnt;
147 	pps->top_field_order_cnt6 = dpb[6].top_field_order_cnt;
148 	pps->bot_field_order_cnt6 = dpb[6].bottom_field_order_cnt;
149 	pps->top_field_order_cnt7 = dpb[7].top_field_order_cnt;
150 	pps->bot_field_order_cnt7 = dpb[7].bottom_field_order_cnt;
151 	pps->top_field_order_cnt8 = dpb[8].top_field_order_cnt;
152 	pps->bot_field_order_cnt8 = dpb[8].bottom_field_order_cnt;
153 	pps->top_field_order_cnt9 = dpb[9].top_field_order_cnt;
154 	pps->bot_field_order_cnt9 = dpb[9].bottom_field_order_cnt;
155 	pps->top_field_order_cnt10 = dpb[10].top_field_order_cnt;
156 	pps->bot_field_order_cnt10 = dpb[10].bottom_field_order_cnt;
157 	pps->top_field_order_cnt11 = dpb[11].top_field_order_cnt;
158 	pps->bot_field_order_cnt11 = dpb[11].bottom_field_order_cnt;
159 	pps->top_field_order_cnt12 = dpb[12].top_field_order_cnt;
160 	pps->bot_field_order_cnt12 = dpb[12].bottom_field_order_cnt;
161 	pps->top_field_order_cnt13 = dpb[13].top_field_order_cnt;
162 	pps->bot_field_order_cnt13 = dpb[13].bottom_field_order_cnt;
163 	pps->top_field_order_cnt14 = dpb[14].top_field_order_cnt;
164 	pps->bot_field_order_cnt14 = dpb[14].bottom_field_order_cnt;
165 	pps->top_field_order_cnt15 = dpb[15].top_field_order_cnt;
166 	pps->bot_field_order_cnt15 = dpb[15].bottom_field_order_cnt;
167 }
168 
assemble_hw_pps(struct rkvdec_ctx * ctx,struct rkvdec_h264_run * run)169 static void assemble_hw_pps(struct rkvdec_ctx *ctx,
170 			    struct rkvdec_h264_run *run)
171 {
172 	struct rkvdec_h264_ctx *h264_ctx = ctx->priv;
173 	const struct v4l2_ctrl_h264_sps *sps = run->sps;
174 	const struct v4l2_ctrl_h264_pps *pps = run->pps;
175 	const struct v4l2_ctrl_h264_decode_params *dec_params = run->decode_params;
176 	const struct v4l2_h264_dpb_entry *dpb = dec_params->dpb;
177 	struct rkvdec_h264_priv_tbl *priv_tbl = h264_ctx->priv_tbl.cpu;
178 	struct rkvdec_sps_pps *hw_ps;
179 	u32 pic_width, pic_height;
180 	u32 i;
181 
182 	/*
183 	 * HW read the SPS/PPS information from PPS packet index by PPS id.
184 	 * offset from the base can be calculated by PPS_id * 32 (size per PPS
185 	 * packet unit). so the driver copy SPS/PPS information to the exact PPS
186 	 * packet unit for HW accessing.
187 	 */
188 	hw_ps = &priv_tbl->param_set[pps->pic_parameter_set_id];
189 	memset(hw_ps, 0, sizeof(*hw_ps));
190 
191 	/* write sps */
192 	hw_ps->sps.seq_parameter_set_id = sps->seq_parameter_set_id;
193 	hw_ps->sps.profile_idc = sps->profile_idc;
194 	hw_ps->sps.constraint_set3_flag = !!(sps->constraint_set_flags & (1 << 3));
195 	hw_ps->sps.chroma_format_idc = sps->chroma_format_idc;
196 	hw_ps->sps.bit_depth_luma = sps->bit_depth_luma_minus8;
197 	hw_ps->sps.bit_depth_chroma = sps->bit_depth_chroma_minus8;
198 	hw_ps->sps.qpprime_y_zero_transform_bypass_flag =
199 		!!(sps->flags & V4L2_H264_SPS_FLAG_QPPRIME_Y_ZERO_TRANSFORM_BYPASS);
200 	hw_ps->sps.log2_max_frame_num_minus4 = sps->log2_max_frame_num_minus4;
201 	hw_ps->sps.max_num_ref_frames = sps->max_num_ref_frames;
202 	hw_ps->sps.pic_order_cnt_type = sps->pic_order_cnt_type;
203 	hw_ps->sps.log2_max_pic_order_cnt_lsb_minus4 =
204 		sps->log2_max_pic_order_cnt_lsb_minus4;
205 	hw_ps->sps.delta_pic_order_always_zero_flag =
206 		!!(sps->flags & V4L2_H264_SPS_FLAG_DELTA_PIC_ORDER_ALWAYS_ZERO);
207 	hw_ps->sps.mvc_extension_enable = 0;
208 	hw_ps->sps.num_views = 0;
209 
210 	/*
211 	 * Use the SPS values since they are already in macroblocks
212 	 * dimensions, height can be field height (halved) if
213 	 * V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY is not set and also it allows
214 	 * decoding smaller images into larger allocation which can be used
215 	 * to implementing SVC spatial layer support.
216 	 */
217 	pic_width = 16 * (sps->pic_width_in_mbs_minus1 + 1);
218 	pic_height = 16 * (sps->pic_height_in_map_units_minus1 + 1);
219 	if (!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY))
220 		pic_height *= 2;
221 	if (!!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC))
222 		pic_height /= 2;
223 
224 	hw_ps->sps.pic_width_in_mbs = pic_width;
225 	hw_ps->sps.pic_height_in_mbs = pic_height;
226 
227 	hw_ps->sps.frame_mbs_only_flag =
228 		!!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY);
229 	hw_ps->sps.mb_adaptive_frame_field_flag =
230 		!!(sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD);
231 	hw_ps->sps.direct_8x8_inference_flag =
232 		!!(sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE);
233 
234 	/* write pps */
235 	hw_ps->pps.pic_parameter_set_id = pps->pic_parameter_set_id;
236 	hw_ps->pps.pps_seq_parameter_set_id = pps->seq_parameter_set_id;
237 	hw_ps->pps.entropy_coding_mode_flag =
238 		!!(pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE);
239 	hw_ps->pps.bottom_field_pic_order_in_frame_present_flag =
240 		!!(pps->flags & V4L2_H264_PPS_FLAG_BOTTOM_FIELD_PIC_ORDER_IN_FRAME_PRESENT);
241 	hw_ps->pps.num_ref_idx_l0_default_active_minus1 =
242 		pps->num_ref_idx_l0_default_active_minus1;
243 	hw_ps->pps.num_ref_idx_l1_default_active_minus1 =
244 		pps->num_ref_idx_l1_default_active_minus1;
245 	hw_ps->pps.weighted_pred_flag =
246 		!!(pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED);
247 	hw_ps->pps.weighted_bipred_idc = pps->weighted_bipred_idc;
248 	hw_ps->pps.pic_init_qp_minus26 = pps->pic_init_qp_minus26;
249 	hw_ps->pps.pic_init_qs_minus26 = pps->pic_init_qs_minus26;
250 	hw_ps->pps.chroma_qp_index_offset = pps->chroma_qp_index_offset;
251 	hw_ps->pps.deblocking_filter_control_present_flag =
252 		!!(pps->flags & V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT);
253 	hw_ps->pps.constrained_intra_pred_flag =
254 		!!(pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED);
255 	hw_ps->pps.redundant_pic_cnt_present =
256 		!!(pps->flags & V4L2_H264_PPS_FLAG_REDUNDANT_PIC_CNT_PRESENT);
257 	hw_ps->pps.transform_8x8_mode_flag =
258 		!!(pps->flags & V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE);
259 	hw_ps->pps.second_chroma_qp_index_offset = pps->second_chroma_qp_index_offset;
260 	hw_ps->pps.scaling_list_enable_flag =
261 		!!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT);
262 
263 	set_field_order_cnt(&hw_ps->pps, dpb);
264 
265 	for (i = 0; i < ARRAY_SIZE(dec_params->dpb); i++) {
266 		if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)
267 			hw_ps->pps.is_longterm |= (1 << i);
268 
269 		hw_ps->pps.ref_field_flags |=
270 			(!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_FIELD)) << i;
271 		hw_ps->pps.ref_colmv_use_flag |=
272 			(!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) << i;
273 		hw_ps->pps.ref_topfield_used |=
274 			(!!(dpb[i].fields & V4L2_H264_TOP_FIELD_REF)) << i;
275 		hw_ps->pps.ref_botfield_used |=
276 			(!!(dpb[i].fields & V4L2_H264_BOTTOM_FIELD_REF)) << i;
277 	}
278 
279 	hw_ps->pps.pic_field_flag =
280 		!!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC);
281 	hw_ps->pps.pic_associated_flag =
282 		!!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD);
283 
284 	hw_ps->pps.cur_top_field = dec_params->top_field_order_cnt;
285 	hw_ps->pps.cur_bot_field = dec_params->bottom_field_order_cnt;
286 }
287 
rkvdec_write_regs(struct rkvdec_ctx * ctx)288 static void rkvdec_write_regs(struct rkvdec_ctx *ctx)
289 {
290 	struct rkvdec_dev *rkvdec = ctx->dev;
291 	struct rkvdec_h264_ctx *h264_ctx = ctx->priv;
292 
293 	rkvdec_memcpy_toio(rkvdec->regs + VDPU383_OFFSET_COMMON_REGS,
294 			   &h264_ctx->regs.common,
295 			   sizeof(h264_ctx->regs.common));
296 	rkvdec_memcpy_toio(rkvdec->regs + VDPU383_OFFSET_COMMON_ADDR_REGS,
297 			   &h264_ctx->regs.common_addr,
298 			   sizeof(h264_ctx->regs.common_addr));
299 	rkvdec_memcpy_toio(rkvdec->regs + VDPU383_OFFSET_CODEC_PARAMS_REGS,
300 			   &h264_ctx->regs.h26x_params,
301 			   sizeof(h264_ctx->regs.h26x_params));
302 	rkvdec_memcpy_toio(rkvdec->regs + VDPU383_OFFSET_CODEC_ADDR_REGS,
303 			   &h264_ctx->regs.h26x_addr,
304 			   sizeof(h264_ctx->regs.h26x_addr));
305 }
306 
config_registers(struct rkvdec_ctx * ctx,struct rkvdec_h264_run * run)307 static void config_registers(struct rkvdec_ctx *ctx,
308 			     struct rkvdec_h264_run *run)
309 {
310 	const struct v4l2_ctrl_h264_decode_params *dec_params = run->decode_params;
311 	struct rkvdec_h264_ctx *h264_ctx = ctx->priv;
312 	dma_addr_t priv_start_addr = h264_ctx->priv_tbl.dma;
313 	const struct v4l2_pix_format_mplane *dst_fmt;
314 	struct vb2_v4l2_buffer *src_buf = run->base.bufs.src;
315 	struct vb2_v4l2_buffer *dst_buf = run->base.bufs.dst;
316 	struct vdpu383_regs_h26x *regs = &h264_ctx->regs;
317 	const struct v4l2_format *f;
318 	dma_addr_t rlc_addr;
319 	dma_addr_t dst_addr;
320 	u32 hor_virstride;
321 	u32 ver_virstride;
322 	u32 y_virstride;
323 	u32 offset;
324 	u32 pixels;
325 	u32 i;
326 
327 	memset(regs, 0, sizeof(*regs));
328 
329 	/* Set H264 mode */
330 	regs->common.reg008_dec_mode = VDPU383_MODE_H264;
331 
332 	/* Set input stream length */
333 	regs->h26x_params.reg066_stream_len = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
334 
335 	/* Set strides */
336 	f = &ctx->decoded_fmt;
337 	dst_fmt = &f->fmt.pix_mp;
338 	hor_virstride = dst_fmt->plane_fmt[0].bytesperline;
339 	ver_virstride = dst_fmt->height;
340 	y_virstride = hor_virstride * ver_virstride;
341 
342 	pixels = dst_fmt->height * dst_fmt->width;
343 
344 	regs->h26x_params.reg068_hor_virstride = hor_virstride / 16;
345 	regs->h26x_params.reg069_raster_uv_hor_virstride = hor_virstride / 16;
346 	regs->h26x_params.reg070_y_virstride = y_virstride / 16;
347 
348 	/* Activate block gating */
349 	regs->common.reg010_block_gating_en.strmd_auto_gating_e      = 1;
350 	regs->common.reg010_block_gating_en.inter_auto_gating_e      = 1;
351 	regs->common.reg010_block_gating_en.intra_auto_gating_e      = 1;
352 	regs->common.reg010_block_gating_en.transd_auto_gating_e     = 1;
353 	regs->common.reg010_block_gating_en.recon_auto_gating_e      = 1;
354 	regs->common.reg010_block_gating_en.filterd_auto_gating_e    = 1;
355 	regs->common.reg010_block_gating_en.bus_auto_gating_e        = 1;
356 	regs->common.reg010_block_gating_en.ctrl_auto_gating_e       = 1;
357 	regs->common.reg010_block_gating_en.rcb_auto_gating_e        = 1;
358 	regs->common.reg010_block_gating_en.err_prc_auto_gating_e    = 1;
359 
360 	/* Set timeout threshold */
361 	if (pixels < RKVDEC_1080P_PIXELS)
362 		regs->common.reg013_core_timeout_threshold = VDPU383_TIMEOUT_1080p;
363 	else if (pixels < RKVDEC_4K_PIXELS)
364 		regs->common.reg013_core_timeout_threshold = VDPU383_TIMEOUT_4K;
365 	else if (pixels < RKVDEC_8K_PIXELS)
366 		regs->common.reg013_core_timeout_threshold = VDPU383_TIMEOUT_8K;
367 	else
368 		regs->common.reg013_core_timeout_threshold = VDPU383_TIMEOUT_MAX;
369 
370 	regs->common.reg016_error_ctrl_set.error_proc_disable = 1;
371 
372 	/* Set ref pic address & poc */
373 	for (i = 0; i < ARRAY_SIZE(dec_params->dpb); i++) {
374 		struct vb2_buffer *vb_buf = run->ref_buf[i];
375 		dma_addr_t buf_dma;
376 
377 		/*
378 		 * If a DPB entry is unused or invalid, address of current destination
379 		 * buffer is returned.
380 		 */
381 		if (!vb_buf)
382 			vb_buf = &dst_buf->vb2_buf;
383 
384 		buf_dma = vb2_dma_contig_plane_dma_addr(vb_buf, 0);
385 
386 		/* Set reference addresses */
387 		regs->h26x_addr.reg170_185_ref_base[i] = buf_dma;
388 		regs->h26x_addr.reg195_210_payload_st_ref_base[i] = buf_dma;
389 
390 		/* Set COLMV addresses */
391 		regs->h26x_addr.reg217_232_colmv_ref_base[i] = buf_dma + ctx->colmv_offset;
392 	}
393 
394 	/* Set rlc base address (input stream) */
395 	rlc_addr = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0);
396 	regs->common_addr.reg128_strm_base = rlc_addr;
397 
398 	/* Set output base address */
399 	dst_addr = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
400 	regs->h26x_addr.reg168_decout_base = dst_addr;
401 	regs->h26x_addr.reg169_error_ref_base = dst_addr;
402 	regs->h26x_addr.reg192_payload_st_cur_base = dst_addr;
403 
404 	/* Set colmv address */
405 	regs->h26x_addr.reg216_colmv_cur_base = dst_addr + ctx->colmv_offset;
406 
407 	/* Set RCB addresses */
408 	for (i = 0; i < rkvdec_rcb_buf_count(ctx); i++) {
409 		regs->common_addr.reg140_162_rcb_info[i].offset = rkvdec_rcb_buf_dma_addr(ctx, i);
410 		regs->common_addr.reg140_162_rcb_info[i].size = rkvdec_rcb_buf_size(ctx, i);
411 	}
412 
413 	/* Set hw pps address */
414 	offset = offsetof(struct rkvdec_h264_priv_tbl, param_set);
415 	regs->common_addr.reg131_gbl_base = priv_start_addr + offset;
416 	regs->h26x_params.reg067_global_len = sizeof(struct rkvdec_sps_pps) / 16;
417 
418 	/* Set hw rps address */
419 	offset = offsetof(struct rkvdec_h264_priv_tbl, rps);
420 	regs->common_addr.reg129_rps_base = priv_start_addr + offset;
421 
422 	/* Set cabac table */
423 	offset = offsetof(struct rkvdec_h264_priv_tbl, cabac_table);
424 	regs->common_addr.reg130_cabactbl_base = priv_start_addr + offset;
425 
426 	/* Set scaling list address */
427 	offset = offsetof(struct rkvdec_h264_priv_tbl, scaling_list);
428 	regs->common_addr.reg132_scanlist_addr = priv_start_addr + offset;
429 
430 	rkvdec_write_regs(ctx);
431 }
432 
rkvdec_h264_start(struct rkvdec_ctx * ctx)433 static int rkvdec_h264_start(struct rkvdec_ctx *ctx)
434 {
435 	struct rkvdec_dev *rkvdec = ctx->dev;
436 	struct rkvdec_h264_priv_tbl *priv_tbl;
437 	struct rkvdec_h264_ctx *h264_ctx;
438 	struct v4l2_ctrl *ctrl;
439 	int ret;
440 
441 	ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl,
442 			      V4L2_CID_STATELESS_H264_SPS);
443 	if (!ctrl)
444 		return -EINVAL;
445 
446 	ret = rkvdec_h264_validate_sps(ctx, ctrl->p_new.p_h264_sps);
447 	if (ret)
448 		return ret;
449 
450 	h264_ctx = kzalloc_obj(*h264_ctx);
451 	if (!h264_ctx)
452 		return -ENOMEM;
453 
454 	priv_tbl = dma_alloc_coherent(rkvdec->dev, sizeof(*priv_tbl),
455 				      &h264_ctx->priv_tbl.dma, GFP_KERNEL);
456 	if (!priv_tbl) {
457 		ret = -ENOMEM;
458 		goto err_free_ctx;
459 	}
460 
461 	h264_ctx->priv_tbl.size = sizeof(*priv_tbl);
462 	h264_ctx->priv_tbl.cpu = priv_tbl;
463 	memcpy(priv_tbl->cabac_table, rkvdec_h264_cabac_table,
464 	       sizeof(rkvdec_h264_cabac_table));
465 
466 	ctx->priv = h264_ctx;
467 
468 	return 0;
469 
470 err_free_ctx:
471 	kfree(h264_ctx);
472 	return ret;
473 }
474 
rkvdec_h264_stop(struct rkvdec_ctx * ctx)475 static void rkvdec_h264_stop(struct rkvdec_ctx *ctx)
476 {
477 	struct rkvdec_h264_ctx *h264_ctx = ctx->priv;
478 	struct rkvdec_dev *rkvdec = ctx->dev;
479 
480 	dma_free_coherent(rkvdec->dev, h264_ctx->priv_tbl.size,
481 			  h264_ctx->priv_tbl.cpu, h264_ctx->priv_tbl.dma);
482 	kfree(h264_ctx);
483 }
484 
rkvdec_h264_run(struct rkvdec_ctx * ctx)485 static int rkvdec_h264_run(struct rkvdec_ctx *ctx)
486 {
487 	struct v4l2_h264_reflist_builder reflist_builder;
488 	struct rkvdec_dev *rkvdec = ctx->dev;
489 	struct rkvdec_h264_ctx *h264_ctx = ctx->priv;
490 	struct rkvdec_h264_run run;
491 	struct rkvdec_h264_priv_tbl *tbl = h264_ctx->priv_tbl.cpu;
492 	u32 timeout_threshold;
493 
494 	rkvdec_h264_run_preamble(ctx, &run);
495 
496 	/* Build the P/B{0,1} ref lists. */
497 	v4l2_h264_init_reflist_builder(&reflist_builder, run.decode_params,
498 				       run.sps, run.decode_params->dpb);
499 	v4l2_h264_build_p_ref_list(&reflist_builder, h264_ctx->reflists.p);
500 	v4l2_h264_build_b_ref_lists(&reflist_builder, h264_ctx->reflists.b0,
501 				    h264_ctx->reflists.b1);
502 
503 	assemble_hw_scaling_list(&run, &tbl->scaling_list);
504 	assemble_hw_pps(ctx, &run);
505 	lookup_ref_buf_idx(ctx, &run);
506 	assemble_hw_rps(&reflist_builder, &run, &h264_ctx->reflists, &tbl->rps);
507 
508 	config_registers(ctx, &run);
509 
510 	rkvdec_run_postamble(ctx, &run.base);
511 
512 	timeout_threshold = h264_ctx->regs.common.reg013_core_timeout_threshold;
513 	rkvdec_schedule_watchdog(rkvdec, timeout_threshold);
514 
515 	/* Start decoding! */
516 	writel(timeout_threshold, rkvdec->link + VDPU383_LINK_TIMEOUT_THRESHOLD);
517 	writel(0, rkvdec->link + VDPU383_LINK_IP_ENABLE);
518 	writel(VDPU383_DEC_E_BIT, rkvdec->link + VDPU383_LINK_DEC_ENABLE);
519 
520 	return 0;
521 }
522 
rkvdec_h264_try_ctrl(struct rkvdec_ctx * ctx,struct v4l2_ctrl * ctrl)523 static int rkvdec_h264_try_ctrl(struct rkvdec_ctx *ctx, struct v4l2_ctrl *ctrl)
524 {
525 	if (ctrl->id == V4L2_CID_STATELESS_H264_SPS)
526 		return rkvdec_h264_validate_sps(ctx, ctrl->p_new.p_h264_sps);
527 
528 	return 0;
529 }
530 
531 const struct rkvdec_coded_fmt_ops rkvdec_vdpu383_h264_fmt_ops = {
532 	.adjust_fmt = rkvdec_h264_adjust_fmt,
533 	.get_image_fmt = rkvdec_h264_get_image_fmt,
534 	.start = rkvdec_h264_start,
535 	.stop = rkvdec_h264_stop,
536 	.run = rkvdec_h264_run,
537 	.try_ctrl = rkvdec_h264_try_ctrl,
538 };
539