xref: /linux/drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-hevc.c (revision 4ae45bf4663ed93c61e9f716e81455122fb66ee2)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Rockchip VDPU383 HEVC backend
4  *
5  * Copyright (C) 2025 Collabora, Ltd.
6  *  Detlev Casanova <detlev.casanova@collabora.com>
7  */
8 
9 #include <media/v4l2-mem2mem.h>
10 
11 #include "rkvdec.h"
12 #include "rkvdec-cabac.h"
13 #include "rkvdec-rcb.h"
14 #include "rkvdec-hevc-common.h"
15 #include "rkvdec-vdpu383-regs.h"
16 #include "rkvdec-bitwriter.h"
17 
/*
 * Bit layout of the combined SPS/PPS parameter block handed to the hardware.
 *
 * Each field is described with BW_FIELD(); judging by the running sums below,
 * the first argument is the bit offset from the start of the block and the
 * second is the field width in bits (see rkvdec-bitwriter.h to confirm).
 */

/* SPS */
#define VIDEO_PARAMETER_SET_ID				BW_FIELD(0, 4)
#define SEQ_PARAMETER_SET_ID				BW_FIELD(4, 4)
#define CHROMA_FORMAT_IDC				BW_FIELD(8, 2)
#define PIC_WIDTH_IN_LUMA_SAMPLES			BW_FIELD(10, 16)
#define PIC_HEIGHT_IN_LUMA_SAMPLES			BW_FIELD(26, 16)
#define BIT_DEPTH_LUMA					BW_FIELD(42, 3)
#define BIT_DEPTH_CHROMA				BW_FIELD(45, 3)
#define LOG2_MAX_PIC_ORDER_CNT_LSB			BW_FIELD(48, 5)
#define LOG2_DIFF_MAX_MIN_LUMA_CODING_BLOCK_SIZE	BW_FIELD(53, 2)
#define LOG2_MIN_LUMA_CODING_BLOCK_SIZE			BW_FIELD(55, 3)
#define LOG2_MIN_TRANSFORM_BLOCK_SIZE			BW_FIELD(58, 3)
#define LOG2_DIFF_MAX_MIN_LUMA_TRANSFORM_BLOCK_SIZE	BW_FIELD(61, 2)
#define MAX_TRANSFORM_HIERARCHY_DEPTH_INTER		BW_FIELD(63, 3)
#define MAX_TRANSFORM_HIERARCHY_DEPTH_INTRA		BW_FIELD(66, 3)
#define SCALING_LIST_ENABLED_FLAG			BW_FIELD(69, 1)
#define AMP_ENABLED_FLAG				BW_FIELD(70, 1)
#define SAMPLE_ADAPTIVE_OFFSET_ENABLED_FLAG		BW_FIELD(71, 1)
#define PCM_ENABLED_FLAG				BW_FIELD(72, 1)
#define PCM_SAMPLE_BIT_DEPTH_LUMA			BW_FIELD(73, 4)
#define PCM_SAMPLE_BIT_DEPTH_CHROMA			BW_FIELD(77, 4)
#define PCM_LOOP_FILTER_DISABLED_FLAG			BW_FIELD(81, 1)
#define LOG2_DIFF_MAX_MIN_PCM_LUMA_CODING_BLOCK_SIZE	BW_FIELD(82, 3)
#define LOG2_MIN_PCM_LUMA_CODING_BLOCK_SIZE		BW_FIELD(85, 3)
#define NUM_SHORT_TERM_REF_PIC_SETS			BW_FIELD(88, 7)
#define LONG_TERM_REF_PICS_PRESENT_FLAG			BW_FIELD(95, 1)
#define NUM_LONG_TERM_REF_PICS_SPS			BW_FIELD(96, 6)
#define SPS_TEMPORAL_MVP_ENABLED_FLAG			BW_FIELD(102, 1)
#define STRONG_INTRA_SMOOTHING_ENABLED_FLAG		BW_FIELD(103, 1)
/* Bits 104-110 have no named field in this file. */
#define SPS_MAX_DEC_PIC_BUFFERING_MINUS1		BW_FIELD(111, 4)
#define SEPARATE_COLOUR_PLANE_FLAG			BW_FIELD(115, 1)
#define HIGH_PRECISION_OFFSETS_ENABLED_FLAG		BW_FIELD(116, 1)
#define PERSISTENT_RICE_ADAPTATION_ENABLED_FLAG		BW_FIELD(117, 1)

/* PPS */
#define PIC_PARAMETER_SET_ID				BW_FIELD(118, 6)
#define PPS_SEQ_PARAMETER_SET_ID			BW_FIELD(124, 4)
#define DEPENDENT_SLICE_SEGMENTS_ENABLED_FLAG		BW_FIELD(128, 1)
#define OUTPUT_FLAG_PRESENT_FLAG			BW_FIELD(129, 1)
#define NUM_EXTRA_SLICE_HEADER_BITS			BW_FIELD(130, 13)
#define SIGN_DATA_HIDING_ENABLED_FLAG			BW_FIELD(143, 1)
#define CABAC_INIT_PRESENT_FLAG				BW_FIELD(144, 1)
#define NUM_REF_IDX_L0_DEFAULT_ACTIVE			BW_FIELD(145, 4)
#define NUM_REF_IDX_L1_DEFAULT_ACTIVE			BW_FIELD(149, 4)
#define INIT_QP_MINUS26					BW_FIELD(153, 7)
#define CONSTRAINED_INTRA_PRED_FLAG			BW_FIELD(160, 1)
#define TRANSFORM_SKIP_ENABLED_FLAG			BW_FIELD(161, 1)
#define CU_QP_DELTA_ENABLED_FLAG			BW_FIELD(162, 1)
#define LOG2_MIN_CU_QP_DELTA_SIZE			BW_FIELD(163, 3)
#define PPS_CB_QP_OFFSET				BW_FIELD(166, 5)
#define PPS_CR_QP_OFFSET				BW_FIELD(171, 5)
#define PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT_FLAG	BW_FIELD(176, 1)
#define WEIGHTED_PRED_FLAG				BW_FIELD(177, 1)
#define WEIGHTED_BIPRED_FLAG				BW_FIELD(178, 1)
#define TRANSQUANT_BYPASS_ENABLED_FLAG			BW_FIELD(179, 1)
#define TILES_ENABLED_FLAG				BW_FIELD(180, 1)
#define ENTROPY_CODING_SYNC_ENABLED_FLAG		BW_FIELD(181, 1)
#define PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED_FLAG	BW_FIELD(182, 1)
#define LOOP_FILTER_ACROSS_TILES_ENABLED_FLAG		BW_FIELD(183, 1)
#define DEBLOCKING_FILTER_OVERRIDE_ENABLED_FLAG		BW_FIELD(184, 1)
#define PPS_DEBLOCKING_FILTER_DISABLED_FLAG		BW_FIELD(185, 1)
#define PPS_BETA_OFFSET_DIV2				BW_FIELD(186, 4)
#define PPS_TC_OFFSET_DIV2				BW_FIELD(190, 4)
#define LISTS_MODIFICATION_PRESENT_FLAG			BW_FIELD(194, 1)
#define LOG2_PARALLEL_MERGE_LEVEL			BW_FIELD(195, 3)
#define SLICE_SEGMENT_HEADER_EXTENSION_PRESENT_FLAG	BW_FIELD(198, 1)

/* pps extensions (bits 199-201 have no named field in this file) */
#define LOG2_MAX_TRANSFORM_SKIP_BLOCK_SIZE		BW_FIELD(202, 2)
#define CROSS_COMPONENT_PREDICTION_ENABLED_FLAG		BW_FIELD(204, 1)
#define CHROMA_QP_OFFSET_LIST_ENABLED_FLAG		BW_FIELD(205, 1)
#define LOG2_MIN_CU_CHROMA_QP_DELTA_SIZE		BW_FIELD(206, 3)
#define CB_QP_OFFSET_LIST(i)				BW_FIELD(209 + (i) * 5, 5) // i: 0-5
#define CB_CR_OFFSET_LIST(i)				BW_FIELD(239 + (i) * 5, 5) // i: 0-5
#define CHROMA_QP_OFFSET_LIST_LEN_MINUS1		BW_FIELD(269, 3)

/* mvc0 && mvc1 */
#define MVC_FF						BW_FIELD(272, 16)
#define MVC_00						BW_FIELD(288, 9)

/* poc info */
#define RESERVED2					BW_FIELD(297, 3)
#define CURRENT_POC					BW_FIELD(300, 32)
#define REF_PIC_POC(i)					BW_FIELD(332 + (i) * 32, 32) // i: 0-14
#define RESERVED3					BW_FIELD(812, 32)
#define REF_IS_VALID(i)					BW_FIELD(844 + (i), 1) // i: 0-14
#define RESERVED4					BW_FIELD(859, 1)

/* tile info */
#define NUM_TILE_COLUMNS				BW_FIELD(860, 5)
#define NUM_TILE_ROWS					BW_FIELD(865, 5)
#define COLUMN_WIDTH(i)					BW_FIELD(870 + (i) * 12, 12) // i: 0-19
#define ROW_HEIGHT(i)					BW_FIELD(1110 + (i) * 12, 12) // i: 0-21

/*
 * Total size of the parameter block in bits: the end of the last ROW_HEIGHT
 * entry (1110 + 22 * 12), rounded up to a multiple of 256 bits.
 */
#define HEVC_SPS_SIZE					ALIGN(1110 + 22 * 12, 256)
112 
/*
 * Raw storage for the combined SPS/PPS parameter block.
 *
 * HEVC_SPS_SIZE is expressed in bits, hence the division by 8 (bits per byte)
 * and 4 (bytes per u32). Fields are packed into @info with
 * rkvdec_set_bw_field() using the BW_FIELD() definitions above.
 */
struct rkvdec_hevc_sps_pps {
	u32 info[HEVC_SPS_SIZE / 8 / 4];
};
116 
/*
 * Private tables shared with the hardware.
 *
 * One instance is allocated with dma_alloc_coherent() in rkvdec_hevc_start();
 * config_registers() programs the DMA address of each member (base address
 * plus offsetof()) into the matching base-address register.
 */
struct rkvdec_hevc_priv_tbl {
	/* Combined SPS/PPS block, filled by assemble_hw_pps() */
	struct rkvdec_hevc_sps_pps param_set;
	/* Reference picture sets, filled by rkvdec_hevc_assemble_hw_rps() */
	struct rkvdec_rps rps;
	/* Scaling lists, filled by rkvdec_hevc_assemble_hw_scaling_list() */
	struct scaling_factor scaling_list;
	/* Copy of rkvdec_hevc_cabac_table, written once at start */
	u8 cabac_table[27456];
}  __packed;
123 
/*
 * Per-context state of the VDPU383 HEVC backend, stored in ctx->priv between
 * rkvdec_hevc_start() and rkvdec_hevc_stop().
 */
struct rkvdec_hevc_ctx {
	/* CPU pointer, DMA address and size of the rkvdec_hevc_priv_tbl */
	struct rkvdec_aux_buf			priv_tbl;
	/* Passed to rkvdec_hevc_assemble_hw_scaling_list() on every run */
	struct v4l2_ctrl_hevc_scaling_matrix	scaling_matrix_cache;
	/* Passed to rkvdec_hevc_assemble_hw_rps() on every run */
	struct v4l2_ctrl_hevc_ext_sps_st_rps	st_cache;
	/* Register values rebuilt by config_registers() and copied to MMIO */
	struct vdpu383_regs_h26x		regs;
};
130 
131 static void assemble_hw_pps(struct rkvdec_ctx *ctx,
132 			    struct rkvdec_hevc_run *run)
133 {
134 	struct rkvdec_hevc_ctx *h264_ctx = ctx->priv;
135 	const struct v4l2_ctrl_hevc_sps *sps = run->sps;
136 	const struct v4l2_ctrl_hevc_pps *pps = run->pps;
137 	const struct v4l2_ctrl_hevc_decode_params *dec_params = run->decode_params;
138 	struct rkvdec_hevc_priv_tbl *priv_tbl = h264_ctx->priv_tbl.cpu;
139 	struct rkvdec_hevc_sps_pps *hw_ps;
140 	bool tiles_enabled;
141 	s32 max_cu_width;
142 	s32 pic_in_cts_width;
143 	s32 pic_in_cts_height;
144 	u16 log2_min_cb_size, width, height;
145 	u16 column_width[22];
146 	u16 row_height[22];
147 	u8 pcm_enabled;
148 	u32 i;
149 
150 	/*
151 	 * HW read the SPS/PPS information from PPS packet index by PPS id.
152 	 * offset from the base can be calculated by PPS_id * 32 (size per PPS
153 	 * packet unit). so the driver copy SPS/PPS information to the exact PPS
154 	 * packet unit for HW accessing.
155 	 */
156 	hw_ps = &priv_tbl->param_set;
157 	memset(hw_ps, 0, sizeof(*hw_ps));
158 
159 	/* write sps */
160 	rkvdec_set_bw_field(hw_ps->info, VIDEO_PARAMETER_SET_ID, sps->video_parameter_set_id);
161 	rkvdec_set_bw_field(hw_ps->info, SEQ_PARAMETER_SET_ID, sps->seq_parameter_set_id);
162 	rkvdec_set_bw_field(hw_ps->info, CHROMA_FORMAT_IDC, sps->chroma_format_idc);
163 
164 	log2_min_cb_size = sps->log2_min_luma_coding_block_size_minus3 + 3;
165 	width = sps->pic_width_in_luma_samples;
166 	height = sps->pic_height_in_luma_samples;
167 
168 	rkvdec_set_bw_field(hw_ps->info, PIC_WIDTH_IN_LUMA_SAMPLES, width);
169 	rkvdec_set_bw_field(hw_ps->info, PIC_HEIGHT_IN_LUMA_SAMPLES, height);
170 	rkvdec_set_bw_field(hw_ps->info, BIT_DEPTH_LUMA, sps->bit_depth_luma_minus8 + 8);
171 	rkvdec_set_bw_field(hw_ps->info, BIT_DEPTH_CHROMA, sps->bit_depth_chroma_minus8 + 8);
172 	rkvdec_set_bw_field(hw_ps->info, LOG2_MAX_PIC_ORDER_CNT_LSB,
173 			    sps->log2_max_pic_order_cnt_lsb_minus4 + 4);
174 	rkvdec_set_bw_field(hw_ps->info, LOG2_DIFF_MAX_MIN_LUMA_CODING_BLOCK_SIZE,
175 			    sps->log2_diff_max_min_luma_coding_block_size);
176 	rkvdec_set_bw_field(hw_ps->info, LOG2_MIN_LUMA_CODING_BLOCK_SIZE,
177 			    sps->log2_min_luma_coding_block_size_minus3 + 3);
178 	rkvdec_set_bw_field(hw_ps->info, LOG2_MIN_TRANSFORM_BLOCK_SIZE,
179 			    sps->log2_min_luma_transform_block_size_minus2 + 2);
180 	rkvdec_set_bw_field(hw_ps->info, LOG2_DIFF_MAX_MIN_LUMA_TRANSFORM_BLOCK_SIZE,
181 			    sps->log2_diff_max_min_luma_transform_block_size);
182 	rkvdec_set_bw_field(hw_ps->info, MAX_TRANSFORM_HIERARCHY_DEPTH_INTER,
183 			    sps->max_transform_hierarchy_depth_inter);
184 	rkvdec_set_bw_field(hw_ps->info, MAX_TRANSFORM_HIERARCHY_DEPTH_INTRA,
185 			    sps->max_transform_hierarchy_depth_intra);
186 	rkvdec_set_bw_field(hw_ps->info, SCALING_LIST_ENABLED_FLAG,
187 			    !!(sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED));
188 	rkvdec_set_bw_field(hw_ps->info, AMP_ENABLED_FLAG,
189 			    !!(sps->flags & V4L2_HEVC_SPS_FLAG_AMP_ENABLED));
190 	rkvdec_set_bw_field(hw_ps->info, SAMPLE_ADAPTIVE_OFFSET_ENABLED_FLAG,
191 			    !!(sps->flags & V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET));
192 
193 	pcm_enabled = !!(sps->flags & V4L2_HEVC_SPS_FLAG_PCM_ENABLED);
194 	rkvdec_set_bw_field(hw_ps->info, PCM_ENABLED_FLAG, pcm_enabled);
195 	rkvdec_set_bw_field(hw_ps->info, PCM_SAMPLE_BIT_DEPTH_LUMA,
196 			    pcm_enabled ? sps->pcm_sample_bit_depth_luma_minus1 + 1 : 0);
197 	rkvdec_set_bw_field(hw_ps->info, PCM_SAMPLE_BIT_DEPTH_CHROMA,
198 			    pcm_enabled ? sps->pcm_sample_bit_depth_chroma_minus1 + 1 : 0);
199 	rkvdec_set_bw_field(hw_ps->info, PCM_LOOP_FILTER_DISABLED_FLAG,
200 			    !!(sps->flags & V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED));
201 	rkvdec_set_bw_field(hw_ps->info, LOG2_DIFF_MAX_MIN_PCM_LUMA_CODING_BLOCK_SIZE,
202 			    sps->log2_diff_max_min_pcm_luma_coding_block_size);
203 	rkvdec_set_bw_field(hw_ps->info, LOG2_MIN_PCM_LUMA_CODING_BLOCK_SIZE,
204 			    pcm_enabled ? sps->log2_min_pcm_luma_coding_block_size_minus3 + 3 : 0);
205 	rkvdec_set_bw_field(hw_ps->info, NUM_SHORT_TERM_REF_PIC_SETS,
206 			    sps->num_short_term_ref_pic_sets);
207 	rkvdec_set_bw_field(hw_ps->info, LONG_TERM_REF_PICS_PRESENT_FLAG,
208 			    !!(sps->flags & V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT));
209 	rkvdec_set_bw_field(hw_ps->info, NUM_LONG_TERM_REF_PICS_SPS,
210 			    sps->num_long_term_ref_pics_sps);
211 	rkvdec_set_bw_field(hw_ps->info, SPS_TEMPORAL_MVP_ENABLED_FLAG,
212 			    !!(sps->flags & V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED));
213 	rkvdec_set_bw_field(hw_ps->info, STRONG_INTRA_SMOOTHING_ENABLED_FLAG,
214 			    !!(sps->flags & V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED));
215 	rkvdec_set_bw_field(hw_ps->info, SPS_MAX_DEC_PIC_BUFFERING_MINUS1,
216 			    sps->sps_max_dec_pic_buffering_minus1);
217 
218 	/* write pps */
219 	rkvdec_set_bw_field(hw_ps->info, PIC_PARAMETER_SET_ID, pps->pic_parameter_set_id);
220 	rkvdec_set_bw_field(hw_ps->info, SEQ_PARAMETER_SET_ID, sps->seq_parameter_set_id);
221 	rkvdec_set_bw_field(hw_ps->info, DEPENDENT_SLICE_SEGMENTS_ENABLED_FLAG,
222 			    !!(pps->flags & V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED));
223 	rkvdec_set_bw_field(hw_ps->info, OUTPUT_FLAG_PRESENT_FLAG,
224 			    !!(pps->flags & V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT));
225 	rkvdec_set_bw_field(hw_ps->info, NUM_EXTRA_SLICE_HEADER_BITS,
226 			    pps->num_extra_slice_header_bits);
227 	rkvdec_set_bw_field(hw_ps->info, SIGN_DATA_HIDING_ENABLED_FLAG,
228 			    !!(pps->flags & V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED));
229 	rkvdec_set_bw_field(hw_ps->info, CABAC_INIT_PRESENT_FLAG,
230 			    !!(pps->flags & V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT));
231 	rkvdec_set_bw_field(hw_ps->info, NUM_REF_IDX_L0_DEFAULT_ACTIVE,
232 			    pps->num_ref_idx_l0_default_active_minus1 + 1);
233 	rkvdec_set_bw_field(hw_ps->info, NUM_REF_IDX_L1_DEFAULT_ACTIVE,
234 			    pps->num_ref_idx_l1_default_active_minus1 + 1);
235 	rkvdec_set_bw_field(hw_ps->info, INIT_QP_MINUS26, pps->init_qp_minus26);
236 	rkvdec_set_bw_field(hw_ps->info, CONSTRAINED_INTRA_PRED_FLAG,
237 			    !!(pps->flags & V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED));
238 	rkvdec_set_bw_field(hw_ps->info, TRANSFORM_SKIP_ENABLED_FLAG,
239 			    !!(pps->flags & V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED));
240 	rkvdec_set_bw_field(hw_ps->info, CU_QP_DELTA_ENABLED_FLAG,
241 			    !!(pps->flags & V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED));
242 	rkvdec_set_bw_field(hw_ps->info, LOG2_MIN_CU_QP_DELTA_SIZE, log2_min_cb_size +
243 			    sps->log2_diff_max_min_luma_coding_block_size -
244 			    pps->diff_cu_qp_delta_depth);
245 	rkvdec_set_bw_field(hw_ps->info, PPS_CB_QP_OFFSET, pps->pps_cb_qp_offset);
246 	rkvdec_set_bw_field(hw_ps->info, PPS_CR_QP_OFFSET, pps->pps_cr_qp_offset);
247 	rkvdec_set_bw_field(hw_ps->info, PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT_FLAG,
248 			    !!(pps->flags &
249 			       V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT));
250 	rkvdec_set_bw_field(hw_ps->info, WEIGHTED_PRED_FLAG,
251 			    !!(pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED));
252 	rkvdec_set_bw_field(hw_ps->info, WEIGHTED_BIPRED_FLAG,
253 			    !!(pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED));
254 	rkvdec_set_bw_field(hw_ps->info, TRANSQUANT_BYPASS_ENABLED_FLAG,
255 			    !!(pps->flags & V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED));
256 	tiles_enabled = !!(pps->flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED);
257 	rkvdec_set_bw_field(hw_ps->info, TILES_ENABLED_FLAG, tiles_enabled);
258 	rkvdec_set_bw_field(hw_ps->info, ENTROPY_CODING_SYNC_ENABLED_FLAG,
259 			    !!(pps->flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED));
260 	rkvdec_set_bw_field(hw_ps->info, PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED_FLAG,
261 			    !!(pps->flags &
262 			       V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED));
263 	rkvdec_set_bw_field(hw_ps->info, LOOP_FILTER_ACROSS_TILES_ENABLED_FLAG,
264 			    !!(pps->flags & V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED));
265 	rkvdec_set_bw_field(hw_ps->info, DEBLOCKING_FILTER_OVERRIDE_ENABLED_FLAG,
266 			    !!(pps->flags &
267 			       V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED));
268 	rkvdec_set_bw_field(hw_ps->info, PPS_DEBLOCKING_FILTER_DISABLED_FLAG,
269 			    !!(pps->flags & V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER));
270 	rkvdec_set_bw_field(hw_ps->info, PPS_BETA_OFFSET_DIV2, pps->pps_beta_offset_div2);
271 	rkvdec_set_bw_field(hw_ps->info, PPS_TC_OFFSET_DIV2, pps->pps_tc_offset_div2);
272 	rkvdec_set_bw_field(hw_ps->info, LISTS_MODIFICATION_PRESENT_FLAG,
273 			    !!(pps->flags & V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT));
274 	rkvdec_set_bw_field(hw_ps->info, LOG2_PARALLEL_MERGE_LEVEL,
275 			    pps->log2_parallel_merge_level_minus2 + 2);
276 	rkvdec_set_bw_field(hw_ps->info, SLICE_SEGMENT_HEADER_EXTENSION_PRESENT_FLAG,
277 			    !!(pps->flags &
278 			       V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT));
279 	rkvdec_set_bw_field(hw_ps->info, NUM_TILE_COLUMNS,
280 			    tiles_enabled ? pps->num_tile_columns_minus1 + 1 : 1);
281 	rkvdec_set_bw_field(hw_ps->info, NUM_TILE_ROWS,
282 			    tiles_enabled ? pps->num_tile_rows_minus1 + 1 : 1);
283 	rkvdec_set_bw_field(hw_ps->info, MVC_FF, 0xffff);
284 
285 	// Setup tiles information
286 	memset(column_width, 0, sizeof(column_width));
287 	memset(row_height, 0, sizeof(row_height));
288 
289 	max_cu_width = 1 << (sps->log2_diff_max_min_luma_coding_block_size + log2_min_cb_size);
290 	pic_in_cts_width = (width + max_cu_width - 1) / max_cu_width;
291 	pic_in_cts_height = (height + max_cu_width - 1) / max_cu_width;
292 
293 	if (tiles_enabled) {
294 		if (pps->flags & V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING) {
295 			compute_tiles_uniform(run, log2_min_cb_size, width, height,
296 					      pic_in_cts_width, pic_in_cts_height,
297 					      column_width, row_height);
298 		} else {
299 			compute_tiles_non_uniform(run, log2_min_cb_size, width, height,
300 						  pic_in_cts_width, pic_in_cts_height,
301 						  column_width, row_height);
302 		}
303 	} else {
304 		column_width[0] = (width + max_cu_width - 1) / max_cu_width;
305 		row_height[0] = (height + max_cu_width - 1) / max_cu_width;
306 	}
307 
308 	for (i = 0; i < 20; i++)
309 		rkvdec_set_bw_field(hw_ps->info, COLUMN_WIDTH(i), column_width[i]);
310 	for (i = 0; i < 22; i++)
311 		rkvdec_set_bw_field(hw_ps->info, ROW_HEIGHT(i), row_height[i]);
312 
313 	// Setup POC information
314 	rkvdec_set_bw_field(hw_ps->info, CURRENT_POC, dec_params->pic_order_cnt_val);
315 
316 	for (i = 0; i < ARRAY_SIZE(dec_params->dpb); i++) {
317 		rkvdec_set_bw_field(hw_ps->info, REF_IS_VALID(i),
318 				    !!(dec_params->num_active_dpb_entries > i));
319 		rkvdec_set_bw_field(hw_ps->info, REF_PIC_POC(i),
320 				    dec_params->dpb[i].pic_order_cnt_val);
321 	}
322 }
323 
324 static void rkvdec_write_regs(struct rkvdec_ctx *ctx)
325 {
326 	struct rkvdec_dev *rkvdec = ctx->dev;
327 	struct rkvdec_hevc_ctx *h265_ctx = ctx->priv;
328 
329 	rkvdec_memcpy_toio(rkvdec->regs + VDPU383_OFFSET_COMMON_REGS,
330 			   &h265_ctx->regs.common,
331 			   sizeof(h265_ctx->regs.common));
332 	rkvdec_memcpy_toio(rkvdec->regs + VDPU383_OFFSET_COMMON_ADDR_REGS,
333 			   &h265_ctx->regs.common_addr,
334 			   sizeof(h265_ctx->regs.common_addr));
335 	rkvdec_memcpy_toio(rkvdec->regs + VDPU383_OFFSET_CODEC_PARAMS_REGS,
336 			   &h265_ctx->regs.h26x_params,
337 			   sizeof(h265_ctx->regs.h26x_params));
338 	rkvdec_memcpy_toio(rkvdec->regs + VDPU383_OFFSET_CODEC_ADDR_REGS,
339 			   &h265_ctx->regs.h26x_addr,
340 			   sizeof(h265_ctx->regs.h26x_addr));
341 }
342 
343 static void config_registers(struct rkvdec_ctx *ctx,
344 			     struct rkvdec_hevc_run *run)
345 {
346 	const struct v4l2_ctrl_hevc_decode_params *dec_params = run->decode_params;
347 	struct rkvdec_hevc_ctx *h265_ctx = ctx->priv;
348 	const struct v4l2_ctrl_hevc_sps *sps = run->sps;
349 	dma_addr_t priv_start_addr = h265_ctx->priv_tbl.dma;
350 	const struct v4l2_pix_format_mplane *dst_fmt;
351 	struct vb2_v4l2_buffer *src_buf = run->base.bufs.src;
352 	struct vb2_v4l2_buffer *dst_buf = run->base.bufs.dst;
353 	struct vdpu383_regs_h26x *regs = &h265_ctx->regs;
354 	const struct v4l2_format *f;
355 	dma_addr_t rlc_addr;
356 	dma_addr_t dst_addr;
357 	u32 hor_virstride;
358 	u32 ver_virstride;
359 	u32 y_virstride;
360 	u32 offset;
361 	u32 pixels;
362 	u32 i;
363 
364 	memset(regs, 0, sizeof(*regs));
365 
366 	/* Set HEVC mode */
367 	regs->common.reg008_dec_mode = VDPU383_MODE_HEVC;
368 
369 	/* Set input stream length */
370 	regs->h26x_params.reg066_stream_len = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
371 
372 	/* Set strides */
373 	f = &ctx->decoded_fmt;
374 	dst_fmt = &f->fmt.pix_mp;
375 	hor_virstride = dst_fmt->plane_fmt[0].bytesperline;
376 	ver_virstride = dst_fmt->height;
377 	y_virstride = hor_virstride * ver_virstride;
378 
379 	pixels = dst_fmt->height * dst_fmt->width;
380 
381 	regs->h26x_params.reg068_hor_virstride = hor_virstride / 16;
382 	regs->h26x_params.reg069_raster_uv_hor_virstride = hor_virstride / 16;
383 	regs->h26x_params.reg070_y_virstride = y_virstride / 16;
384 
385 	/* Activate block gating */
386 	regs->common.reg010_block_gating_en.strmd_auto_gating_e      = 1;
387 	regs->common.reg010_block_gating_en.inter_auto_gating_e      = 1;
388 	regs->common.reg010_block_gating_en.intra_auto_gating_e      = 1;
389 	regs->common.reg010_block_gating_en.transd_auto_gating_e     = 1;
390 	regs->common.reg010_block_gating_en.recon_auto_gating_e      = 1;
391 	regs->common.reg010_block_gating_en.filterd_auto_gating_e    = 1;
392 	regs->common.reg010_block_gating_en.bus_auto_gating_e	     = 1;
393 	regs->common.reg010_block_gating_en.ctrl_auto_gating_e       = 1;
394 	regs->common.reg010_block_gating_en.rcb_auto_gating_e	     = 1;
395 	regs->common.reg010_block_gating_en.err_prc_auto_gating_e    = 1;
396 
397 	/* Set timeout threshold */
398 	if (pixels < RKVDEC_1080P_PIXELS)
399 		regs->common.reg013_core_timeout_threshold = VDPU383_TIMEOUT_1080p;
400 	else if (pixels < RKVDEC_4K_PIXELS)
401 		regs->common.reg013_core_timeout_threshold = VDPU383_TIMEOUT_4K;
402 	else if (pixels < RKVDEC_8K_PIXELS)
403 		regs->common.reg013_core_timeout_threshold = VDPU383_TIMEOUT_8K;
404 	else
405 		regs->common.reg013_core_timeout_threshold = VDPU383_TIMEOUT_MAX;
406 
407 	regs->common.reg016_error_ctrl_set.error_proc_disable = 1;
408 
409 	/* Set ref pic address & poc */
410 	for (i = 0; i < ARRAY_SIZE(dec_params->dpb) - 1; i++) {
411 		struct vb2_buffer *vb_buf = get_ref_buf(ctx, run, i);
412 		dma_addr_t buf_dma;
413 
414 		buf_dma = vb2_dma_contig_plane_dma_addr(vb_buf, 0);
415 
416 		/* Set reference addresses */
417 		regs->h26x_addr.reg170_185_ref_base[i] = buf_dma;
418 		regs->h26x_addr.reg195_210_payload_st_ref_base[i] = buf_dma;
419 
420 		/* Set COLMV addresses */
421 		regs->h26x_addr.reg217_232_colmv_ref_base[i] = buf_dma + ctx->colmv_offset;
422 	}
423 
424 	/* Set rlc base address (input stream) */
425 	rlc_addr = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0);
426 	regs->common_addr.reg128_strm_base = rlc_addr;
427 
428 	/* Set output base address */
429 	dst_addr = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
430 	regs->h26x_addr.reg168_decout_base = dst_addr;
431 	regs->h26x_addr.reg169_error_ref_base = dst_addr;
432 	regs->h26x_addr.reg192_payload_st_cur_base = dst_addr;
433 
434 	/* Set colmv address */
435 	regs->h26x_addr.reg216_colmv_cur_base = dst_addr + ctx->colmv_offset;
436 
437 	/* Set RCB addresses */
438 	for (i = 0; i < rkvdec_rcb_buf_count(ctx); i++) {
439 		regs->common_addr.reg140_162_rcb_info[i].offset = rkvdec_rcb_buf_dma_addr(ctx, i);
440 		regs->common_addr.reg140_162_rcb_info[i].size = rkvdec_rcb_buf_size(ctx, i);
441 	}
442 
443 	if (sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) {
444 		/* Set scaling matrix */
445 		offset = offsetof(struct rkvdec_hevc_priv_tbl, scaling_list);
446 		regs->common_addr.reg132_scanlist_addr = priv_start_addr + offset;
447 	}
448 
449 	/* Set hw pps address */
450 	offset = offsetof(struct rkvdec_hevc_priv_tbl, param_set);
451 	regs->common_addr.reg131_gbl_base = priv_start_addr + offset;
452 	regs->h26x_params.reg067_global_len = sizeof(struct rkvdec_hevc_sps_pps) / 16;
453 
454 	/* Set hw rps address */
455 	offset = offsetof(struct rkvdec_hevc_priv_tbl, rps);
456 	regs->common_addr.reg129_rps_base = priv_start_addr + offset;
457 
458 	/* Set cabac table */
459 	offset = offsetof(struct rkvdec_hevc_priv_tbl, cabac_table);
460 	regs->common_addr.reg130_cabactbl_base = priv_start_addr + offset;
461 
462 	rkvdec_write_regs(ctx);
463 }
464 
465 static int rkvdec_hevc_validate_sps(struct rkvdec_ctx *ctx,
466 				    const struct v4l2_ctrl_hevc_sps *sps)
467 {
468 	if (sps->chroma_format_idc != 1)
469 		/* Only 4:2:0 is supported */
470 		return -EINVAL;
471 
472 	if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8)
473 		/* Luma and chroma bit depth mismatch */
474 		return -EINVAL;
475 
476 	if (sps->bit_depth_luma_minus8 != 0 && sps->bit_depth_luma_minus8 != 2)
477 		/* Only 8-bit and 10-bit are supported */
478 		return -EINVAL;
479 
480 	if (sps->pic_width_in_luma_samples > ctx->coded_fmt.fmt.pix_mp.width ||
481 	    sps->pic_height_in_luma_samples > ctx->coded_fmt.fmt.pix_mp.height)
482 		return -EINVAL;
483 
484 	return 0;
485 }
486 
487 static int rkvdec_hevc_start(struct rkvdec_ctx *ctx)
488 {
489 	struct rkvdec_dev *rkvdec = ctx->dev;
490 	struct rkvdec_hevc_priv_tbl *priv_tbl;
491 	struct rkvdec_hevc_ctx *hevc_ctx;
492 	struct v4l2_ctrl *ctrl;
493 	int ret;
494 
495 	ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl,
496 			      V4L2_CID_STATELESS_HEVC_SPS);
497 	if (!ctrl)
498 		return -EINVAL;
499 
500 	ret = rkvdec_hevc_validate_sps(ctx, ctrl->p_new.p_hevc_sps);
501 	if (ret)
502 		return ret;
503 
504 	hevc_ctx = kzalloc_obj(*hevc_ctx);
505 	if (!hevc_ctx)
506 		return -ENOMEM;
507 
508 	priv_tbl = dma_alloc_coherent(rkvdec->dev, sizeof(*priv_tbl),
509 				      &hevc_ctx->priv_tbl.dma, GFP_KERNEL);
510 	if (!priv_tbl) {
511 		ret = -ENOMEM;
512 		goto err_free_ctx;
513 	}
514 
515 	hevc_ctx->priv_tbl.size = sizeof(*priv_tbl);
516 	hevc_ctx->priv_tbl.cpu = priv_tbl;
517 	memcpy(priv_tbl->cabac_table, rkvdec_hevc_cabac_table,
518 	       sizeof(rkvdec_hevc_cabac_table));
519 
520 	ctx->priv = hevc_ctx;
521 	return 0;
522 
523 err_free_ctx:
524 	kfree(hevc_ctx);
525 	return ret;
526 }
527 
528 static void rkvdec_hevc_stop(struct rkvdec_ctx *ctx)
529 {
530 	struct rkvdec_hevc_ctx *hevc_ctx = ctx->priv;
531 	struct rkvdec_dev *rkvdec = ctx->dev;
532 
533 	dma_free_coherent(rkvdec->dev, hevc_ctx->priv_tbl.size,
534 			  hevc_ctx->priv_tbl.cpu, hevc_ctx->priv_tbl.dma);
535 	kfree(hevc_ctx);
536 }
537 
538 static int rkvdec_hevc_run(struct rkvdec_ctx *ctx)
539 {
540 	struct rkvdec_dev *rkvdec = ctx->dev;
541 	struct rkvdec_hevc_run run;
542 	struct rkvdec_hevc_ctx *hevc_ctx = ctx->priv;
543 	struct rkvdec_hevc_priv_tbl *tbl = hevc_ctx->priv_tbl.cpu;
544 	u32 timeout_threshold;
545 
546 	rkvdec_hevc_run_preamble(ctx, &run);
547 
548 	/*
549 	 * On vdpu383, not setting the long and short term ref sets leads to IOMMU page faults.
550 	 * To be on the safe side for this new v4l2 control, write an error in the log and mark
551 	 * the buffer as failed by returning an error here.
552 	 */
553 	if ((!ctx->has_sps_lt_rps && run.sps->num_long_term_ref_pics_sps) ||
554 	    (!ctx->has_sps_st_rps && run.sps->num_short_term_ref_pic_sets)) {
555 		dev_err_ratelimited(rkvdec->dev, "Long and short term RPS not set\n");
556 		return -EINVAL;
557 	}
558 
559 	rkvdec_hevc_assemble_hw_scaling_list(ctx, &run, &tbl->scaling_list,
560 					     &hevc_ctx->scaling_matrix_cache);
561 	assemble_hw_pps(ctx, &run);
562 	rkvdec_hevc_assemble_hw_rps(&run, &tbl->rps, &hevc_ctx->st_cache);
563 
564 	config_registers(ctx, &run);
565 
566 	rkvdec_run_postamble(ctx, &run.base);
567 
568 	timeout_threshold = hevc_ctx->regs.common.reg013_core_timeout_threshold;
569 	rkvdec_schedule_watchdog(rkvdec, timeout_threshold);
570 
571 	/* Start decoding! */
572 	writel(timeout_threshold, rkvdec->link + VDPU383_LINK_TIMEOUT_THRESHOLD);
573 	writel(VDPU383_IP_CRU_MODE, rkvdec->link + VDPU383_LINK_IP_ENABLE);
574 	writel(VDPU383_DEC_E_BIT, rkvdec->link + VDPU383_LINK_DEC_ENABLE);
575 
576 	return 0;
577 }
578 
579 static int rkvdec_hevc_try_ctrl(struct rkvdec_ctx *ctx, struct v4l2_ctrl *ctrl)
580 {
581 	if (ctrl->id == V4L2_CID_STATELESS_HEVC_SPS)
582 		return rkvdec_hevc_validate_sps(ctx, ctrl->p_new.p_hevc_sps);
583 
584 	return 0;
585 }
586 
587 const struct rkvdec_coded_fmt_ops rkvdec_vdpu383_hevc_fmt_ops = {
588 	.adjust_fmt = rkvdec_hevc_adjust_fmt,
589 	.start = rkvdec_hevc_start,
590 	.stop = rkvdec_hevc_stop,
591 	.run = rkvdec_hevc_run,
592 	.try_ctrl = rkvdec_hevc_try_ctrl,
593 	.get_image_fmt = rkvdec_hevc_get_image_fmt,
594 };
595