xref: /linux/drivers/media/platform/rockchip/rkvdec/rkvdec-hevc.c (revision adfc4fa79b62a76cd5509b0339fab971404c46d2)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Rockchip Video Decoder HEVC backend
4  *
5  * Copyright (C) 2023 Collabora, Ltd.
6  *      Sebastian Fricke <sebastian.fricke@collabora.com>
7  *
8  * Copyright (C) 2019 Collabora, Ltd.
9  *	Boris Brezillon <boris.brezillon@collabora.com>
10  *
11  * Copyright (C) 2016 Rockchip Electronics Co., Ltd.
12  *	Jeffy Chen <jeffy.chen@rock-chips.com>
13  */
14 
15 #include <media/v4l2-mem2mem.h>
16 
17 #include "rkvdec.h"
18 #include "rkvdec-regs.h"
19 #include "rkvdec-cabac.h"
20 #include "rkvdec-hevc-common.h"
21 #include "rkvdec-bitwriter.h"
22 
/*
 * Dimensions of the hardware tables.
 *
 * RKV_PPS_SIZE and RKV_RPS_SIZE count u32 words per packet (80 and 32 bytes
 * respectively); RKV_PPS_LEN and RKV_RPS_LEN count how many packets each
 * table holds. RKV_SCALING_LIST_SIZE is presumably expressed in u8 units
 * (TODO confirm against its user, which is not in this file).
 */
#define RKV_SCALING_LIST_SIZE		1360
#define RKV_PPS_SIZE			(80 / 4)
#define RKV_PPS_LEN			64
#define RKV_RPS_SIZE			(32 / 4)
#define RKV_RPS_LEN			600
29 
/*
 * One SPS/PPS packet as stored in the auxiliary buffer: RKV_PPS_SIZE u32
 * words (80 bytes). The individual fields are addressed by bit offset through
 * the SPS/PPS BW_FIELD() definitions in this file.
 */
struct rkvdec_sps_pps_packet {
	u32 info[RKV_PPS_SIZE];
};
33 
/*
 * One reference picture set packet as stored in the auxiliary buffer:
 * RKV_RPS_SIZE u32 words (32 bytes). assemble_sw_rps() fills one packet per
 * slice.
 */
struct rkvdec_rps_packet {
	u32 info[RKV_RPS_SIZE];
};
37 
/*
 * Bit positions of the fields inside one struct rkvdec_sps_pps_packet.
 *
 * Judging by how consecutive entries line up, BW_FIELD(a, b) takes the bit
 * offset of the field inside the packet followed by its width in bits
 * (TODO confirm against rkvdec-bitwriter.h).
 *
 * SPS: bits 0..99 are used, bits 100..127 are left untouched.
 */
#define VIDEO_PARAMETER_SET_ID				BW_FIELD(0, 4)
#define SEQ_PARAMETER_SET_ID				BW_FIELD(4, 4)
#define CHROMA_FORMAT_IDC				BW_FIELD(8, 2)
#define PIC_WIDTH_IN_LUMA_SAMPLES			BW_FIELD(10, 13)
#define PIC_HEIGHT_IN_LUMA_SAMPLES			BW_FIELD(23, 13)
#define BIT_DEPTH_LUMA					BW_FIELD(36, 4)
#define BIT_DEPTH_CHROMA				BW_FIELD(40, 4)
#define LOG2_MAX_PIC_ORDER_CNT_LSB			BW_FIELD(44, 5)
#define LOG2_DIFF_MAX_MIN_LUMA_CODING_BLOCK_SIZE	BW_FIELD(49, 2)
#define LOG2_MIN_LUMA_CODING_BLOCK_SIZE			BW_FIELD(51, 3)
#define LOG2_MIN_TRANSFORM_BLOCK_SIZE			BW_FIELD(54, 3)
#define LOG2_DIFF_MAX_MIN_LUMA_TRANSFORM_BLOCK_SIZE	BW_FIELD(57, 2)
#define MAX_TRANSFORM_HIERARCHY_DEPTH_INTER		BW_FIELD(59, 3)
#define MAX_TRANSFORM_HIERARCHY_DEPTH_INTRA		BW_FIELD(62, 3)
#define SCALING_LIST_ENABLED_FLAG			BW_FIELD(65, 1)
#define AMP_ENABLED_FLAG				BW_FIELD(66, 1)
#define SAMPLE_ADAPTIVE_OFFSET_ENABLED_FLAG		BW_FIELD(67, 1)
#define PCM_ENABLED_FLAG				BW_FIELD(68, 1)
#define PCM_SAMPLE_BIT_DEPTH_LUMA			BW_FIELD(69, 4)
#define PCM_SAMPLE_BIT_DEPTH_CHROMA			BW_FIELD(73, 4)
#define PCM_LOOP_FILTER_DISABLED_FLAG			BW_FIELD(77, 1)
#define LOG2_DIFF_MAX_MIN_PCM_LUMA_CODING_BLOCK_SIZE	BW_FIELD(78, 3)
#define LOG2_MIN_PCM_LUMA_CODING_BLOCK_SIZE		BW_FIELD(81, 3)
#define NUM_SHORT_TERM_REF_PIC_SETS			BW_FIELD(84, 7)
#define LONG_TERM_REF_PICS_PRESENT_FLAG			BW_FIELD(91, 1)
#define NUM_LONG_TERM_REF_PICS_SPS			BW_FIELD(92, 6)
#define SPS_TEMPORAL_MVP_ENABLED_FLAG			BW_FIELD(98, 1)
#define STRONG_INTRA_SMOOTHING_ENABLED_FLAG		BW_FIELD(99, 1)
/*
 * PPS: starts at bit 128. Bits 209..211 and 222..255 are not assigned.
 * COLUMN_WIDTH() has room for 20 entries (bits 256..415) and ROW_HEIGHT() for
 * 22 entries (bits 416..591); the scaling list address takes bits 592..623.
 */
#define PIC_PARAMETER_SET_ID				BW_FIELD(128, 6)
#define PPS_SEQ_PARAMETER_SET_ID			BW_FIELD(134, 4)
#define DEPENDENT_SLICE_SEGMENTS_ENABLED_FLAG		BW_FIELD(138, 1)
#define OUTPUT_FLAG_PRESENT_FLAG			BW_FIELD(139, 1)
#define NUM_EXTRA_SLICE_HEADER_BITS			BW_FIELD(140, 13)
#define SIGN_DATA_HIDING_ENABLED_FLAG			BW_FIELD(153, 1)
#define CABAC_INIT_PRESENT_FLAG				BW_FIELD(154, 1)
#define NUM_REF_IDX_L0_DEFAULT_ACTIVE			BW_FIELD(155, 4)
#define NUM_REF_IDX_L1_DEFAULT_ACTIVE			BW_FIELD(159, 4)
#define INIT_QP_MINUS26					BW_FIELD(163, 7)
#define CONSTRAINED_INTRA_PRED_FLAG			BW_FIELD(170, 1)
#define TRANSFORM_SKIP_ENABLED_FLAG			BW_FIELD(171, 1)
#define CU_QP_DELTA_ENABLED_FLAG			BW_FIELD(172, 1)
#define LOG2_MIN_CU_QP_DELTA_SIZE			BW_FIELD(173, 3)
#define PPS_CB_QP_OFFSET				BW_FIELD(176, 5)
#define PPS_CR_QP_OFFSET				BW_FIELD(181, 5)
#define PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT_FLAG	BW_FIELD(186, 1)
#define WEIGHTED_PRED_FLAG				BW_FIELD(187, 1)
#define WEIGHTED_BIPRED_FLAG				BW_FIELD(188, 1)
#define TRANSQUANT_BYPASS_ENABLED_FLAG			BW_FIELD(189, 1)
#define TILES_ENABLED_FLAG				BW_FIELD(190, 1)
#define ENTROPY_CODING_SYNC_ENABLED_FLAG		BW_FIELD(191, 1)
#define PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED_FLAG	BW_FIELD(192, 1)
#define LOOP_FILTER_ACROSS_TILES_ENABLED_FLAG		BW_FIELD(193, 1)
#define DEBLOCKING_FILTER_OVERRIDE_ENABLED_FLAG		BW_FIELD(194, 1)
#define PPS_DEBLOCKING_FILTER_DISABLED_FLAG		BW_FIELD(195, 1)
#define PPS_BETA_OFFSET_DIV2				BW_FIELD(196, 4)
#define PPS_TC_OFFSET_DIV2				BW_FIELD(200, 4)
#define LISTS_MODIFICATION_PRESENT_FLAG			BW_FIELD(204, 1)
#define LOG2_PARALLEL_MERGE_LEVEL			BW_FIELD(205, 3)
#define SLICE_SEGMENT_HEADER_EXTENSION_PRESENT_FLAG	BW_FIELD(208, 1)
#define NUM_TILE_COLUMNS				BW_FIELD(212, 5)
#define NUM_TILE_ROWS					BW_FIELD(217, 5)
#define COLUMN_WIDTH(i)					BW_FIELD(256 + ((i) * 8), 8)
#define ROW_HEIGHT(i)					BW_FIELD(416 + ((i) * 8), 8)
#define SCALING_LIST_ADDRESS				BW_FIELD(592, 32)
104 
/*
 * Layout of the auxiliary buffer shared with the hardware.
 *
 * The buffer is allocated with dma_alloc_coherent() in rkvdec_hevc_start().
 * config_registers() programs the DMA addresses of cabac_table, param_set
 * and rps (buffer base + offsetof()) into the decoder, and assemble_hw_pps()
 * stores the address of scaling_list inside each SPS/PPS packet, so the
 * member order and sizes are part of the hardware interface.
 */
struct rkvdec_hevc_priv_tbl {
	/* Copy of rkvdec_hevc_cabac_table, written once at start. */
	u8 cabac_table[RKV_HEVC_CABAC_TABLE_SIZE];
	/* Filled by rkvdec_hevc_assemble_hw_scaling_list() on every run. */
	struct scaling_factor scaling_list;
	/* SPS/PPS packets, indexed by pic_parameter_set_id. */
	struct rkvdec_sps_pps_packet param_set[RKV_PPS_LEN];
	/* Reference picture set packets, indexed by slice number. */
	struct rkvdec_rps_packet rps[RKV_RPS_LEN];
};
112 
/* Per-context state of the HEVC backend, stored in rkvdec_ctx::priv. */
struct rkvdec_hevc_ctx {
	/* CPU/DMA handles and size of the struct rkvdec_hevc_priv_tbl buffer. */
	struct rkvdec_aux_buf priv_tbl;
	/*
	 * Handed to rkvdec_hevc_assemble_hw_scaling_list() on every run;
	 * presumably the last scaling matrix that was converted (TODO confirm
	 * in rkvdec-hevc-common).
	 */
	struct v4l2_ctrl_hevc_scaling_matrix scaling_matrix_cache;
	/* Register image built by config_registers() before it is copied to MMIO. */
	struct rkvdec_regs regs;
};
118 
119 static void assemble_hw_pps(struct rkvdec_ctx *ctx,
120 			    struct rkvdec_hevc_run *run)
121 {
122 	struct rkvdec_hevc_ctx *hevc_ctx = ctx->priv;
123 	const struct v4l2_ctrl_hevc_sps *sps = run->sps;
124 	const struct v4l2_ctrl_hevc_pps *pps = run->pps;
125 	struct rkvdec_hevc_priv_tbl *priv_tbl = hevc_ctx->priv_tbl.cpu;
126 	struct rkvdec_sps_pps_packet *hw_ps;
127 	u32 min_cb_log2_size_y, ctb_log2_size_y, ctb_size_y;
128 	u32 log2_min_cu_qp_delta_size, scaling_distance;
129 	dma_addr_t scaling_list_address;
130 	int i;
131 
132 	/*
133 	 * HW read the SPS/PPS information from PPS packet index by PPS id.
134 	 * offset from the base can be calculated by PPS_id * 80 (size per PPS
135 	 * packet unit). so the driver copy SPS/PPS information to the exact PPS
136 	 * packet unit for HW accessing.
137 	 */
138 	hw_ps = &priv_tbl->param_set[pps->pic_parameter_set_id];
139 	memset(hw_ps, 0, sizeof(*hw_ps));
140 
141 #define WRITE_PPS(value, field) rkvdec_set_bw_field(hw_ps->info, field, value)
142 	/* write sps */
143 	WRITE_PPS(sps->video_parameter_set_id, VIDEO_PARAMETER_SET_ID);
144 	WRITE_PPS(sps->seq_parameter_set_id, SEQ_PARAMETER_SET_ID);
145 	WRITE_PPS(sps->chroma_format_idc, CHROMA_FORMAT_IDC);
146 	WRITE_PPS(sps->pic_width_in_luma_samples, PIC_WIDTH_IN_LUMA_SAMPLES);
147 	WRITE_PPS(sps->pic_height_in_luma_samples, PIC_HEIGHT_IN_LUMA_SAMPLES);
148 	WRITE_PPS(sps->bit_depth_luma_minus8 + 8, BIT_DEPTH_LUMA);
149 	WRITE_PPS(sps->bit_depth_chroma_minus8 + 8, BIT_DEPTH_CHROMA);
150 	WRITE_PPS(sps->log2_max_pic_order_cnt_lsb_minus4 + 4,
151 		  LOG2_MAX_PIC_ORDER_CNT_LSB);
152 	WRITE_PPS(sps->log2_diff_max_min_luma_coding_block_size,
153 		  LOG2_DIFF_MAX_MIN_LUMA_CODING_BLOCK_SIZE);
154 	WRITE_PPS(sps->log2_min_luma_coding_block_size_minus3 + 3,
155 		  LOG2_MIN_LUMA_CODING_BLOCK_SIZE);
156 	WRITE_PPS(sps->log2_min_luma_transform_block_size_minus2 + 2,
157 		  LOG2_MIN_TRANSFORM_BLOCK_SIZE);
158 	WRITE_PPS(sps->log2_diff_max_min_luma_transform_block_size,
159 		  LOG2_DIFF_MAX_MIN_LUMA_TRANSFORM_BLOCK_SIZE);
160 	WRITE_PPS(sps->max_transform_hierarchy_depth_inter,
161 		  MAX_TRANSFORM_HIERARCHY_DEPTH_INTER);
162 	WRITE_PPS(sps->max_transform_hierarchy_depth_intra,
163 		  MAX_TRANSFORM_HIERARCHY_DEPTH_INTRA);
164 	WRITE_PPS(!!(sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED),
165 		  SCALING_LIST_ENABLED_FLAG);
166 	WRITE_PPS(!!(sps->flags & V4L2_HEVC_SPS_FLAG_AMP_ENABLED),
167 		  AMP_ENABLED_FLAG);
168 	WRITE_PPS(!!(sps->flags & V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET),
169 		  SAMPLE_ADAPTIVE_OFFSET_ENABLED_FLAG);
170 	if (sps->flags & V4L2_HEVC_SPS_FLAG_PCM_ENABLED) {
171 		WRITE_PPS(1, PCM_ENABLED_FLAG);
172 		WRITE_PPS(sps->pcm_sample_bit_depth_luma_minus1 + 1,
173 			  PCM_SAMPLE_BIT_DEPTH_LUMA);
174 		WRITE_PPS(sps->pcm_sample_bit_depth_chroma_minus1 + 1,
175 			  PCM_SAMPLE_BIT_DEPTH_CHROMA);
176 		WRITE_PPS(!!(sps->flags & V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED),
177 			  PCM_LOOP_FILTER_DISABLED_FLAG);
178 		WRITE_PPS(sps->log2_diff_max_min_pcm_luma_coding_block_size,
179 			  LOG2_DIFF_MAX_MIN_PCM_LUMA_CODING_BLOCK_SIZE);
180 		WRITE_PPS(sps->log2_min_pcm_luma_coding_block_size_minus3 + 3,
181 			  LOG2_MIN_PCM_LUMA_CODING_BLOCK_SIZE);
182 	}
183 	WRITE_PPS(sps->num_short_term_ref_pic_sets, NUM_SHORT_TERM_REF_PIC_SETS);
184 	WRITE_PPS(!!(sps->flags & V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT),
185 		  LONG_TERM_REF_PICS_PRESENT_FLAG);
186 	WRITE_PPS(sps->num_long_term_ref_pics_sps, NUM_LONG_TERM_REF_PICS_SPS);
187 	WRITE_PPS(!!(sps->flags & V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED),
188 		  SPS_TEMPORAL_MVP_ENABLED_FLAG);
189 	WRITE_PPS(!!(sps->flags & V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED),
190 		  STRONG_INTRA_SMOOTHING_ENABLED_FLAG);
191 
192 	/* write pps */
193 	WRITE_PPS(pps->pic_parameter_set_id, PIC_PARAMETER_SET_ID);
194 	WRITE_PPS(sps->seq_parameter_set_id, PPS_SEQ_PARAMETER_SET_ID);
195 	WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED),
196 		  DEPENDENT_SLICE_SEGMENTS_ENABLED_FLAG);
197 	WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT),
198 		  OUTPUT_FLAG_PRESENT_FLAG);
199 	WRITE_PPS(pps->num_extra_slice_header_bits, NUM_EXTRA_SLICE_HEADER_BITS);
200 	WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED),
201 		  SIGN_DATA_HIDING_ENABLED_FLAG);
202 	WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT),
203 		  CABAC_INIT_PRESENT_FLAG);
204 	WRITE_PPS(pps->num_ref_idx_l0_default_active_minus1 + 1,
205 		  NUM_REF_IDX_L0_DEFAULT_ACTIVE);
206 	WRITE_PPS(pps->num_ref_idx_l1_default_active_minus1 + 1,
207 		  NUM_REF_IDX_L1_DEFAULT_ACTIVE);
208 	WRITE_PPS(pps->init_qp_minus26, INIT_QP_MINUS26);
209 	WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED),
210 		  CONSTRAINED_INTRA_PRED_FLAG);
211 	WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED),
212 		  TRANSFORM_SKIP_ENABLED_FLAG);
213 	WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED),
214 		  CU_QP_DELTA_ENABLED_FLAG);
215 
216 	min_cb_log2_size_y = sps->log2_min_luma_coding_block_size_minus3 + 3;
217 	ctb_log2_size_y = min_cb_log2_size_y +
218 		sps->log2_diff_max_min_luma_coding_block_size;
219 	ctb_size_y = 1 << ctb_log2_size_y;
220 	log2_min_cu_qp_delta_size = ctb_log2_size_y - pps->diff_cu_qp_delta_depth;
221 	WRITE_PPS(log2_min_cu_qp_delta_size, LOG2_MIN_CU_QP_DELTA_SIZE);
222 	WRITE_PPS(pps->pps_cb_qp_offset, PPS_CB_QP_OFFSET);
223 	WRITE_PPS(pps->pps_cr_qp_offset, PPS_CR_QP_OFFSET);
224 	WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT),
225 		  PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT_FLAG);
226 	WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED),
227 		  WEIGHTED_PRED_FLAG);
228 	WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED),
229 		  WEIGHTED_BIPRED_FLAG);
230 	WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED),
231 		  TRANSQUANT_BYPASS_ENABLED_FLAG);
232 	WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED),
233 		  TILES_ENABLED_FLAG);
234 	WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED),
235 		  ENTROPY_CODING_SYNC_ENABLED_FLAG);
236 	WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED),
237 		  PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED_FLAG);
238 	WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED),
239 		  LOOP_FILTER_ACROSS_TILES_ENABLED_FLAG);
240 	WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED),
241 		  DEBLOCKING_FILTER_OVERRIDE_ENABLED_FLAG);
242 	WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER),
243 		  PPS_DEBLOCKING_FILTER_DISABLED_FLAG);
244 	WRITE_PPS(pps->pps_beta_offset_div2, PPS_BETA_OFFSET_DIV2);
245 	WRITE_PPS(pps->pps_tc_offset_div2, PPS_TC_OFFSET_DIV2);
246 	WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT),
247 		  LISTS_MODIFICATION_PRESENT_FLAG);
248 	WRITE_PPS(pps->log2_parallel_merge_level_minus2 + 2, LOG2_PARALLEL_MERGE_LEVEL);
249 	WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT),
250 		  SLICE_SEGMENT_HEADER_EXTENSION_PRESENT_FLAG);
251 	WRITE_PPS(pps->num_tile_columns_minus1 + 1, NUM_TILE_COLUMNS);
252 	WRITE_PPS(pps->num_tile_rows_minus1 + 1, NUM_TILE_ROWS);
253 
254 	if (pps->flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED) {
255 		/* Userspace also provide column width and row height for uniform spacing */
256 		for (i = 0; i <= pps->num_tile_columns_minus1; i++)
257 			WRITE_PPS(pps->column_width_minus1[i], COLUMN_WIDTH(i));
258 		for (i = 0; i <= pps->num_tile_rows_minus1; i++)
259 			WRITE_PPS(pps->row_height_minus1[i], ROW_HEIGHT(i));
260 	} else {
261 		WRITE_PPS(((sps->pic_width_in_luma_samples + ctb_size_y - 1) / ctb_size_y) - 1,
262 			  COLUMN_WIDTH(0));
263 		WRITE_PPS(((sps->pic_height_in_luma_samples + ctb_size_y - 1) / ctb_size_y) - 1,
264 			  ROW_HEIGHT(0));
265 	}
266 
267 	scaling_distance = offsetof(struct rkvdec_hevc_priv_tbl, scaling_list);
268 	scaling_list_address = hevc_ctx->priv_tbl.dma + scaling_distance;
269 	WRITE_PPS(scaling_list_address, SCALING_LIST_ADDRESS);
270 }
271 
272 /*
273  * Creation of the Reference Picture Set memory blob for the hardware.
274  * The layout looks like this:
275  * [0] 32 bits for L0 (6 references + 2 bits of the 7th reference)
276  * [1] 32 bits for L0 (remaining 3 bits of the 7th reference + 5 references
277  *     + 4 bits of the 13th reference)
278  * [2] 11 bits for L0 (remaining bit for 13 and 2 references) and
279  *     21 bits for L1 (4 references + first bit of 5)
280  * [3] 32 bits of padding with 0s
281  * [4] 32 bits for L1 (remaining 4 bits for 5 + 5 references + 3 bits of 11)
282  * [5] 22 bits for L1 (remaining 2 bits of 11 and 4 references)
283  *     lowdelay flag (bit 23), rps bit offset long term (bit 24 - 32)
 * [6] rps bit offset long term (bit 1), rps bit offset short term (bit 2 - 10)
 *     number of references (bit 11 - 14), remaining 18 bits of padding with 0s
286  * [7] 32 bits of padding with 0s
287  *
288  * Thus we have to set up padding in between reference 5 of the L1 list.
289  */
290 static void assemble_sw_rps(struct rkvdec_ctx *ctx,
291 			    struct rkvdec_hevc_run *run)
292 {
293 	const struct v4l2_ctrl_hevc_decode_params *decode_params = run->decode_params;
294 	const struct v4l2_ctrl_hevc_sps *sps = run->sps;
295 	const struct v4l2_ctrl_hevc_slice_params *sl_params;
296 	const struct v4l2_hevc_dpb_entry *dpb;
297 	struct rkvdec_hevc_ctx *hevc_ctx = ctx->priv;
298 	struct rkvdec_hevc_priv_tbl *priv_tbl = hevc_ctx->priv_tbl.cpu;
299 	struct rkvdec_rps_packet *hw_ps;
300 	int i, j;
301 	unsigned int lowdelay;
302 
303 #define WRITE_RPS(value, field) rkvdec_set_bw_field(hw_ps->info, field, value)
304 
305 #define REF_PIC_LONG_TERM_L0(n)			BW_FIELD((n) * 5, 1)
306 #define REF_PIC_IDX_L0(n)			BW_FIELD(1 + ((n) * 5), 4)
307 #define REF_PIC_LONG_TERM_L1(n)			BW_FIELD(((n) < 5 ? 75 : 132) + ((n) * 5), 1)
308 #define REF_PIC_IDX_L1(n)			BW_FIELD(((n) < 4 ? 76 : 128) + ((n) * 5), 4)
309 
310 #define LOWDELAY				BW_FIELD(182, 1)
311 #define LONG_TERM_RPS_BIT_OFFSET		BW_FIELD(183, 10)
312 #define SHORT_TERM_RPS_BIT_OFFSET		BW_FIELD(193, 9)
313 #define NUM_RPS_POC				BW_FIELD(202, 4)
314 
315 	for (j = 0; j < run->num_slices; j++) {
316 		uint st_bit_offset = 0;
317 		uint num_l0_refs = 0;
318 		uint num_l1_refs = 0;
319 
320 		sl_params = &run->slices_params[j];
321 		dpb = decode_params->dpb;
322 
323 		if (sl_params->slice_type != V4L2_HEVC_SLICE_TYPE_I) {
324 			num_l0_refs = sl_params->num_ref_idx_l0_active_minus1 + 1;
325 
326 			if (sl_params->slice_type == V4L2_HEVC_SLICE_TYPE_B)
327 				num_l1_refs = sl_params->num_ref_idx_l1_active_minus1 + 1;
328 
329 			lowdelay = 1;
330 		} else {
331 			lowdelay = 0;
332 		}
333 
334 		hw_ps = &priv_tbl->rps[j];
335 		memset(hw_ps, 0, sizeof(*hw_ps));
336 
337 		for (i = 0; i < num_l0_refs; i++) {
338 			const struct v4l2_hevc_dpb_entry dpb_l0 = dpb[sl_params->ref_idx_l0[i]];
339 
340 			WRITE_RPS(!!(dpb_l0.flags & V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE),
341 				  REF_PIC_LONG_TERM_L0(i));
342 			WRITE_RPS(sl_params->ref_idx_l0[i], REF_PIC_IDX_L0(i));
343 
344 			if (dpb_l0.pic_order_cnt_val > sl_params->slice_pic_order_cnt)
345 				lowdelay = 0;
346 		}
347 
348 		for (i = 0; i < num_l1_refs; i++) {
349 			const struct v4l2_hevc_dpb_entry dpb_l1 = dpb[sl_params->ref_idx_l1[i]];
350 			int is_long_term =
351 				!!(dpb_l1.flags & V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE);
352 
353 			WRITE_RPS(is_long_term, REF_PIC_LONG_TERM_L1(i));
354 			WRITE_RPS(sl_params->ref_idx_l1[i], REF_PIC_IDX_L1(i));
355 
356 			if (dpb_l1.pic_order_cnt_val > sl_params->slice_pic_order_cnt)
357 				lowdelay = 0;
358 		}
359 
360 		WRITE_RPS(lowdelay, LOWDELAY);
361 
362 		if (!(decode_params->flags & V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC)) {
363 			if (sl_params->short_term_ref_pic_set_size)
364 				st_bit_offset = sl_params->short_term_ref_pic_set_size;
365 			else if (sps->num_short_term_ref_pic_sets > 1)
366 				st_bit_offset = fls(sps->num_short_term_ref_pic_sets - 1);
367 		}
368 
369 		WRITE_RPS(st_bit_offset + sl_params->long_term_ref_pic_set_size,
370 			  LONG_TERM_RPS_BIT_OFFSET);
371 		WRITE_RPS(sl_params->short_term_ref_pic_set_size,
372 			  SHORT_TERM_RPS_BIT_OFFSET);
373 
374 		WRITE_RPS(decode_params->num_poc_st_curr_before +
375 			  decode_params->num_poc_st_curr_after +
376 			  decode_params->num_poc_lt_curr,
377 			  NUM_RPS_POC);
378 	}
379 }
380 
381 static void config_registers(struct rkvdec_ctx *ctx,
382 			     struct rkvdec_hevc_run *run)
383 {
384 	struct rkvdec_dev *rkvdec = ctx->dev;
385 	const struct v4l2_ctrl_hevc_decode_params *decode_params = run->decode_params;
386 	const struct v4l2_ctrl_hevc_sps *sps = run->sps;
387 	const struct v4l2_ctrl_hevc_slice_params *sl_params = &run->slices_params[0];
388 	const struct v4l2_hevc_dpb_entry *dpb = decode_params->dpb;
389 	struct rkvdec_hevc_ctx *hevc_ctx = ctx->priv;
390 	struct rkvdec_regs *regs = &hevc_ctx->regs;
391 	dma_addr_t priv_start_addr = hevc_ctx->priv_tbl.dma;
392 	const struct v4l2_pix_format_mplane *dst_fmt;
393 	struct vb2_v4l2_buffer *src_buf = run->base.bufs.src;
394 	struct vb2_v4l2_buffer *dst_buf = run->base.bufs.dst;
395 	const struct v4l2_format *f;
396 	dma_addr_t rlc_addr;
397 	dma_addr_t refer_addr;
398 	u32 rlc_len;
399 	u32 hor_virstride;
400 	u32 ver_virstride;
401 	u32 y_virstride;
402 	u32 yuv_virstride = 0;
403 	u32 offset;
404 	dma_addr_t dst_addr;
405 	u32 reg, i;
406 
407 	memset(regs, 0, sizeof(*regs));
408 
409 	regs->common.reg02.dec_mode = RKVDEC_MODE_HEVC;
410 
411 	f = &ctx->decoded_fmt;
412 	dst_fmt = &f->fmt.pix_mp;
413 	hor_virstride = dst_fmt->plane_fmt[0].bytesperline;
414 	ver_virstride = dst_fmt->height;
415 	y_virstride = hor_virstride * ver_virstride;
416 
417 	if (sps->chroma_format_idc == 0)
418 		yuv_virstride = y_virstride;
419 	else if (sps->chroma_format_idc == 1)
420 		yuv_virstride = y_virstride + y_virstride / 2;
421 	else if (sps->chroma_format_idc == 2)
422 		yuv_virstride = 2 * y_virstride;
423 
424 	regs->common.reg03.slice_num_lowbits = run->num_slices;
425 	regs->common.reg03.uv_hor_virstride = hor_virstride / 16;
426 	regs->common.reg03.y_hor_virstride = hor_virstride / 16;
427 
428 	/* config rlc base address */
429 	rlc_addr = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0);
430 	regs->common.strm_rlc_base = rlc_addr;
431 
432 	rlc_len = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
433 	regs->common.stream_len = round_up(rlc_len, 16) + 64;
434 
435 	/* config cabac table */
436 	offset = offsetof(struct rkvdec_hevc_priv_tbl, cabac_table);
437 	regs->common.cabactbl_base = priv_start_addr + offset;
438 
439 	/* config output base address */
440 	dst_addr = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
441 	regs->common.decout_base = dst_addr;
442 
443 	regs->common.reg08.y_virstride = y_virstride / 16;
444 	regs->common.reg09.yuv_virstride = yuv_virstride / 16;
445 
446 	/* config ref pic address */
447 	for (i = 0; i < 15; i++) {
448 		struct vb2_buffer *vb_buf = get_ref_buf(ctx, run, i);
449 
450 		if (i < 4 && decode_params->num_active_dpb_entries) {
451 			reg = GENMASK(decode_params->num_active_dpb_entries - 1, 0);
452 			reg = (reg >> (i * 4)) & 0xf;
453 		} else {
454 			reg = 0;
455 		}
456 
457 		refer_addr = vb2_dma_contig_plane_dma_addr(vb_buf, 0);
458 
459 		regs->h26x.ref0_14_base[i].base_addr = refer_addr >> 4;
460 		regs->h26x.ref0_14_base[i].field_ref = !!(reg & 1);
461 		regs->h26x.ref0_14_base[i].topfield_used_ref = !!(reg & 2);
462 		regs->h26x.ref0_14_base[i].botfield_used_ref = !!(reg & 4);
463 		regs->h26x.ref0_14_base[i].colmv_use_flag_ref = !!(reg & 8);
464 
465 		regs->h26x.ref0_14_poc[i] = i < decode_params->num_active_dpb_entries
466 					    ? dpb[i].pic_order_cnt_val
467 					    : 0;
468 	}
469 
470 	regs->h26x.cur_poc = sl_params->slice_pic_order_cnt;
471 
472 	/* config hw pps address */
473 	offset = offsetof(struct rkvdec_hevc_priv_tbl, param_set);
474 	regs->h26x.pps_base = priv_start_addr + offset;
475 
476 	/* config hw rps address */
477 	offset = offsetof(struct rkvdec_hevc_priv_tbl, rps);
478 	regs->h26x.rps_base = priv_start_addr + offset;
479 
480 	rkvdec_memcpy_toio(rkvdec->regs, regs,
481 			   MIN(sizeof(*regs), sizeof(u32) * rkvdec->variant->num_regs));
482 }
483 
484 static int rkvdec_hevc_validate_sps(struct rkvdec_ctx *ctx,
485 				    const struct v4l2_ctrl_hevc_sps *sps)
486 {
487 	if (sps->chroma_format_idc > 1)
488 		/* Only 4:0:0 and 4:2:0 are supported */
489 		return -EINVAL;
490 	if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8)
491 		/* Luma and chroma bit depth mismatch */
492 		return -EINVAL;
493 	if (sps->bit_depth_luma_minus8 != 0 && sps->bit_depth_luma_minus8 != 2)
494 		/* Only 8-bit and 10-bit are supported */
495 		return -EINVAL;
496 
497 	if (sps->pic_width_in_luma_samples > ctx->coded_fmt.fmt.pix_mp.width ||
498 	    sps->pic_height_in_luma_samples > ctx->coded_fmt.fmt.pix_mp.height)
499 		return -EINVAL;
500 
501 	return 0;
502 }
503 
504 static int rkvdec_hevc_start(struct rkvdec_ctx *ctx)
505 {
506 	struct rkvdec_dev *rkvdec = ctx->dev;
507 	struct rkvdec_hevc_priv_tbl *priv_tbl;
508 	struct rkvdec_hevc_ctx *hevc_ctx;
509 
510 	hevc_ctx = kzalloc_obj(*hevc_ctx);
511 	if (!hevc_ctx)
512 		return -ENOMEM;
513 
514 	priv_tbl = dma_alloc_coherent(rkvdec->dev, sizeof(*priv_tbl),
515 				      &hevc_ctx->priv_tbl.dma, GFP_KERNEL);
516 	if (!priv_tbl) {
517 		kfree(hevc_ctx);
518 		return -ENOMEM;
519 	}
520 
521 	hevc_ctx->priv_tbl.size = sizeof(*priv_tbl);
522 	hevc_ctx->priv_tbl.cpu = priv_tbl;
523 	memcpy(priv_tbl->cabac_table, rkvdec_hevc_cabac_table,
524 	       sizeof(rkvdec_hevc_cabac_table));
525 
526 	ctx->priv = hevc_ctx;
527 	return 0;
528 }
529 
530 static void rkvdec_hevc_stop(struct rkvdec_ctx *ctx)
531 {
532 	struct rkvdec_hevc_ctx *hevc_ctx = ctx->priv;
533 	struct rkvdec_dev *rkvdec = ctx->dev;
534 
535 	dma_free_coherent(rkvdec->dev, hevc_ctx->priv_tbl.size,
536 			  hevc_ctx->priv_tbl.cpu, hevc_ctx->priv_tbl.dma);
537 	kfree(hevc_ctx);
538 }
539 
540 static int rkvdec_hevc_run(struct rkvdec_ctx *ctx)
541 {
542 	struct rkvdec_dev *rkvdec = ctx->dev;
543 	struct rkvdec_hevc_run run;
544 	struct rkvdec_hevc_ctx *hevc_ctx = ctx->priv;
545 	struct rkvdec_hevc_priv_tbl *tbl = hevc_ctx->priv_tbl.cpu;
546 	u32 reg;
547 
548 	rkvdec_hevc_run_preamble(ctx, &run);
549 
550 	rkvdec_hevc_assemble_hw_scaling_list(ctx, &run, &tbl->scaling_list,
551 					     &hevc_ctx->scaling_matrix_cache);
552 	assemble_hw_pps(ctx, &run);
553 	assemble_sw_rps(ctx, &run);
554 	config_registers(ctx, &run);
555 
556 	rkvdec_run_postamble(ctx, &run.base);
557 
558 	schedule_delayed_work(&rkvdec->watchdog_work, msecs_to_jiffies(2000));
559 
560 	writel(1, rkvdec->regs + RKVDEC_REG_PREF_LUMA_CACHE_COMMAND);
561 	writel(1, rkvdec->regs + RKVDEC_REG_PREF_CHR_CACHE_COMMAND);
562 
563 	if (rkvdec->variant->quirks & RKVDEC_QUIRK_DISABLE_QOS)
564 		rkvdec_quirks_disable_qos(ctx);
565 
566 	/* Start decoding! */
567 	reg = (run.pps->flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED) ?
568 		0 : RKVDEC_WR_DDR_ALIGN_EN;
569 	writel(RKVDEC_INTERRUPT_DEC_E | RKVDEC_CONFIG_DEC_CLK_GATE_E |
570 	       RKVDEC_TIMEOUT_E | RKVDEC_BUF_EMPTY_E | reg,
571 	       rkvdec->regs + RKVDEC_REG_INTERRUPT);
572 
573 	return 0;
574 }
575 
576 static int rkvdec_hevc_try_ctrl(struct rkvdec_ctx *ctx, struct v4l2_ctrl *ctrl)
577 {
578 	if (ctrl->id == V4L2_CID_STATELESS_HEVC_SPS)
579 		return rkvdec_hevc_validate_sps(ctx, ctrl->p_new.p_hevc_sps);
580 
581 	return 0;
582 }
583 
584 const struct rkvdec_coded_fmt_ops rkvdec_hevc_fmt_ops = {
585 	.adjust_fmt = rkvdec_hevc_adjust_fmt,
586 	.start = rkvdec_hevc_start,
587 	.stop = rkvdec_hevc_stop,
588 	.run = rkvdec_hevc_run,
589 	.try_ctrl = rkvdec_hevc_try_ctrl,
590 	.get_image_fmt = rkvdec_hevc_get_image_fmt,
591 };
592