1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Rockchip Video Decoder HEVC backend
4 *
5 * Copyright (C) 2023 Collabora, Ltd.
6 * Sebastian Fricke <sebastian.fricke@collabora.com>
7 *
8 * Copyright (C) 2019 Collabora, Ltd.
9 * Boris Brezillon <boris.brezillon@collabora.com>
10 *
11 * Copyright (C) 2016 Rockchip Electronics Co., Ltd.
12 * Jeffy Chen <jeffy.chen@rock-chips.com>
13 */
14
15 #include <media/v4l2-mem2mem.h>
16
17 #include "rkvdec.h"
18 #include "rkvdec-regs.h"
19 #include "rkvdec-cabac.h"
20 #include "rkvdec-hevc-common.h"
21
/*
 * Dimensions of the auxiliary tables shared with the hardware.
 *
 * RKV_PPS_SIZE and RKV_RPS_SIZE are the sizes of one SPS/PPS packet and one
 * RPS packet, expressed in u32 words (80 and 32 bytes respectively).
 * RKV_PPS_LEN and RKV_RPS_LEN are the number of such packets kept in the
 * private table.
 * RKV_SCALING_LIST_SIZE is not referenced in this file; presumably a size in
 * bytes (u8 units) - TODO confirm against the common HEVC header.
 */
#define RKV_SCALING_LIST_SIZE 1360
#define RKV_PPS_SIZE (80 / 4)
#define RKV_PPS_LEN 64
#define RKV_RPS_SIZE (32 / 4)
#define RKV_RPS_LEN 600
28
/*
 * One combined SPS/PPS packet: RKV_PPS_SIZE 32-bit words that are filled
 * bit-field by bit-field through set_ps_field().
 */
struct rkvdec_sps_pps_packet {
	u32 info[RKV_PPS_SIZE];
};
32
/*
 * One reference picture set packet: RKV_RPS_SIZE 32-bit words, one packet
 * is written per slice by assemble_sw_rps().
 */
struct rkvdec_rps_packet {
	u32 info[RKV_RPS_SIZE];
};
36
/*
 * Location of one bit field inside a packet.
 * @offset: position of the field's lowest bit, counted in bits from the
 *          start of the packet
 * @len:    width of the field in bits
 */
struct rkvdec_ps_field {
	u16 offset;
	u8 len;
};
41
/* Build a struct rkvdec_ps_field compound literal from a bit offset and a bit length. */
#define PS_FIELD(_offset, _len) \
	((struct rkvdec_ps_field){ _offset, _len })
44
/*
 * Bit layout of one SPS/PPS packet, as (bit offset, bit length) pairs
 * consumed by set_ps_field().
 */

/* SPS fields: bits 0 - 99 of the packet */
#define VIDEO_PARAMETER_SET_ID PS_FIELD(0, 4)
#define SEQ_PARAMETER_SET_ID PS_FIELD(4, 4)
#define CHROMA_FORMAT_IDC PS_FIELD(8, 2)
#define PIC_WIDTH_IN_LUMA_SAMPLES PS_FIELD(10, 13)
#define PIC_HEIGHT_IN_LUMA_SAMPLES PS_FIELD(23, 13)
#define BIT_DEPTH_LUMA PS_FIELD(36, 4)
#define BIT_DEPTH_CHROMA PS_FIELD(40, 4)
#define LOG2_MAX_PIC_ORDER_CNT_LSB PS_FIELD(44, 5)
#define LOG2_DIFF_MAX_MIN_LUMA_CODING_BLOCK_SIZE PS_FIELD(49, 2)
#define LOG2_MIN_LUMA_CODING_BLOCK_SIZE PS_FIELD(51, 3)
#define LOG2_MIN_TRANSFORM_BLOCK_SIZE PS_FIELD(54, 3)
#define LOG2_DIFF_MAX_MIN_LUMA_TRANSFORM_BLOCK_SIZE PS_FIELD(57, 2)
#define MAX_TRANSFORM_HIERARCHY_DEPTH_INTER PS_FIELD(59, 3)
#define MAX_TRANSFORM_HIERARCHY_DEPTH_INTRA PS_FIELD(62, 3)
#define SCALING_LIST_ENABLED_FLAG PS_FIELD(65, 1)
#define AMP_ENABLED_FLAG PS_FIELD(66, 1)
#define SAMPLE_ADAPTIVE_OFFSET_ENABLED_FLAG PS_FIELD(67, 1)
#define PCM_ENABLED_FLAG PS_FIELD(68, 1)
#define PCM_SAMPLE_BIT_DEPTH_LUMA PS_FIELD(69, 4)
#define PCM_SAMPLE_BIT_DEPTH_CHROMA PS_FIELD(73, 4)
#define PCM_LOOP_FILTER_DISABLED_FLAG PS_FIELD(77, 1)
#define LOG2_DIFF_MAX_MIN_PCM_LUMA_CODING_BLOCK_SIZE PS_FIELD(78, 3)
#define LOG2_MIN_PCM_LUMA_CODING_BLOCK_SIZE PS_FIELD(81, 3)
#define NUM_SHORT_TERM_REF_PIC_SETS PS_FIELD(84, 7)
#define LONG_TERM_REF_PICS_PRESENT_FLAG PS_FIELD(91, 1)
#define NUM_LONG_TERM_REF_PICS_SPS PS_FIELD(92, 6)
#define SPS_TEMPORAL_MVP_ENABLED_FLAG PS_FIELD(98, 1)
#define STRONG_INTRA_SMOOTHING_ENABLED_FLAG PS_FIELD(99, 1)
/* PPS fields: start at bit 128 of the packet */
#define PIC_PARAMETER_SET_ID PS_FIELD(128, 6)
#define PPS_SEQ_PARAMETER_SET_ID PS_FIELD(134, 4)
#define DEPENDENT_SLICE_SEGMENTS_ENABLED_FLAG PS_FIELD(138, 1)
#define OUTPUT_FLAG_PRESENT_FLAG PS_FIELD(139, 1)
#define NUM_EXTRA_SLICE_HEADER_BITS PS_FIELD(140, 13)
#define SIGN_DATA_HIDING_ENABLED_FLAG PS_FIELD(153, 1)
#define CABAC_INIT_PRESENT_FLAG PS_FIELD(154, 1)
#define NUM_REF_IDX_L0_DEFAULT_ACTIVE PS_FIELD(155, 4)
#define NUM_REF_IDX_L1_DEFAULT_ACTIVE PS_FIELD(159, 4)
#define INIT_QP_MINUS26 PS_FIELD(163, 7)
#define CONSTRAINED_INTRA_PRED_FLAG PS_FIELD(170, 1)
#define TRANSFORM_SKIP_ENABLED_FLAG PS_FIELD(171, 1)
#define CU_QP_DELTA_ENABLED_FLAG PS_FIELD(172, 1)
#define LOG2_MIN_CU_QP_DELTA_SIZE PS_FIELD(173, 3)
#define PPS_CB_QP_OFFSET PS_FIELD(176, 5)
#define PPS_CR_QP_OFFSET PS_FIELD(181, 5)
#define PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT_FLAG PS_FIELD(186, 1)
#define WEIGHTED_PRED_FLAG PS_FIELD(187, 1)
#define WEIGHTED_BIPRED_FLAG PS_FIELD(188, 1)
#define TRANSQUANT_BYPASS_ENABLED_FLAG PS_FIELD(189, 1)
#define TILES_ENABLED_FLAG PS_FIELD(190, 1)
#define ENTROPY_CODING_SYNC_ENABLED_FLAG PS_FIELD(191, 1)
#define PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED_FLAG PS_FIELD(192, 1)
#define LOOP_FILTER_ACROSS_TILES_ENABLED_FLAG PS_FIELD(193, 1)
#define DEBLOCKING_FILTER_OVERRIDE_ENABLED_FLAG PS_FIELD(194, 1)
#define PPS_DEBLOCKING_FILTER_DISABLED_FLAG PS_FIELD(195, 1)
#define PPS_BETA_OFFSET_DIV2 PS_FIELD(196, 4)
#define PPS_TC_OFFSET_DIV2 PS_FIELD(200, 4)
#define LISTS_MODIFICATION_PRESENT_FLAG PS_FIELD(204, 1)
#define LOG2_PARALLEL_MERGE_LEVEL PS_FIELD(205, 3)
#define SLICE_SEGMENT_HEADER_EXTENSION_PRESENT_FLAG PS_FIELD(208, 1)
#define NUM_TILE_COLUMNS PS_FIELD(212, 5)
#define NUM_TILE_ROWS PS_FIELD(217, 5)
/* Per-tile geometry: one 8-bit entry per tile column / tile row */
#define COLUMN_WIDTH(i) PS_FIELD(256 + ((i) * 8), 8)
#define ROW_HEIGHT(i) PS_FIELD(416 + ((i) * 8), 8)
/* 32-bit address of the scaling list, stored in the packet's last used word */
#define SCALING_LIST_ADDRESS PS_FIELD(592, 32)
111
/*
 * Data structure describing auxiliary buffer format.
 *
 * The whole structure lives in one DMA-coherent allocation (see
 * rkvdec_hevc_start()); the hardware is given the DMA address of each
 * member via offsetof() from the buffer base.
 */
struct rkvdec_hevc_priv_tbl {
	/* CABAC table, copied once from rkvdec_hevc_cabac_table at start */
	u8 cabac_table[RKV_HEVC_CABAC_TABLE_SIZE];
	/* Scaling list, referenced from every SPS/PPS packet */
	struct scaling_factor scaling_list;
	/* SPS/PPS packets, indexed by PPS id */
	struct rkvdec_sps_pps_packet param_set[RKV_PPS_LEN];
	/* RPS packets, indexed by slice number */
	struct rkvdec_rps_packet rps[RKV_RPS_LEN];
};
119
/* Per-context HEVC backend state, stored in rkvdec_ctx::priv. */
struct rkvdec_hevc_ctx {
	/* DMA buffer holding a struct rkvdec_hevc_priv_tbl */
	struct rkvdec_aux_buf priv_tbl;
	/* Passed to rkvdec_hevc_assemble_hw_scaling_list() on every run */
	struct v4l2_ctrl_hevc_scaling_matrix scaling_matrix_cache;
	/* Shadow copy of the register file, rebuilt and flushed on every run */
	struct rkvdec_regs regs;
};
125
/*
 * Store @value into the bit field described by @field inside the packet
 * words at @buf.
 *
 * The field may straddle two consecutive 32-bit words. The old contents of
 * the field are cleared first and @value is truncated to the field width;
 * all bits outside the field are left untouched.
 */
static void set_ps_field(u32 *buf, struct rkvdec_ps_field field, u32 value)
{
	const u8 shift = field.offset % 32;
	const u8 index = field.offset / 32;
	/* Mask and value are laid out in a 64-bit window starting at word 'index'. */
	const u64 field_mask = GENMASK_ULL(shift + field.len - 1, shift);
	const u64 shifted = ((u64)value << shift) & field_mask;
	u32 *lo = &buf[index];

	*lo = (*lo & ~(u32)field_mask) | (u32)shifted;

	/* Field fits in the first word: the next word must not be touched. */
	if (shift + field.len <= 32)
		return;

	lo[1] = (lo[1] & ~(u32)(field_mask >> 32)) | (u32)(shifted >> 32);
}
139
assemble_hw_pps(struct rkvdec_ctx * ctx,struct rkvdec_hevc_run * run)140 static void assemble_hw_pps(struct rkvdec_ctx *ctx,
141 struct rkvdec_hevc_run *run)
142 {
143 struct rkvdec_hevc_ctx *hevc_ctx = ctx->priv;
144 const struct v4l2_ctrl_hevc_sps *sps = run->sps;
145 const struct v4l2_ctrl_hevc_pps *pps = run->pps;
146 struct rkvdec_hevc_priv_tbl *priv_tbl = hevc_ctx->priv_tbl.cpu;
147 struct rkvdec_sps_pps_packet *hw_ps;
148 u32 min_cb_log2_size_y, ctb_log2_size_y, ctb_size_y;
149 u32 log2_min_cu_qp_delta_size, scaling_distance;
150 dma_addr_t scaling_list_address;
151 int i;
152
153 /*
154 * HW read the SPS/PPS information from PPS packet index by PPS id.
155 * offset from the base can be calculated by PPS_id * 80 (size per PPS
156 * packet unit). so the driver copy SPS/PPS information to the exact PPS
157 * packet unit for HW accessing.
158 */
159 hw_ps = &priv_tbl->param_set[pps->pic_parameter_set_id];
160 memset(hw_ps, 0, sizeof(*hw_ps));
161
162 #define WRITE_PPS(value, field) set_ps_field(hw_ps->info, field, value)
163 /* write sps */
164 WRITE_PPS(sps->video_parameter_set_id, VIDEO_PARAMETER_SET_ID);
165 WRITE_PPS(sps->seq_parameter_set_id, SEQ_PARAMETER_SET_ID);
166 WRITE_PPS(sps->chroma_format_idc, CHROMA_FORMAT_IDC);
167 WRITE_PPS(sps->pic_width_in_luma_samples, PIC_WIDTH_IN_LUMA_SAMPLES);
168 WRITE_PPS(sps->pic_height_in_luma_samples, PIC_HEIGHT_IN_LUMA_SAMPLES);
169 WRITE_PPS(sps->bit_depth_luma_minus8 + 8, BIT_DEPTH_LUMA);
170 WRITE_PPS(sps->bit_depth_chroma_minus8 + 8, BIT_DEPTH_CHROMA);
171 WRITE_PPS(sps->log2_max_pic_order_cnt_lsb_minus4 + 4,
172 LOG2_MAX_PIC_ORDER_CNT_LSB);
173 WRITE_PPS(sps->log2_diff_max_min_luma_coding_block_size,
174 LOG2_DIFF_MAX_MIN_LUMA_CODING_BLOCK_SIZE);
175 WRITE_PPS(sps->log2_min_luma_coding_block_size_minus3 + 3,
176 LOG2_MIN_LUMA_CODING_BLOCK_SIZE);
177 WRITE_PPS(sps->log2_min_luma_transform_block_size_minus2 + 2,
178 LOG2_MIN_TRANSFORM_BLOCK_SIZE);
179 WRITE_PPS(sps->log2_diff_max_min_luma_transform_block_size,
180 LOG2_DIFF_MAX_MIN_LUMA_TRANSFORM_BLOCK_SIZE);
181 WRITE_PPS(sps->max_transform_hierarchy_depth_inter,
182 MAX_TRANSFORM_HIERARCHY_DEPTH_INTER);
183 WRITE_PPS(sps->max_transform_hierarchy_depth_intra,
184 MAX_TRANSFORM_HIERARCHY_DEPTH_INTRA);
185 WRITE_PPS(!!(sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED),
186 SCALING_LIST_ENABLED_FLAG);
187 WRITE_PPS(!!(sps->flags & V4L2_HEVC_SPS_FLAG_AMP_ENABLED),
188 AMP_ENABLED_FLAG);
189 WRITE_PPS(!!(sps->flags & V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET),
190 SAMPLE_ADAPTIVE_OFFSET_ENABLED_FLAG);
191 if (sps->flags & V4L2_HEVC_SPS_FLAG_PCM_ENABLED) {
192 WRITE_PPS(1, PCM_ENABLED_FLAG);
193 WRITE_PPS(sps->pcm_sample_bit_depth_luma_minus1 + 1,
194 PCM_SAMPLE_BIT_DEPTH_LUMA);
195 WRITE_PPS(sps->pcm_sample_bit_depth_chroma_minus1 + 1,
196 PCM_SAMPLE_BIT_DEPTH_CHROMA);
197 WRITE_PPS(!!(sps->flags & V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED),
198 PCM_LOOP_FILTER_DISABLED_FLAG);
199 WRITE_PPS(sps->log2_diff_max_min_pcm_luma_coding_block_size,
200 LOG2_DIFF_MAX_MIN_PCM_LUMA_CODING_BLOCK_SIZE);
201 WRITE_PPS(sps->log2_min_pcm_luma_coding_block_size_minus3 + 3,
202 LOG2_MIN_PCM_LUMA_CODING_BLOCK_SIZE);
203 }
204 WRITE_PPS(sps->num_short_term_ref_pic_sets, NUM_SHORT_TERM_REF_PIC_SETS);
205 WRITE_PPS(!!(sps->flags & V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT),
206 LONG_TERM_REF_PICS_PRESENT_FLAG);
207 WRITE_PPS(sps->num_long_term_ref_pics_sps, NUM_LONG_TERM_REF_PICS_SPS);
208 WRITE_PPS(!!(sps->flags & V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED),
209 SPS_TEMPORAL_MVP_ENABLED_FLAG);
210 WRITE_PPS(!!(sps->flags & V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED),
211 STRONG_INTRA_SMOOTHING_ENABLED_FLAG);
212
213 /* write pps */
214 WRITE_PPS(pps->pic_parameter_set_id, PIC_PARAMETER_SET_ID);
215 WRITE_PPS(sps->seq_parameter_set_id, PPS_SEQ_PARAMETER_SET_ID);
216 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED),
217 DEPENDENT_SLICE_SEGMENTS_ENABLED_FLAG);
218 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT),
219 OUTPUT_FLAG_PRESENT_FLAG);
220 WRITE_PPS(pps->num_extra_slice_header_bits, NUM_EXTRA_SLICE_HEADER_BITS);
221 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED),
222 SIGN_DATA_HIDING_ENABLED_FLAG);
223 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT),
224 CABAC_INIT_PRESENT_FLAG);
225 WRITE_PPS(pps->num_ref_idx_l0_default_active_minus1 + 1,
226 NUM_REF_IDX_L0_DEFAULT_ACTIVE);
227 WRITE_PPS(pps->num_ref_idx_l1_default_active_minus1 + 1,
228 NUM_REF_IDX_L1_DEFAULT_ACTIVE);
229 WRITE_PPS(pps->init_qp_minus26, INIT_QP_MINUS26);
230 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED),
231 CONSTRAINED_INTRA_PRED_FLAG);
232 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED),
233 TRANSFORM_SKIP_ENABLED_FLAG);
234 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED),
235 CU_QP_DELTA_ENABLED_FLAG);
236
237 min_cb_log2_size_y = sps->log2_min_luma_coding_block_size_minus3 + 3;
238 ctb_log2_size_y = min_cb_log2_size_y +
239 sps->log2_diff_max_min_luma_coding_block_size;
240 ctb_size_y = 1 << ctb_log2_size_y;
241 log2_min_cu_qp_delta_size = ctb_log2_size_y - pps->diff_cu_qp_delta_depth;
242 WRITE_PPS(log2_min_cu_qp_delta_size, LOG2_MIN_CU_QP_DELTA_SIZE);
243 WRITE_PPS(pps->pps_cb_qp_offset, PPS_CB_QP_OFFSET);
244 WRITE_PPS(pps->pps_cr_qp_offset, PPS_CR_QP_OFFSET);
245 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT),
246 PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT_FLAG);
247 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED),
248 WEIGHTED_PRED_FLAG);
249 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED),
250 WEIGHTED_BIPRED_FLAG);
251 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED),
252 TRANSQUANT_BYPASS_ENABLED_FLAG);
253 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED),
254 TILES_ENABLED_FLAG);
255 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED),
256 ENTROPY_CODING_SYNC_ENABLED_FLAG);
257 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED),
258 PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED_FLAG);
259 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED),
260 LOOP_FILTER_ACROSS_TILES_ENABLED_FLAG);
261 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED),
262 DEBLOCKING_FILTER_OVERRIDE_ENABLED_FLAG);
263 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER),
264 PPS_DEBLOCKING_FILTER_DISABLED_FLAG);
265 WRITE_PPS(pps->pps_beta_offset_div2, PPS_BETA_OFFSET_DIV2);
266 WRITE_PPS(pps->pps_tc_offset_div2, PPS_TC_OFFSET_DIV2);
267 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT),
268 LISTS_MODIFICATION_PRESENT_FLAG);
269 WRITE_PPS(pps->log2_parallel_merge_level_minus2 + 2, LOG2_PARALLEL_MERGE_LEVEL);
270 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT),
271 SLICE_SEGMENT_HEADER_EXTENSION_PRESENT_FLAG);
272 WRITE_PPS(pps->num_tile_columns_minus1 + 1, NUM_TILE_COLUMNS);
273 WRITE_PPS(pps->num_tile_rows_minus1 + 1, NUM_TILE_ROWS);
274
275 if (pps->flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED) {
276 /* Userspace also provide column width and row height for uniform spacing */
277 for (i = 0; i <= pps->num_tile_columns_minus1; i++)
278 WRITE_PPS(pps->column_width_minus1[i], COLUMN_WIDTH(i));
279 for (i = 0; i <= pps->num_tile_rows_minus1; i++)
280 WRITE_PPS(pps->row_height_minus1[i], ROW_HEIGHT(i));
281 } else {
282 WRITE_PPS(((sps->pic_width_in_luma_samples + ctb_size_y - 1) / ctb_size_y) - 1,
283 COLUMN_WIDTH(0));
284 WRITE_PPS(((sps->pic_height_in_luma_samples + ctb_size_y - 1) / ctb_size_y) - 1,
285 ROW_HEIGHT(0));
286 }
287
288 scaling_distance = offsetof(struct rkvdec_hevc_priv_tbl, scaling_list);
289 scaling_list_address = hevc_ctx->priv_tbl.dma + scaling_distance;
290 WRITE_PPS(scaling_list_address, SCALING_LIST_ADDRESS);
291 }
292
293 /*
294 * Creation of the Reference Picture Set memory blob for the hardware.
295 * The layout looks like this:
296 * [0] 32 bits for L0 (6 references + 2 bits of the 7th reference)
297 * [1] 32 bits for L0 (remaining 3 bits of the 7th reference + 5 references
298 * + 4 bits of the 13th reference)
299 * [2] 11 bits for L0 (remaining bit for 13 and 2 references) and
300 * 21 bits for L1 (4 references + first bit of 5)
301 * [3] 32 bits of padding with 0s
302 * [4] 32 bits for L1 (remaining 4 bits for 5 + 5 references + 3 bits of 11)
303 * [5] 22 bits for L1 (remaining 2 bits of 11 and 4 references)
304 * lowdelay flag (bit 23), rps bit offset long term (bit 24 - 32)
305 * [6] rps bit offset long term (bit 1 - 3), rps bit offset short term (bit 4 - 12)
306 * number of references (bit 13 - 16), remaining 16 bits of padding with 0s
307 * [7] 32 bits of padding with 0s
308 *
309 * Thus we have to set up padding in between reference 5 of the L1 list.
310 */
assemble_sw_rps(struct rkvdec_ctx * ctx,struct rkvdec_hevc_run * run)311 static void assemble_sw_rps(struct rkvdec_ctx *ctx,
312 struct rkvdec_hevc_run *run)
313 {
314 const struct v4l2_ctrl_hevc_decode_params *decode_params = run->decode_params;
315 const struct v4l2_ctrl_hevc_sps *sps = run->sps;
316 const struct v4l2_ctrl_hevc_slice_params *sl_params;
317 const struct v4l2_hevc_dpb_entry *dpb;
318 struct rkvdec_hevc_ctx *hevc_ctx = ctx->priv;
319 struct rkvdec_hevc_priv_tbl *priv_tbl = hevc_ctx->priv_tbl.cpu;
320 struct rkvdec_rps_packet *hw_ps;
321 int i, j;
322 unsigned int lowdelay;
323
324 #define WRITE_RPS(value, field) set_ps_field(hw_ps->info, field, value)
325
326 #define REF_PIC_LONG_TERM_L0(i) PS_FIELD((i) * 5, 1)
327 #define REF_PIC_IDX_L0(i) PS_FIELD(1 + ((i) * 5), 4)
328 #define REF_PIC_LONG_TERM_L1(i) PS_FIELD(((i) < 5 ? 75 : 132) + ((i) * 5), 1)
329 #define REF_PIC_IDX_L1(i) PS_FIELD(((i) < 4 ? 76 : 128) + ((i) * 5), 4)
330
331 #define LOWDELAY PS_FIELD(182, 1)
332 #define LONG_TERM_RPS_BIT_OFFSET PS_FIELD(183, 10)
333 #define SHORT_TERM_RPS_BIT_OFFSET PS_FIELD(193, 9)
334 #define NUM_RPS_POC PS_FIELD(202, 4)
335
336 for (j = 0; j < run->num_slices; j++) {
337 uint st_bit_offset = 0;
338 uint num_l0_refs = 0;
339 uint num_l1_refs = 0;
340
341 sl_params = &run->slices_params[j];
342 dpb = decode_params->dpb;
343
344 if (sl_params->slice_type != V4L2_HEVC_SLICE_TYPE_I) {
345 num_l0_refs = sl_params->num_ref_idx_l0_active_minus1 + 1;
346
347 if (sl_params->slice_type == V4L2_HEVC_SLICE_TYPE_B)
348 num_l1_refs = sl_params->num_ref_idx_l1_active_minus1 + 1;
349
350 lowdelay = 1;
351 } else {
352 lowdelay = 0;
353 }
354
355 hw_ps = &priv_tbl->rps[j];
356 memset(hw_ps, 0, sizeof(*hw_ps));
357
358 for (i = 0; i < num_l0_refs; i++) {
359 const struct v4l2_hevc_dpb_entry dpb_l0 = dpb[sl_params->ref_idx_l0[i]];
360
361 WRITE_RPS(!!(dpb_l0.flags & V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE),
362 REF_PIC_LONG_TERM_L0(i));
363 WRITE_RPS(sl_params->ref_idx_l0[i], REF_PIC_IDX_L0(i));
364
365 if (dpb_l0.pic_order_cnt_val > sl_params->slice_pic_order_cnt)
366 lowdelay = 0;
367 }
368
369 for (i = 0; i < num_l1_refs; i++) {
370 const struct v4l2_hevc_dpb_entry dpb_l1 = dpb[sl_params->ref_idx_l1[i]];
371 int is_long_term =
372 !!(dpb_l1.flags & V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE);
373
374 WRITE_RPS(is_long_term, REF_PIC_LONG_TERM_L1(i));
375 WRITE_RPS(sl_params->ref_idx_l1[i], REF_PIC_IDX_L1(i));
376
377 if (dpb_l1.pic_order_cnt_val > sl_params->slice_pic_order_cnt)
378 lowdelay = 0;
379 }
380
381 WRITE_RPS(lowdelay, LOWDELAY);
382
383 if (!(decode_params->flags & V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC)) {
384 if (sl_params->short_term_ref_pic_set_size)
385 st_bit_offset = sl_params->short_term_ref_pic_set_size;
386 else if (sps->num_short_term_ref_pic_sets > 1)
387 st_bit_offset = fls(sps->num_short_term_ref_pic_sets - 1);
388 }
389
390 WRITE_RPS(st_bit_offset + sl_params->long_term_ref_pic_set_size,
391 LONG_TERM_RPS_BIT_OFFSET);
392 WRITE_RPS(sl_params->short_term_ref_pic_set_size,
393 SHORT_TERM_RPS_BIT_OFFSET);
394
395 WRITE_RPS(decode_params->num_poc_st_curr_before +
396 decode_params->num_poc_st_curr_after +
397 decode_params->num_poc_lt_curr,
398 NUM_RPS_POC);
399 }
400 }
401
config_registers(struct rkvdec_ctx * ctx,struct rkvdec_hevc_run * run)402 static void config_registers(struct rkvdec_ctx *ctx,
403 struct rkvdec_hevc_run *run)
404 {
405 struct rkvdec_dev *rkvdec = ctx->dev;
406 const struct v4l2_ctrl_hevc_decode_params *decode_params = run->decode_params;
407 const struct v4l2_ctrl_hevc_sps *sps = run->sps;
408 const struct v4l2_ctrl_hevc_slice_params *sl_params = &run->slices_params[0];
409 const struct v4l2_hevc_dpb_entry *dpb = decode_params->dpb;
410 struct rkvdec_hevc_ctx *hevc_ctx = ctx->priv;
411 struct rkvdec_regs *regs = &hevc_ctx->regs;
412 dma_addr_t priv_start_addr = hevc_ctx->priv_tbl.dma;
413 const struct v4l2_pix_format_mplane *dst_fmt;
414 struct vb2_v4l2_buffer *src_buf = run->base.bufs.src;
415 struct vb2_v4l2_buffer *dst_buf = run->base.bufs.dst;
416 const struct v4l2_format *f;
417 dma_addr_t rlc_addr;
418 dma_addr_t refer_addr;
419 u32 rlc_len;
420 u32 hor_virstride;
421 u32 ver_virstride;
422 u32 y_virstride;
423 u32 yuv_virstride = 0;
424 u32 offset;
425 dma_addr_t dst_addr;
426 u32 reg, i;
427
428 memset(regs, 0, sizeof(*regs));
429
430 regs->common.reg02.dec_mode = RKVDEC_MODE_HEVC;
431
432 f = &ctx->decoded_fmt;
433 dst_fmt = &f->fmt.pix_mp;
434 hor_virstride = dst_fmt->plane_fmt[0].bytesperline;
435 ver_virstride = dst_fmt->height;
436 y_virstride = hor_virstride * ver_virstride;
437
438 if (sps->chroma_format_idc == 0)
439 yuv_virstride = y_virstride;
440 else if (sps->chroma_format_idc == 1)
441 yuv_virstride = y_virstride + y_virstride / 2;
442 else if (sps->chroma_format_idc == 2)
443 yuv_virstride = 2 * y_virstride;
444
445 regs->common.reg03.slice_num_lowbits = run->num_slices;
446 regs->common.reg03.uv_hor_virstride = hor_virstride / 16;
447 regs->common.reg03.y_hor_virstride = hor_virstride / 16;
448
449 /* config rlc base address */
450 rlc_addr = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0);
451 regs->common.strm_rlc_base = rlc_addr;
452
453 rlc_len = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
454 regs->common.stream_len = round_up(rlc_len, 16) + 64;
455
456 /* config cabac table */
457 offset = offsetof(struct rkvdec_hevc_priv_tbl, cabac_table);
458 regs->common.cabactbl_base = priv_start_addr + offset;
459
460 /* config output base address */
461 dst_addr = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
462 regs->common.decout_base = dst_addr;
463
464 regs->common.reg08.y_virstride = y_virstride / 16;
465 regs->common.reg09.yuv_virstride = yuv_virstride / 16;
466
467 /* config ref pic address */
468 for (i = 0; i < 15; i++) {
469 struct vb2_buffer *vb_buf = get_ref_buf(ctx, run, i);
470
471 if (i < 4 && decode_params->num_active_dpb_entries) {
472 reg = GENMASK(decode_params->num_active_dpb_entries - 1, 0);
473 reg = (reg >> (i * 4)) & 0xf;
474 } else {
475 reg = 0;
476 }
477
478 refer_addr = vb2_dma_contig_plane_dma_addr(vb_buf, 0);
479
480 regs->h26x.ref0_14_base[i].base_addr = refer_addr >> 4;
481 regs->h26x.ref0_14_base[i].field_ref = !!(reg & 1);
482 regs->h26x.ref0_14_base[i].topfield_used_ref = !!(reg & 2);
483 regs->h26x.ref0_14_base[i].botfield_used_ref = !!(reg & 4);
484 regs->h26x.ref0_14_base[i].colmv_use_flag_ref = !!(reg & 8);
485
486 regs->h26x.ref0_14_poc[i] = i < decode_params->num_active_dpb_entries
487 ? dpb[i].pic_order_cnt_val
488 : 0;
489 }
490
491 regs->h26x.cur_poc = sl_params->slice_pic_order_cnt;
492
493 /* config hw pps address */
494 offset = offsetof(struct rkvdec_hevc_priv_tbl, param_set);
495 regs->h26x.pps_base = priv_start_addr + offset;
496
497 /* config hw rps address */
498 offset = offsetof(struct rkvdec_hevc_priv_tbl, rps);
499 regs->h26x.rps_base = priv_start_addr + offset;
500
501 rkvdec_memcpy_toio(rkvdec->regs, regs,
502 MIN(sizeof(*regs), sizeof(u32) * rkvdec->variant->num_regs));
503 }
504
rkvdec_hevc_validate_sps(struct rkvdec_ctx * ctx,const struct v4l2_ctrl_hevc_sps * sps)505 static int rkvdec_hevc_validate_sps(struct rkvdec_ctx *ctx,
506 const struct v4l2_ctrl_hevc_sps *sps)
507 {
508 if (sps->chroma_format_idc > 1)
509 /* Only 4:0:0 and 4:2:0 are supported */
510 return -EINVAL;
511 if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8)
512 /* Luma and chroma bit depth mismatch */
513 return -EINVAL;
514 if (sps->bit_depth_luma_minus8 != 0 && sps->bit_depth_luma_minus8 != 2)
515 /* Only 8-bit and 10-bit are supported */
516 return -EINVAL;
517
518 if (sps->pic_width_in_luma_samples > ctx->coded_fmt.fmt.pix_mp.width ||
519 sps->pic_height_in_luma_samples > ctx->coded_fmt.fmt.pix_mp.height)
520 return -EINVAL;
521
522 return 0;
523 }
524
rkvdec_hevc_start(struct rkvdec_ctx * ctx)525 static int rkvdec_hevc_start(struct rkvdec_ctx *ctx)
526 {
527 struct rkvdec_dev *rkvdec = ctx->dev;
528 struct rkvdec_hevc_priv_tbl *priv_tbl;
529 struct rkvdec_hevc_ctx *hevc_ctx;
530
531 hevc_ctx = kzalloc_obj(*hevc_ctx);
532 if (!hevc_ctx)
533 return -ENOMEM;
534
535 priv_tbl = dma_alloc_coherent(rkvdec->dev, sizeof(*priv_tbl),
536 &hevc_ctx->priv_tbl.dma, GFP_KERNEL);
537 if (!priv_tbl) {
538 kfree(hevc_ctx);
539 return -ENOMEM;
540 }
541
542 hevc_ctx->priv_tbl.size = sizeof(*priv_tbl);
543 hevc_ctx->priv_tbl.cpu = priv_tbl;
544 memcpy(priv_tbl->cabac_table, rkvdec_hevc_cabac_table,
545 sizeof(rkvdec_hevc_cabac_table));
546
547 ctx->priv = hevc_ctx;
548 return 0;
549 }
550
rkvdec_hevc_stop(struct rkvdec_ctx * ctx)551 static void rkvdec_hevc_stop(struct rkvdec_ctx *ctx)
552 {
553 struct rkvdec_hevc_ctx *hevc_ctx = ctx->priv;
554 struct rkvdec_dev *rkvdec = ctx->dev;
555
556 dma_free_coherent(rkvdec->dev, hevc_ctx->priv_tbl.size,
557 hevc_ctx->priv_tbl.cpu, hevc_ctx->priv_tbl.dma);
558 kfree(hevc_ctx);
559 }
560
/*
 * Decode one frame: build the hardware tables and registers from the
 * controls of the current request, then start the decoder.
 *
 * Always returns 0.
 */
static int rkvdec_hevc_run(struct rkvdec_ctx *ctx)
{
	struct rkvdec_dev *rkvdec = ctx->dev;
	struct rkvdec_hevc_run run;
	struct rkvdec_hevc_ctx *hevc_ctx = ctx->priv;
	struct rkvdec_hevc_priv_tbl *tbl = hevc_ctx->priv_tbl.cpu;
	u32 reg;

	/* Fills 'run' (controls and buffers); implemented outside this file. */
	rkvdec_hevc_run_preamble(ctx, &run);

	/* Tables in the private buffer first, then the registers pointing at them. */
	rkvdec_hevc_assemble_hw_scaling_list(ctx, &run, &tbl->scaling_list,
					     &hevc_ctx->scaling_matrix_cache);
	assemble_hw_pps(ctx, &run);
	assemble_sw_rps(ctx, &run);
	config_registers(ctx, &run);

	rkvdec_run_postamble(ctx, &run.base);

	/* Queue the watchdog work to run 2000 ms from now. */
	schedule_delayed_work(&rkvdec->watchdog_work, msecs_to_jiffies(2000));

	/* Issue a command to the luma and chroma prefetch caches. */
	writel(1, rkvdec->regs + RKVDEC_REG_PREF_LUMA_CACHE_COMMAND);
	writel(1, rkvdec->regs + RKVDEC_REG_PREF_CHR_CACHE_COMMAND);

	if (rkvdec->variant->quirks & RKVDEC_QUIRK_DISABLE_QOS)
		rkvdec_quirks_disable_qos(ctx);

	/* Start decoding! RKVDEC_WR_DDR_ALIGN_EN is only set when tiles are disabled. */
	reg = (run.pps->flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED) ?
		0 : RKVDEC_WR_DDR_ALIGN_EN;
	writel(RKVDEC_INTERRUPT_DEC_E | RKVDEC_CONFIG_DEC_CLK_GATE_E |
	       RKVDEC_TIMEOUT_E | RKVDEC_BUF_EMPTY_E | reg,
	       rkvdec->regs + RKVDEC_REG_INTERRUPT);

	return 0;
}
596
rkvdec_hevc_try_ctrl(struct rkvdec_ctx * ctx,struct v4l2_ctrl * ctrl)597 static int rkvdec_hevc_try_ctrl(struct rkvdec_ctx *ctx, struct v4l2_ctrl *ctrl)
598 {
599 if (ctrl->id == V4L2_CID_STATELESS_HEVC_SPS)
600 return rkvdec_hevc_validate_sps(ctx, ctrl->p_new.p_hevc_sps);
601
602 return 0;
603 }
604
/*
 * HEVC backend operations. adjust_fmt and get_image_fmt are not defined in
 * this file; the remaining callbacks are the functions above.
 */
const struct rkvdec_coded_fmt_ops rkvdec_hevc_fmt_ops = {
	.adjust_fmt = rkvdec_hevc_adjust_fmt,
	.start = rkvdec_hevc_start,
	.stop = rkvdec_hevc_stop,
	.run = rkvdec_hevc_run,
	.try_ctrl = rkvdec_hevc_try_ctrl,
	.get_image_fmt = rkvdec_hevc_get_image_fmt,
};
613