1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Rockchip VDPU383 HEVC backend 4 * 5 * Copyright (C) 2025 Collabora, Ltd. 6 * Detlev Casanova <detlev.casanova@collabora.com> 7 */ 8 9 #include <media/v4l2-mem2mem.h> 10 11 #include "rkvdec.h" 12 #include "rkvdec-cabac.h" 13 #include "rkvdec-rcb.h" 14 #include "rkvdec-hevc-common.h" 15 #include "rkvdec-vdpu383-regs.h" 16 #include "rkvdec-bitwriter.h" 17 18 #define VIDEO_PARAMETER_SET_ID BW_FIELD(0, 4) 19 #define SEQ_PARAMETER_SET_ID BW_FIELD(4, 4) 20 #define CHROMA_FORMAT_IDC BW_FIELD(8, 2) 21 #define PIC_WIDTH_IN_LUMA_SAMPLES BW_FIELD(10, 16) 22 #define PIC_HEIGHT_IN_LUMA_SAMPLES BW_FIELD(26, 16) 23 #define BIT_DEPTH_LUMA BW_FIELD(42, 3) 24 #define BIT_DEPTH_CHROMA BW_FIELD(45, 3) 25 #define LOG2_MAX_PIC_ORDER_CNT_LSB BW_FIELD(48, 5) 26 #define LOG2_DIFF_MAX_MIN_LUMA_CODING_BLOCK_SIZE BW_FIELD(53, 2) 27 #define LOG2_MIN_LUMA_CODING_BLOCK_SIZE BW_FIELD(55, 3) 28 #define LOG2_MIN_TRANSFORM_BLOCK_SIZE BW_FIELD(58, 3) 29 #define LOG2_DIFF_MAX_MIN_LUMA_TRANSFORM_BLOCK_SIZE BW_FIELD(61, 2) 30 #define MAX_TRANSFORM_HIERARCHY_DEPTH_INTER BW_FIELD(63, 3) 31 #define MAX_TRANSFORM_HIERARCHY_DEPTH_INTRA BW_FIELD(66, 3) 32 #define SCALING_LIST_ENABLED_FLAG BW_FIELD(69, 1) 33 #define AMP_ENABLED_FLAG BW_FIELD(70, 1) 34 #define SAMPLE_ADAPTIVE_OFFSET_ENABLED_FLAG BW_FIELD(71, 1) 35 #define PCM_ENABLED_FLAG BW_FIELD(72, 1) 36 #define PCM_SAMPLE_BIT_DEPTH_LUMA BW_FIELD(73, 4) 37 #define PCM_SAMPLE_BIT_DEPTH_CHROMA BW_FIELD(77, 4) 38 #define PCM_LOOP_FILTER_DISABLED_FLAG BW_FIELD(81, 1) 39 #define LOG2_DIFF_MAX_MIN_PCM_LUMA_CODING_BLOCK_SIZE BW_FIELD(82, 3) 40 #define LOG2_MIN_PCM_LUMA_CODING_BLOCK_SIZE BW_FIELD(85, 3) 41 #define NUM_SHORT_TERM_REF_PIC_SETS BW_FIELD(88, 7) 42 #define LONG_TERM_REF_PICS_PRESENT_FLAG BW_FIELD(95, 1) 43 #define NUM_LONG_TERM_REF_PICS_SPS BW_FIELD(96, 6) 44 #define SPS_TEMPORAL_MVP_ENABLED_FLAG BW_FIELD(102, 1) 45 #define STRONG_INTRA_SMOOTHING_ENABLED_FLAG BW_FIELD(103, 1) 46 #define SPS_MAX_DEC_PIC_BUFFERING_MINUS1 BW_FIELD(111, 4) 47 #define SEPARATE_COLOUR_PLANE_FLAG BW_FIELD(115, 1) 48 #define HIGH_PRECISION_OFFSETS_ENABLED_FLAG BW_FIELD(116, 1) 49 #define PERSISTENT_RICE_ADAPTATION_ENABLED_FLAG BW_FIELD(117, 1) 50 51 /* PPS */ 52 #define PIC_PARAMETER_SET_ID BW_FIELD(118, 6) 53 #define PPS_SEQ_PARAMETER_SET_ID BW_FIELD(124, 4) 54 #define DEPENDENT_SLICE_SEGMENTS_ENABLED_FLAG BW_FIELD(128, 1) 55 #define OUTPUT_FLAG_PRESENT_FLAG BW_FIELD(129, 1) 56 #define NUM_EXTRA_SLICE_HEADER_BITS BW_FIELD(130, 13) 57 #define SIGN_DATA_HIDING_ENABLED_FLAG BW_FIELD(143, 1) 58 #define CABAC_INIT_PRESENT_FLAG BW_FIELD(144, 1) 59 #define NUM_REF_IDX_L0_DEFAULT_ACTIVE BW_FIELD(145, 4) 60 #define NUM_REF_IDX_L1_DEFAULT_ACTIVE BW_FIELD(149, 4) 61 #define INIT_QP_MINUS26 BW_FIELD(153, 7) 62 #define CONSTRAINED_INTRA_PRED_FLAG BW_FIELD(160, 1) 63 #define TRANSFORM_SKIP_ENABLED_FLAG BW_FIELD(161, 1) 64 #define CU_QP_DELTA_ENABLED_FLAG BW_FIELD(162, 1) 65 #define LOG2_MIN_CU_QP_DELTA_SIZE BW_FIELD(163, 3) 66 #define PPS_CB_QP_OFFSET BW_FIELD(166, 5) 67 #define PPS_CR_QP_OFFSET BW_FIELD(171, 5) 68 #define PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT_FLAG BW_FIELD(176, 1) 69 #define WEIGHTED_PRED_FLAG BW_FIELD(177, 1) 70 #define WEIGHTED_BIPRED_FLAG BW_FIELD(178, 1) 71 #define TRANSQUANT_BYPASS_ENABLED_FLAG BW_FIELD(179, 1) 72 #define TILES_ENABLED_FLAG BW_FIELD(180, 1) 73 #define ENTROPY_CODING_SYNC_ENABLED_FLAG BW_FIELD(181, 1) 74 #define PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED_FLAG BW_FIELD(182, 1) 75 #define LOOP_FILTER_ACROSS_TILES_ENABLED_FLAG BW_FIELD(183, 1) 76 #define DEBLOCKING_FILTER_OVERRIDE_ENABLED_FLAG BW_FIELD(184, 1) 77 #define PPS_DEBLOCKING_FILTER_DISABLED_FLAG BW_FIELD(185, 1) 78 #define PPS_BETA_OFFSET_DIV2 BW_FIELD(186, 4) 79 #define PPS_TC_OFFSET_DIV2 BW_FIELD(190, 4) 80 #define LISTS_MODIFICATION_PRESENT_FLAG BW_FIELD(194, 1) 81 #define LOG2_PARALLEL_MERGE_LEVEL BW_FIELD(195, 3) 82 #define SLICE_SEGMENT_HEADER_EXTENSION_PRESENT_FLAG BW_FIELD(198, 1) 83 84 /* pps extensions */ 85 #define LOG2_MAX_TRANSFORM_SKIP_BLOCK_SIZE BW_FIELD(202, 2) 86 #define CROSS_COMPONENT_PREDICTION_ENABLED_FLAG BW_FIELD(204, 1) 87 #define CHROMA_QP_OFFSET_LIST_ENABLED_FLAG BW_FIELD(205, 1) 88 #define LOG2_MIN_CU_CHROMA_QP_DELTA_SIZE BW_FIELD(206, 3) 89 #define CB_QP_OFFSET_LIST(i) BW_FIELD(209 + (i) * 5, 5) // i: 0-5 90 #define CB_CR_OFFSET_LIST(i) BW_FIELD(239 + (i) * 5, 5) // i: 0-5 91 #define CHROMA_QP_OFFSET_LIST_LEN_MINUS1 BW_FIELD(269, 3) 92 93 /* mvc0 && mvc1 */ 94 #define MVC_FF BW_FIELD(272, 16) 95 #define MVC_00 BW_FIELD(288, 9) 96 97 /* poc info */ 98 #define RESERVED2 BW_FIELD(297, 3) 99 #define CURRENT_POC BW_FIELD(300, 32) 100 #define REF_PIC_POC(i) BW_FIELD(332 + (i) * 32, 32) // i: 0-14 101 #define RESERVED3 BW_FIELD(812, 32) 102 #define REF_IS_VALID(i) BW_FIELD(844 + (i), 1) // i: 0-14 103 #define RESERVED4 BW_FIELD(859, 1) 104 105 /* tile info*/ 106 #define NUM_TILE_COLUMNS BW_FIELD(860, 5) 107 #define NUM_TILE_ROWS BW_FIELD(865, 5) 108 #define COLUMN_WIDTH(i) BW_FIELD(870 + (i) * 12, 12) // i: 0-19 109 #define ROW_HEIGHT(i) BW_FIELD(1110 + (i) * 12, 12) // i: 0-21 110 111 #define HEVC_SPS_SIZE ALIGN(1110 + 22 * 12, 256) 112 113 struct rkvdec_hevc_sps_pps { 114 u32 info[HEVC_SPS_SIZE / 8 / 4]; 115 }; 116 117 struct rkvdec_hevc_priv_tbl { 118 struct rkvdec_hevc_sps_pps param_set; 119 struct rkvdec_rps rps; 120 struct scaling_factor scaling_list; 121 u8 cabac_table[27456]; 122 } __packed; 123 124 struct rkvdec_hevc_ctx { 125 struct rkvdec_aux_buf priv_tbl; 126 struct v4l2_ctrl_hevc_scaling_matrix scaling_matrix_cache; 127 struct v4l2_ctrl_hevc_ext_sps_st_rps st_cache; 128 struct vdpu383_regs_h26x regs; 129 }; 130 131 static void assemble_hw_pps(struct rkvdec_ctx *ctx, 132 struct rkvdec_hevc_run *run) 133 { 134 struct rkvdec_hevc_ctx *h264_ctx = ctx->priv; 135 const struct v4l2_ctrl_hevc_sps *sps = run->sps; 136 const struct v4l2_ctrl_hevc_pps *pps = run->pps; 137 const struct v4l2_ctrl_hevc_decode_params *dec_params = run->decode_params; 138 struct rkvdec_hevc_priv_tbl *priv_tbl = h264_ctx->priv_tbl.cpu; 139 struct rkvdec_hevc_sps_pps *hw_ps; 140 bool tiles_enabled; 141 s32 max_cu_width; 142 s32 pic_in_cts_width; 143 s32 pic_in_cts_height; 144 u16 log2_min_cb_size, width, height; 145 u16 column_width[22]; 146 u16 row_height[22]; 147 u8 pcm_enabled; 148 u32 i; 149 150 /* 151 * HW read the SPS/PPS information from PPS packet index by PPS id. 152 * offset from the base can be calculated by PPS_id * 32 (size per PPS 153 * packet unit). so the driver copy SPS/PPS information to the exact PPS 154 * packet unit for HW accessing. 155 */ 156 hw_ps = &priv_tbl->param_set; 157 memset(hw_ps, 0, sizeof(*hw_ps)); 158 159 /* write sps */ 160 rkvdec_set_bw_field(hw_ps->info, VIDEO_PARAMETER_SET_ID, sps->video_parameter_set_id); 161 rkvdec_set_bw_field(hw_ps->info, SEQ_PARAMETER_SET_ID, sps->seq_parameter_set_id); 162 rkvdec_set_bw_field(hw_ps->info, CHROMA_FORMAT_IDC, sps->chroma_format_idc); 163 164 log2_min_cb_size = sps->log2_min_luma_coding_block_size_minus3 + 3; 165 width = sps->pic_width_in_luma_samples; 166 height = sps->pic_height_in_luma_samples; 167 168 rkvdec_set_bw_field(hw_ps->info, PIC_WIDTH_IN_LUMA_SAMPLES, width); 169 rkvdec_set_bw_field(hw_ps->info, PIC_HEIGHT_IN_LUMA_SAMPLES, height); 170 rkvdec_set_bw_field(hw_ps->info, BIT_DEPTH_LUMA, sps->bit_depth_luma_minus8 + 8); 171 rkvdec_set_bw_field(hw_ps->info, BIT_DEPTH_CHROMA, sps->bit_depth_chroma_minus8 + 8); 172 rkvdec_set_bw_field(hw_ps->info, LOG2_MAX_PIC_ORDER_CNT_LSB, 173 sps->log2_max_pic_order_cnt_lsb_minus4 + 4); 174 rkvdec_set_bw_field(hw_ps->info, LOG2_DIFF_MAX_MIN_LUMA_CODING_BLOCK_SIZE, 175 sps->log2_diff_max_min_luma_coding_block_size); 176 rkvdec_set_bw_field(hw_ps->info, LOG2_MIN_LUMA_CODING_BLOCK_SIZE, 177 sps->log2_min_luma_coding_block_size_minus3 + 3); 178 rkvdec_set_bw_field(hw_ps->info, LOG2_MIN_TRANSFORM_BLOCK_SIZE, 179 sps->log2_min_luma_transform_block_size_minus2 + 2); 180 rkvdec_set_bw_field(hw_ps->info, LOG2_DIFF_MAX_MIN_LUMA_TRANSFORM_BLOCK_SIZE, 181 sps->log2_diff_max_min_luma_transform_block_size); 182 rkvdec_set_bw_field(hw_ps->info, MAX_TRANSFORM_HIERARCHY_DEPTH_INTER, 183 sps->max_transform_hierarchy_depth_inter); 184 rkvdec_set_bw_field(hw_ps->info, MAX_TRANSFORM_HIERARCHY_DEPTH_INTRA, 185 sps->max_transform_hierarchy_depth_intra); 186 rkvdec_set_bw_field(hw_ps->info, SCALING_LIST_ENABLED_FLAG, 187 !!(sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED)); 188 rkvdec_set_bw_field(hw_ps->info, AMP_ENABLED_FLAG, 189 !!(sps->flags & V4L2_HEVC_SPS_FLAG_AMP_ENABLED)); 190 rkvdec_set_bw_field(hw_ps->info, SAMPLE_ADAPTIVE_OFFSET_ENABLED_FLAG, 191 !!(sps->flags & V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET)); 192 193 pcm_enabled = !!(sps->flags & V4L2_HEVC_SPS_FLAG_PCM_ENABLED); 194 rkvdec_set_bw_field(hw_ps->info, PCM_ENABLED_FLAG, pcm_enabled); 195 rkvdec_set_bw_field(hw_ps->info, PCM_SAMPLE_BIT_DEPTH_LUMA, 196 pcm_enabled ? sps->pcm_sample_bit_depth_luma_minus1 + 1 : 0); 197 rkvdec_set_bw_field(hw_ps->info, PCM_SAMPLE_BIT_DEPTH_CHROMA, 198 pcm_enabled ? sps->pcm_sample_bit_depth_chroma_minus1 + 1 : 0); 199 rkvdec_set_bw_field(hw_ps->info, PCM_LOOP_FILTER_DISABLED_FLAG, 200 !!(sps->flags & V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED)); 201 rkvdec_set_bw_field(hw_ps->info, LOG2_DIFF_MAX_MIN_PCM_LUMA_CODING_BLOCK_SIZE, 202 sps->log2_diff_max_min_pcm_luma_coding_block_size); 203 rkvdec_set_bw_field(hw_ps->info, LOG2_MIN_PCM_LUMA_CODING_BLOCK_SIZE, 204 pcm_enabled ? sps->log2_min_pcm_luma_coding_block_size_minus3 + 3 : 0); 205 rkvdec_set_bw_field(hw_ps->info, NUM_SHORT_TERM_REF_PIC_SETS, 206 sps->num_short_term_ref_pic_sets); 207 rkvdec_set_bw_field(hw_ps->info, LONG_TERM_REF_PICS_PRESENT_FLAG, 208 !!(sps->flags & V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT)); 209 rkvdec_set_bw_field(hw_ps->info, NUM_LONG_TERM_REF_PICS_SPS, 210 sps->num_long_term_ref_pics_sps); 211 rkvdec_set_bw_field(hw_ps->info, SPS_TEMPORAL_MVP_ENABLED_FLAG, 212 !!(sps->flags & V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED)); 213 rkvdec_set_bw_field(hw_ps->info, STRONG_INTRA_SMOOTHING_ENABLED_FLAG, 214 !!(sps->flags & V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED)); 215 rkvdec_set_bw_field(hw_ps->info, SPS_MAX_DEC_PIC_BUFFERING_MINUS1, 216 sps->sps_max_dec_pic_buffering_minus1); 217 218 /* write pps */ 219 rkvdec_set_bw_field(hw_ps->info, PIC_PARAMETER_SET_ID, pps->pic_parameter_set_id); 220 rkvdec_set_bw_field(hw_ps->info, SEQ_PARAMETER_SET_ID, sps->seq_parameter_set_id); 221 rkvdec_set_bw_field(hw_ps->info, DEPENDENT_SLICE_SEGMENTS_ENABLED_FLAG, 222 !!(pps->flags & V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED)); 223 rkvdec_set_bw_field(hw_ps->info, OUTPUT_FLAG_PRESENT_FLAG, 224 !!(pps->flags & V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT)); 225 rkvdec_set_bw_field(hw_ps->info, NUM_EXTRA_SLICE_HEADER_BITS, 226 pps->num_extra_slice_header_bits); 227 rkvdec_set_bw_field(hw_ps->info, SIGN_DATA_HIDING_ENABLED_FLAG, 228 !!(pps->flags & V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED)); 229 rkvdec_set_bw_field(hw_ps->info, CABAC_INIT_PRESENT_FLAG, 230 !!(pps->flags & V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT)); 231 rkvdec_set_bw_field(hw_ps->info, NUM_REF_IDX_L0_DEFAULT_ACTIVE, 232 pps->num_ref_idx_l0_default_active_minus1 + 1); 233 rkvdec_set_bw_field(hw_ps->info, NUM_REF_IDX_L1_DEFAULT_ACTIVE, 234 pps->num_ref_idx_l1_default_active_minus1 + 1); 235 rkvdec_set_bw_field(hw_ps->info, INIT_QP_MINUS26, pps->init_qp_minus26); 236 rkvdec_set_bw_field(hw_ps->info, CONSTRAINED_INTRA_PRED_FLAG, 237 !!(pps->flags & V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED)); 238 rkvdec_set_bw_field(hw_ps->info, TRANSFORM_SKIP_ENABLED_FLAG, 239 !!(pps->flags & V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED)); 240 rkvdec_set_bw_field(hw_ps->info, CU_QP_DELTA_ENABLED_FLAG, 241 !!(pps->flags & V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED)); 242 rkvdec_set_bw_field(hw_ps->info, LOG2_MIN_CU_QP_DELTA_SIZE, log2_min_cb_size + 243 sps->log2_diff_max_min_luma_coding_block_size - 244 pps->diff_cu_qp_delta_depth); 245 rkvdec_set_bw_field(hw_ps->info, PPS_CB_QP_OFFSET, pps->pps_cb_qp_offset); 246 rkvdec_set_bw_field(hw_ps->info, PPS_CR_QP_OFFSET, pps->pps_cr_qp_offset); 247 rkvdec_set_bw_field(hw_ps->info, PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT_FLAG, 248 !!(pps->flags & 249 V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT)); 250 rkvdec_set_bw_field(hw_ps->info, WEIGHTED_PRED_FLAG, 251 !!(pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED)); 252 rkvdec_set_bw_field(hw_ps->info, WEIGHTED_BIPRED_FLAG, 253 !!(pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED)); 254 rkvdec_set_bw_field(hw_ps->info, TRANSQUANT_BYPASS_ENABLED_FLAG, 255 !!(pps->flags & V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED)); 256 tiles_enabled = !!(pps->flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED); 257 rkvdec_set_bw_field(hw_ps->info, TILES_ENABLED_FLAG, tiles_enabled); 258 rkvdec_set_bw_field(hw_ps->info, ENTROPY_CODING_SYNC_ENABLED_FLAG, 259 !!(pps->flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED)); 260 rkvdec_set_bw_field(hw_ps->info, PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED_FLAG, 261 !!(pps->flags & 262 V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED)); 263 rkvdec_set_bw_field(hw_ps->info, LOOP_FILTER_ACROSS_TILES_ENABLED_FLAG, 264 !!(pps->flags & V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED)); 265 rkvdec_set_bw_field(hw_ps->info, DEBLOCKING_FILTER_OVERRIDE_ENABLED_FLAG, 266 !!(pps->flags & 267 V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED)); 268 rkvdec_set_bw_field(hw_ps->info, PPS_DEBLOCKING_FILTER_DISABLED_FLAG, 269 !!(pps->flags & V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER)); 270 rkvdec_set_bw_field(hw_ps->info, PPS_BETA_OFFSET_DIV2, pps->pps_beta_offset_div2); 271 rkvdec_set_bw_field(hw_ps->info, PPS_TC_OFFSET_DIV2, pps->pps_tc_offset_div2); 272 rkvdec_set_bw_field(hw_ps->info, LISTS_MODIFICATION_PRESENT_FLAG, 273 !!(pps->flags & V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT)); 274 rkvdec_set_bw_field(hw_ps->info, LOG2_PARALLEL_MERGE_LEVEL, 275 pps->log2_parallel_merge_level_minus2 + 2); 276 rkvdec_set_bw_field(hw_ps->info, SLICE_SEGMENT_HEADER_EXTENSION_PRESENT_FLAG, 277 !!(pps->flags & 278 V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT)); 279 rkvdec_set_bw_field(hw_ps->info, NUM_TILE_COLUMNS, 280 tiles_enabled ? pps->num_tile_columns_minus1 + 1 : 1); 281 rkvdec_set_bw_field(hw_ps->info, NUM_TILE_ROWS, 282 tiles_enabled ? pps->num_tile_rows_minus1 + 1 : 1); 283 rkvdec_set_bw_field(hw_ps->info, MVC_FF, 0xffff); 284 285 // Setup tiles information 286 memset(column_width, 0, sizeof(column_width)); 287 memset(row_height, 0, sizeof(row_height)); 288 289 max_cu_width = 1 << (sps->log2_diff_max_min_luma_coding_block_size + log2_min_cb_size); 290 pic_in_cts_width = (width + max_cu_width - 1) / max_cu_width; 291 pic_in_cts_height = (height + max_cu_width - 1) / max_cu_width; 292 293 if (tiles_enabled) { 294 if (pps->flags & V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING) { 295 compute_tiles_uniform(run, log2_min_cb_size, width, height, 296 pic_in_cts_width, pic_in_cts_height, 297 column_width, row_height); 298 } else { 299 compute_tiles_non_uniform(run, log2_min_cb_size, width, height, 300 pic_in_cts_width, pic_in_cts_height, 301 column_width, row_height); 302 } 303 } else { 304 column_width[0] = (width + max_cu_width - 1) / max_cu_width; 305 row_height[0] = (height + max_cu_width - 1) / max_cu_width; 306 } 307 308 for (i = 0; i < 20; i++) 309 rkvdec_set_bw_field(hw_ps->info, COLUMN_WIDTH(i), column_width[i]); 310 for (i = 0; i < 22; i++) 311 rkvdec_set_bw_field(hw_ps->info, ROW_HEIGHT(i), row_height[i]); 312 313 // Setup POC information 314 rkvdec_set_bw_field(hw_ps->info, CURRENT_POC, dec_params->pic_order_cnt_val); 315 316 for (i = 0; i < ARRAY_SIZE(dec_params->dpb); i++) { 317 rkvdec_set_bw_field(hw_ps->info, REF_IS_VALID(i), 318 !!(dec_params->num_active_dpb_entries > i)); 319 rkvdec_set_bw_field(hw_ps->info, REF_PIC_POC(i), 320 dec_params->dpb[i].pic_order_cnt_val); 321 } 322 } 323 324 static void rkvdec_write_regs(struct rkvdec_ctx *ctx) 325 { 326 struct rkvdec_dev *rkvdec = ctx->dev; 327 struct rkvdec_hevc_ctx *h265_ctx = ctx->priv; 328 329 rkvdec_memcpy_toio(rkvdec->regs + VDPU383_OFFSET_COMMON_REGS, 330 &h265_ctx->regs.common, 331 sizeof(h265_ctx->regs.common)); 332 rkvdec_memcpy_toio(rkvdec->regs + VDPU383_OFFSET_COMMON_ADDR_REGS, 333 &h265_ctx->regs.common_addr, 334 sizeof(h265_ctx->regs.common_addr)); 335 rkvdec_memcpy_toio(rkvdec->regs + VDPU383_OFFSET_CODEC_PARAMS_REGS, 336 &h265_ctx->regs.h26x_params, 337 sizeof(h265_ctx->regs.h26x_params)); 338 rkvdec_memcpy_toio(rkvdec->regs + VDPU383_OFFSET_CODEC_ADDR_REGS, 339 &h265_ctx->regs.h26x_addr, 340 sizeof(h265_ctx->regs.h26x_addr)); 341 } 342 343 static void config_registers(struct rkvdec_ctx *ctx, 344 struct rkvdec_hevc_run *run) 345 { 346 const struct v4l2_ctrl_hevc_decode_params *dec_params = run->decode_params; 347 struct rkvdec_hevc_ctx *h265_ctx = ctx->priv; 348 const struct v4l2_ctrl_hevc_sps *sps = run->sps; 349 dma_addr_t priv_start_addr = h265_ctx->priv_tbl.dma; 350 const struct v4l2_pix_format_mplane *dst_fmt; 351 struct vb2_v4l2_buffer *src_buf = run->base.bufs.src; 352 struct vb2_v4l2_buffer *dst_buf = run->base.bufs.dst; 353 struct vdpu383_regs_h26x *regs = &h265_ctx->regs; 354 const struct v4l2_format *f; 355 dma_addr_t rlc_addr; 356 dma_addr_t dst_addr; 357 u32 hor_virstride; 358 u32 ver_virstride; 359 u32 y_virstride; 360 u32 offset; 361 u32 pixels; 362 u32 i; 363 364 memset(regs, 0, sizeof(*regs)); 365 366 /* Set HEVC mode */ 367 regs->common.reg008_dec_mode = VDPU383_MODE_HEVC; 368 369 /* Set input stream length */ 370 regs->h26x_params.reg066_stream_len = vb2_get_plane_payload(&src_buf->vb2_buf, 0); 371 372 /* Set strides */ 373 f = &ctx->decoded_fmt; 374 dst_fmt = &f->fmt.pix_mp; 375 hor_virstride = dst_fmt->plane_fmt[0].bytesperline; 376 ver_virstride = dst_fmt->height; 377 y_virstride = hor_virstride * ver_virstride; 378 379 pixels = dst_fmt->height * dst_fmt->width; 380 381 regs->h26x_params.reg068_hor_virstride = hor_virstride / 16; 382 regs->h26x_params.reg069_raster_uv_hor_virstride = hor_virstride / 16; 383 regs->h26x_params.reg070_y_virstride = y_virstride / 16; 384 385 /* Activate block gating */ 386 regs->common.reg010_block_gating_en.strmd_auto_gating_e = 1; 387 regs->common.reg010_block_gating_en.inter_auto_gating_e = 1; 388 regs->common.reg010_block_gating_en.intra_auto_gating_e = 1; 389 regs->common.reg010_block_gating_en.transd_auto_gating_e = 1; 390 regs->common.reg010_block_gating_en.recon_auto_gating_e = 1; 391 regs->common.reg010_block_gating_en.filterd_auto_gating_e = 1; 392 regs->common.reg010_block_gating_en.bus_auto_gating_e = 1; 393 regs->common.reg010_block_gating_en.ctrl_auto_gating_e = 1; 394 regs->common.reg010_block_gating_en.rcb_auto_gating_e = 1; 395 regs->common.reg010_block_gating_en.err_prc_auto_gating_e = 1; 396 397 /* Set timeout threshold */ 398 if (pixels < RKVDEC_1080P_PIXELS) 399 regs->common.reg013_core_timeout_threshold = VDPU383_TIMEOUT_1080p; 400 else if (pixels < RKVDEC_4K_PIXELS) 401 regs->common.reg013_core_timeout_threshold = VDPU383_TIMEOUT_4K; 402 else if (pixels < RKVDEC_8K_PIXELS) 403 regs->common.reg013_core_timeout_threshold = VDPU383_TIMEOUT_8K; 404 else 405 regs->common.reg013_core_timeout_threshold = VDPU383_TIMEOUT_MAX; 406 407 regs->common.reg016_error_ctrl_set.error_proc_disable = 1; 408 409 /* Set ref pic address & poc */ 410 for (i = 0; i < ARRAY_SIZE(dec_params->dpb) - 1; i++) { 411 struct vb2_buffer *vb_buf = get_ref_buf(ctx, run, i); 412 dma_addr_t buf_dma; 413 414 buf_dma = vb2_dma_contig_plane_dma_addr(vb_buf, 0); 415 416 /* Set reference addresses */ 417 regs->h26x_addr.reg170_185_ref_base[i] = buf_dma; 418 regs->h26x_addr.reg195_210_payload_st_ref_base[i] = buf_dma; 419 420 /* Set COLMV addresses */ 421 regs->h26x_addr.reg217_232_colmv_ref_base[i] = buf_dma + ctx->colmv_offset; 422 } 423 424 /* Set rlc base address (input stream) */ 425 rlc_addr = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0); 426 regs->common_addr.reg128_strm_base = rlc_addr; 427 428 /* Set output base address */ 429 dst_addr = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0); 430 regs->h26x_addr.reg168_decout_base = dst_addr; 431 regs->h26x_addr.reg169_error_ref_base = dst_addr; 432 regs->h26x_addr.reg192_payload_st_cur_base = dst_addr; 433 434 /* Set colmv address */ 435 regs->h26x_addr.reg216_colmv_cur_base = dst_addr + ctx->colmv_offset; 436 437 /* Set RCB addresses */ 438 for (i = 0; i < rkvdec_rcb_buf_count(ctx); i++) { 439 regs->common_addr.reg140_162_rcb_info[i].offset = rkvdec_rcb_buf_dma_addr(ctx, i); 440 regs->common_addr.reg140_162_rcb_info[i].size = rkvdec_rcb_buf_size(ctx, i); 441 } 442 443 if (sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) { 444 /* Set scaling matrix */ 445 offset = offsetof(struct rkvdec_hevc_priv_tbl, scaling_list); 446 regs->common_addr.reg132_scanlist_addr = priv_start_addr + offset; 447 } 448 449 /* Set hw pps address */ 450 offset = offsetof(struct rkvdec_hevc_priv_tbl, param_set); 451 regs->common_addr.reg131_gbl_base = priv_start_addr + offset; 452 regs->h26x_params.reg067_global_len = sizeof(struct rkvdec_hevc_sps_pps) / 16; 453 454 /* Set hw rps address */ 455 offset = offsetof(struct rkvdec_hevc_priv_tbl, rps); 456 regs->common_addr.reg129_rps_base = priv_start_addr + offset; 457 458 /* Set cabac table */ 459 offset = offsetof(struct rkvdec_hevc_priv_tbl, cabac_table); 460 regs->common_addr.reg130_cabactbl_base = priv_start_addr + offset; 461 462 rkvdec_write_regs(ctx); 463 } 464 465 static int rkvdec_hevc_validate_sps(struct rkvdec_ctx *ctx, 466 const struct v4l2_ctrl_hevc_sps *sps) 467 { 468 if (sps->chroma_format_idc != 1) 469 /* Only 4:2:0 is supported */ 470 return -EINVAL; 471 472 if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8) 473 /* Luma and chroma bit depth mismatch */ 474 return -EINVAL; 475 476 if (sps->bit_depth_luma_minus8 != 0 && sps->bit_depth_luma_minus8 != 2) 477 /* Only 8-bit and 10-bit are supported */ 478 return -EINVAL; 479 480 if (sps->pic_width_in_luma_samples > ctx->coded_fmt.fmt.pix_mp.width || 481 sps->pic_height_in_luma_samples > ctx->coded_fmt.fmt.pix_mp.height) 482 return -EINVAL; 483 484 return 0; 485 } 486 487 static int rkvdec_hevc_start(struct rkvdec_ctx *ctx) 488 { 489 struct rkvdec_dev *rkvdec = ctx->dev; 490 struct rkvdec_hevc_priv_tbl *priv_tbl; 491 struct rkvdec_hevc_ctx *hevc_ctx; 492 struct v4l2_ctrl *ctrl; 493 int ret; 494 495 ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, 496 V4L2_CID_STATELESS_HEVC_SPS); 497 if (!ctrl) 498 return -EINVAL; 499 500 ret = rkvdec_hevc_validate_sps(ctx, ctrl->p_new.p_hevc_sps); 501 if (ret) 502 return ret; 503 504 hevc_ctx = kzalloc_obj(*hevc_ctx); 505 if (!hevc_ctx) 506 return -ENOMEM; 507 508 priv_tbl = dma_alloc_coherent(rkvdec->dev, sizeof(*priv_tbl), 509 &hevc_ctx->priv_tbl.dma, GFP_KERNEL); 510 if (!priv_tbl) { 511 ret = -ENOMEM; 512 goto err_free_ctx; 513 } 514 515 hevc_ctx->priv_tbl.size = sizeof(*priv_tbl); 516 hevc_ctx->priv_tbl.cpu = priv_tbl; 517 memcpy(priv_tbl->cabac_table, rkvdec_hevc_cabac_table, 518 sizeof(rkvdec_hevc_cabac_table)); 519 520 ctx->priv = hevc_ctx; 521 return 0; 522 523 err_free_ctx: 524 kfree(hevc_ctx); 525 return ret; 526 } 527 528 static void rkvdec_hevc_stop(struct rkvdec_ctx *ctx) 529 { 530 struct rkvdec_hevc_ctx *hevc_ctx = ctx->priv; 531 struct rkvdec_dev *rkvdec = ctx->dev; 532 533 dma_free_coherent(rkvdec->dev, hevc_ctx->priv_tbl.size, 534 hevc_ctx->priv_tbl.cpu, hevc_ctx->priv_tbl.dma); 535 kfree(hevc_ctx); 536 } 537 538 static int rkvdec_hevc_run(struct rkvdec_ctx *ctx) 539 { 540 struct rkvdec_dev *rkvdec = ctx->dev; 541 struct rkvdec_hevc_run run; 542 struct rkvdec_hevc_ctx *hevc_ctx = ctx->priv; 543 struct rkvdec_hevc_priv_tbl *tbl = hevc_ctx->priv_tbl.cpu; 544 u32 timeout_threshold; 545 546 rkvdec_hevc_run_preamble(ctx, &run); 547 548 /* 549 * On vdpu383, not setting the long and short term ref sets leads to IOMMU page faults. 550 * To be on the safe side for this new v4l2 control, write an error in the log and mark 551 * the buffer as failed by returning an error here. 552 */ 553 if ((!ctx->has_sps_lt_rps && run.sps->num_long_term_ref_pics_sps) || 554 (!ctx->has_sps_st_rps && run.sps->num_short_term_ref_pic_sets)) { 555 dev_err_ratelimited(rkvdec->dev, "Long and short term RPS not set\n"); 556 return -EINVAL; 557 } 558 559 rkvdec_hevc_assemble_hw_scaling_list(ctx, &run, &tbl->scaling_list, 560 &hevc_ctx->scaling_matrix_cache); 561 assemble_hw_pps(ctx, &run); 562 rkvdec_hevc_assemble_hw_rps(&run, &tbl->rps, &hevc_ctx->st_cache); 563 564 config_registers(ctx, &run); 565 566 rkvdec_run_postamble(ctx, &run.base); 567 568 timeout_threshold = hevc_ctx->regs.common.reg013_core_timeout_threshold; 569 rkvdec_schedule_watchdog(rkvdec, timeout_threshold); 570 571 /* Start decoding! */ 572 writel(timeout_threshold, rkvdec->link + VDPU383_LINK_TIMEOUT_THRESHOLD); 573 writel(VDPU383_IP_CRU_MODE, rkvdec->link + VDPU383_LINK_IP_ENABLE); 574 writel(VDPU383_DEC_E_BIT, rkvdec->link + VDPU383_LINK_DEC_ENABLE); 575 576 return 0; 577 } 578 579 static int rkvdec_hevc_try_ctrl(struct rkvdec_ctx *ctx, struct v4l2_ctrl *ctrl) 580 { 581 if (ctrl->id == V4L2_CID_STATELESS_HEVC_SPS) 582 return rkvdec_hevc_validate_sps(ctx, ctrl->p_new.p_hevc_sps); 583 584 return 0; 585 } 586 587 const struct rkvdec_coded_fmt_ops rkvdec_vdpu383_hevc_fmt_ops = { 588 .adjust_fmt = rkvdec_hevc_adjust_fmt, 589 .start = rkvdec_hevc_start, 590 .stop = rkvdec_hevc_stop, 591 .run = rkvdec_hevc_run, 592 .try_ctrl = rkvdec_hevc_try_ctrl, 593 .get_image_fmt = rkvdec_hevc_get_image_fmt, 594 }; 595