// SPDX-License-Identifier: GPL-2.0
/*
 * Rockchip Video Decoder HEVC backend
 *
 * Copyright (C) 2023 Collabora, Ltd.
 *	Sebastian Fricke <sebastian.fricke@collabora.com>
 *
 * Copyright (C) 2019 Collabora, Ltd.
 *	Boris Brezillon <boris.brezillon@collabora.com>
 *
 * Copyright (C) 2016 Rockchip Electronics Co., Ltd.
 *	Jeffy Chen <jeffy.chen@rock-chips.com>
 */

#include <media/v4l2-mem2mem.h>

#include "rkvdec.h"
#include "rkvdec-regs.h"
#include "rkvdec-cabac.h"
#include "rkvdec-hevc-common.h"
#include "rkvdec-bitwriter.h"

/* Size in u8/u32 units. */
#define RKV_SCALING_LIST_SIZE	1360
#define RKV_PPS_SIZE		(80 / 4)	/* one 80-byte packet, in u32 words */
#define RKV_PPS_LEN		64		/* number of SPS/PPS packets */
#define RKV_RPS_SIZE		(32 / 4)	/* one 32-byte packet, in u32 words */
#define RKV_RPS_LEN		600		/* number of RPS packets */

/* One combined SPS/PPS packet; filled through the field macros below. */
struct rkvdec_sps_pps_packet {
	u32 info[RKV_PPS_SIZE];
};

/* One Reference Picture Set packet. */
struct rkvdec_rps_packet {
	u32 info[RKV_RPS_SIZE];
};

/*
 * SPS
 *
 * NOTE(review): BW_FIELD() comes from rkvdec-bitwriter.h; its arguments look
 * like (bit offset, bit width) since each offset advances by the previous
 * width -- confirm against the helper.
 */
#define VIDEO_PARAMETER_SET_ID				BW_FIELD(0, 4)
#define SEQ_PARAMETER_SET_ID				BW_FIELD(4, 4)
#define CHROMA_FORMAT_IDC				BW_FIELD(8, 2)
#define PIC_WIDTH_IN_LUMA_SAMPLES			BW_FIELD(10, 13)
#define PIC_HEIGHT_IN_LUMA_SAMPLES			BW_FIELD(23, 13)
#define BIT_DEPTH_LUMA					BW_FIELD(36, 4)
#define BIT_DEPTH_CHROMA				BW_FIELD(40, 4)
#define LOG2_MAX_PIC_ORDER_CNT_LSB			BW_FIELD(44, 5)
#define LOG2_DIFF_MAX_MIN_LUMA_CODING_BLOCK_SIZE	BW_FIELD(49, 2)
#define LOG2_MIN_LUMA_CODING_BLOCK_SIZE			BW_FIELD(51, 3)
#define LOG2_MIN_TRANSFORM_BLOCK_SIZE			BW_FIELD(54, 3)
#define LOG2_DIFF_MAX_MIN_LUMA_TRANSFORM_BLOCK_SIZE	BW_FIELD(57, 2)
#define MAX_TRANSFORM_HIERARCHY_DEPTH_INTER		BW_FIELD(59, 3)
#define MAX_TRANSFORM_HIERARCHY_DEPTH_INTRA		BW_FIELD(62, 3)
#define SCALING_LIST_ENABLED_FLAG			BW_FIELD(65, 1)
#define AMP_ENABLED_FLAG				BW_FIELD(66, 1)
#define SAMPLE_ADAPTIVE_OFFSET_ENABLED_FLAG		BW_FIELD(67, 1)
#define PCM_ENABLED_FLAG				BW_FIELD(68, 1)
#define PCM_SAMPLE_BIT_DEPTH_LUMA			BW_FIELD(69, 4)
#define PCM_SAMPLE_BIT_DEPTH_CHROMA			BW_FIELD(73, 4)
#define PCM_LOOP_FILTER_DISABLED_FLAG			BW_FIELD(77, 1)
#define LOG2_DIFF_MAX_MIN_PCM_LUMA_CODING_BLOCK_SIZE	BW_FIELD(78, 3)
#define LOG2_MIN_PCM_LUMA_CODING_BLOCK_SIZE		BW_FIELD(81, 3)
#define NUM_SHORT_TERM_REF_PIC_SETS			BW_FIELD(84, 7)
#define LONG_TERM_REF_PICS_PRESENT_FLAG			BW_FIELD(91, 1)
#define NUM_LONG_TERM_REF_PICS_SPS			BW_FIELD(92, 6)
#define SPS_TEMPORAL_MVP_ENABLED_FLAG			BW_FIELD(98, 1)
#define STRONG_INTRA_SMOOTHING_ENABLED_FLAG		BW_FIELD(99, 1)
/* PPS: these fields share the packet with the SPS ones and start at bit 128. */
#define PIC_PARAMETER_SET_ID				BW_FIELD(128, 6)
#define PPS_SEQ_PARAMETER_SET_ID			BW_FIELD(134, 4)
#define DEPENDENT_SLICE_SEGMENTS_ENABLED_FLAG		BW_FIELD(138, 1)
#define OUTPUT_FLAG_PRESENT_FLAG			BW_FIELD(139, 1)
#define NUM_EXTRA_SLICE_HEADER_BITS			BW_FIELD(140, 13)
#define SIGN_DATA_HIDING_ENABLED_FLAG			BW_FIELD(153, 1)
#define CABAC_INIT_PRESENT_FLAG				BW_FIELD(154, 1)
#define NUM_REF_IDX_L0_DEFAULT_ACTIVE			BW_FIELD(155, 4)
#define NUM_REF_IDX_L1_DEFAULT_ACTIVE			BW_FIELD(159, 4)
#define INIT_QP_MINUS26					BW_FIELD(163, 7)
#define CONSTRAINED_INTRA_PRED_FLAG			BW_FIELD(170, 1)
#define TRANSFORM_SKIP_ENABLED_FLAG			BW_FIELD(171, 1)
#define CU_QP_DELTA_ENABLED_FLAG			BW_FIELD(172, 1)
#define LOG2_MIN_CU_QP_DELTA_SIZE			BW_FIELD(173, 3)
#define PPS_CB_QP_OFFSET				BW_FIELD(176, 5)
#define PPS_CR_QP_OFFSET				BW_FIELD(181, 5)
#define PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT_FLAG	BW_FIELD(186, 1)
#define WEIGHTED_PRED_FLAG				BW_FIELD(187, 1)
#define WEIGHTED_BIPRED_FLAG				BW_FIELD(188, 1)
#define TRANSQUANT_BYPASS_ENABLED_FLAG			BW_FIELD(189, 1)
#define TILES_ENABLED_FLAG				BW_FIELD(190, 1)
#define ENTROPY_CODING_SYNC_ENABLED_FLAG		BW_FIELD(191, 1)
#define PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED_FLAG	BW_FIELD(192, 1)
#define LOOP_FILTER_ACROSS_TILES_ENABLED_FLAG		BW_FIELD(193, 1)
#define DEBLOCKING_FILTER_OVERRIDE_ENABLED_FLAG		BW_FIELD(194, 1)
#define PPS_DEBLOCKING_FILTER_DISABLED_FLAG		BW_FIELD(195, 1)
#define PPS_BETA_OFFSET_DIV2				BW_FIELD(196, 4)
95 #define PPS_TC_OFFSET_DIV2 BW_FIELD(200, 4) 96 #define LISTS_MODIFICATION_PRESENT_FLAG BW_FIELD(204, 1) 97 #define LOG2_PARALLEL_MERGE_LEVEL BW_FIELD(205, 3) 98 #define SLICE_SEGMENT_HEADER_EXTENSION_PRESENT_FLAG BW_FIELD(208, 1) 99 #define NUM_TILE_COLUMNS BW_FIELD(212, 5) 100 #define NUM_TILE_ROWS BW_FIELD(217, 5) 101 #define COLUMN_WIDTH(i) BW_FIELD(256 + ((i) * 8), 8) 102 #define ROW_HEIGHT(i) BW_FIELD(416 + ((i) * 8), 8) 103 #define SCALING_LIST_ADDRESS BW_FIELD(592, 32) 104 105 /* Data structure describing auxiliary buffer format. */ 106 struct rkvdec_hevc_priv_tbl { 107 u8 cabac_table[RKV_HEVC_CABAC_TABLE_SIZE]; 108 struct scaling_factor scaling_list; 109 struct rkvdec_sps_pps_packet param_set[RKV_PPS_LEN]; 110 struct rkvdec_rps_packet rps[RKV_RPS_LEN]; 111 }; 112 113 struct rkvdec_hevc_ctx { 114 struct rkvdec_aux_buf priv_tbl; 115 struct v4l2_ctrl_hevc_scaling_matrix scaling_matrix_cache; 116 struct rkvdec_regs regs; 117 }; 118 119 static void assemble_hw_pps(struct rkvdec_ctx *ctx, 120 struct rkvdec_hevc_run *run) 121 { 122 struct rkvdec_hevc_ctx *hevc_ctx = ctx->priv; 123 const struct v4l2_ctrl_hevc_sps *sps = run->sps; 124 const struct v4l2_ctrl_hevc_pps *pps = run->pps; 125 struct rkvdec_hevc_priv_tbl *priv_tbl = hevc_ctx->priv_tbl.cpu; 126 struct rkvdec_sps_pps_packet *hw_ps; 127 u32 min_cb_log2_size_y, ctb_log2_size_y, ctb_size_y; 128 u32 log2_min_cu_qp_delta_size, scaling_distance; 129 dma_addr_t scaling_list_address; 130 int i; 131 132 /* 133 * HW read the SPS/PPS information from PPS packet index by PPS id. 134 * offset from the base can be calculated by PPS_id * 80 (size per PPS 135 * packet unit). so the driver copy SPS/PPS information to the exact PPS 136 * packet unit for HW accessing. 
137 */ 138 hw_ps = &priv_tbl->param_set[pps->pic_parameter_set_id]; 139 memset(hw_ps, 0, sizeof(*hw_ps)); 140 141 #define WRITE_PPS(value, field) rkvdec_set_bw_field(hw_ps->info, field, value) 142 /* write sps */ 143 WRITE_PPS(sps->video_parameter_set_id, VIDEO_PARAMETER_SET_ID); 144 WRITE_PPS(sps->seq_parameter_set_id, SEQ_PARAMETER_SET_ID); 145 WRITE_PPS(sps->chroma_format_idc, CHROMA_FORMAT_IDC); 146 WRITE_PPS(sps->pic_width_in_luma_samples, PIC_WIDTH_IN_LUMA_SAMPLES); 147 WRITE_PPS(sps->pic_height_in_luma_samples, PIC_HEIGHT_IN_LUMA_SAMPLES); 148 WRITE_PPS(sps->bit_depth_luma_minus8 + 8, BIT_DEPTH_LUMA); 149 WRITE_PPS(sps->bit_depth_chroma_minus8 + 8, BIT_DEPTH_CHROMA); 150 WRITE_PPS(sps->log2_max_pic_order_cnt_lsb_minus4 + 4, 151 LOG2_MAX_PIC_ORDER_CNT_LSB); 152 WRITE_PPS(sps->log2_diff_max_min_luma_coding_block_size, 153 LOG2_DIFF_MAX_MIN_LUMA_CODING_BLOCK_SIZE); 154 WRITE_PPS(sps->log2_min_luma_coding_block_size_minus3 + 3, 155 LOG2_MIN_LUMA_CODING_BLOCK_SIZE); 156 WRITE_PPS(sps->log2_min_luma_transform_block_size_minus2 + 2, 157 LOG2_MIN_TRANSFORM_BLOCK_SIZE); 158 WRITE_PPS(sps->log2_diff_max_min_luma_transform_block_size, 159 LOG2_DIFF_MAX_MIN_LUMA_TRANSFORM_BLOCK_SIZE); 160 WRITE_PPS(sps->max_transform_hierarchy_depth_inter, 161 MAX_TRANSFORM_HIERARCHY_DEPTH_INTER); 162 WRITE_PPS(sps->max_transform_hierarchy_depth_intra, 163 MAX_TRANSFORM_HIERARCHY_DEPTH_INTRA); 164 WRITE_PPS(!!(sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED), 165 SCALING_LIST_ENABLED_FLAG); 166 WRITE_PPS(!!(sps->flags & V4L2_HEVC_SPS_FLAG_AMP_ENABLED), 167 AMP_ENABLED_FLAG); 168 WRITE_PPS(!!(sps->flags & V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET), 169 SAMPLE_ADAPTIVE_OFFSET_ENABLED_FLAG); 170 if (sps->flags & V4L2_HEVC_SPS_FLAG_PCM_ENABLED) { 171 WRITE_PPS(1, PCM_ENABLED_FLAG); 172 WRITE_PPS(sps->pcm_sample_bit_depth_luma_minus1 + 1, 173 PCM_SAMPLE_BIT_DEPTH_LUMA); 174 WRITE_PPS(sps->pcm_sample_bit_depth_chroma_minus1 + 1, 175 PCM_SAMPLE_BIT_DEPTH_CHROMA); 176 
WRITE_PPS(!!(sps->flags & V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED), 177 PCM_LOOP_FILTER_DISABLED_FLAG); 178 WRITE_PPS(sps->log2_diff_max_min_pcm_luma_coding_block_size, 179 LOG2_DIFF_MAX_MIN_PCM_LUMA_CODING_BLOCK_SIZE); 180 WRITE_PPS(sps->log2_min_pcm_luma_coding_block_size_minus3 + 3, 181 LOG2_MIN_PCM_LUMA_CODING_BLOCK_SIZE); 182 } 183 WRITE_PPS(sps->num_short_term_ref_pic_sets, NUM_SHORT_TERM_REF_PIC_SETS); 184 WRITE_PPS(!!(sps->flags & V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT), 185 LONG_TERM_REF_PICS_PRESENT_FLAG); 186 WRITE_PPS(sps->num_long_term_ref_pics_sps, NUM_LONG_TERM_REF_PICS_SPS); 187 WRITE_PPS(!!(sps->flags & V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED), 188 SPS_TEMPORAL_MVP_ENABLED_FLAG); 189 WRITE_PPS(!!(sps->flags & V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED), 190 STRONG_INTRA_SMOOTHING_ENABLED_FLAG); 191 192 /* write pps */ 193 WRITE_PPS(pps->pic_parameter_set_id, PIC_PARAMETER_SET_ID); 194 WRITE_PPS(sps->seq_parameter_set_id, PPS_SEQ_PARAMETER_SET_ID); 195 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED), 196 DEPENDENT_SLICE_SEGMENTS_ENABLED_FLAG); 197 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT), 198 OUTPUT_FLAG_PRESENT_FLAG); 199 WRITE_PPS(pps->num_extra_slice_header_bits, NUM_EXTRA_SLICE_HEADER_BITS); 200 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED), 201 SIGN_DATA_HIDING_ENABLED_FLAG); 202 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT), 203 CABAC_INIT_PRESENT_FLAG); 204 WRITE_PPS(pps->num_ref_idx_l0_default_active_minus1 + 1, 205 NUM_REF_IDX_L0_DEFAULT_ACTIVE); 206 WRITE_PPS(pps->num_ref_idx_l1_default_active_minus1 + 1, 207 NUM_REF_IDX_L1_DEFAULT_ACTIVE); 208 WRITE_PPS(pps->init_qp_minus26, INIT_QP_MINUS26); 209 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED), 210 CONSTRAINED_INTRA_PRED_FLAG); 211 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED), 212 TRANSFORM_SKIP_ENABLED_FLAG); 213 
WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED), 214 CU_QP_DELTA_ENABLED_FLAG); 215 216 min_cb_log2_size_y = sps->log2_min_luma_coding_block_size_minus3 + 3; 217 ctb_log2_size_y = min_cb_log2_size_y + 218 sps->log2_diff_max_min_luma_coding_block_size; 219 ctb_size_y = 1 << ctb_log2_size_y; 220 log2_min_cu_qp_delta_size = ctb_log2_size_y - pps->diff_cu_qp_delta_depth; 221 WRITE_PPS(log2_min_cu_qp_delta_size, LOG2_MIN_CU_QP_DELTA_SIZE); 222 WRITE_PPS(pps->pps_cb_qp_offset, PPS_CB_QP_OFFSET); 223 WRITE_PPS(pps->pps_cr_qp_offset, PPS_CR_QP_OFFSET); 224 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT), 225 PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT_FLAG); 226 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED), 227 WEIGHTED_PRED_FLAG); 228 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED), 229 WEIGHTED_BIPRED_FLAG); 230 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED), 231 TRANSQUANT_BYPASS_ENABLED_FLAG); 232 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED), 233 TILES_ENABLED_FLAG); 234 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED), 235 ENTROPY_CODING_SYNC_ENABLED_FLAG); 236 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED), 237 PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED_FLAG); 238 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED), 239 LOOP_FILTER_ACROSS_TILES_ENABLED_FLAG); 240 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED), 241 DEBLOCKING_FILTER_OVERRIDE_ENABLED_FLAG); 242 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER), 243 PPS_DEBLOCKING_FILTER_DISABLED_FLAG); 244 WRITE_PPS(pps->pps_beta_offset_div2, PPS_BETA_OFFSET_DIV2); 245 WRITE_PPS(pps->pps_tc_offset_div2, PPS_TC_OFFSET_DIV2); 246 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT), 247 LISTS_MODIFICATION_PRESENT_FLAG); 248 
WRITE_PPS(pps->log2_parallel_merge_level_minus2 + 2, LOG2_PARALLEL_MERGE_LEVEL); 249 WRITE_PPS(!!(pps->flags & V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT), 250 SLICE_SEGMENT_HEADER_EXTENSION_PRESENT_FLAG); 251 WRITE_PPS(pps->num_tile_columns_minus1 + 1, NUM_TILE_COLUMNS); 252 WRITE_PPS(pps->num_tile_rows_minus1 + 1, NUM_TILE_ROWS); 253 254 if (pps->flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED) { 255 /* Userspace also provide column width and row height for uniform spacing */ 256 for (i = 0; i <= pps->num_tile_columns_minus1; i++) 257 WRITE_PPS(pps->column_width_minus1[i], COLUMN_WIDTH(i)); 258 for (i = 0; i <= pps->num_tile_rows_minus1; i++) 259 WRITE_PPS(pps->row_height_minus1[i], ROW_HEIGHT(i)); 260 } else { 261 WRITE_PPS(((sps->pic_width_in_luma_samples + ctb_size_y - 1) / ctb_size_y) - 1, 262 COLUMN_WIDTH(0)); 263 WRITE_PPS(((sps->pic_height_in_luma_samples + ctb_size_y - 1) / ctb_size_y) - 1, 264 ROW_HEIGHT(0)); 265 } 266 267 scaling_distance = offsetof(struct rkvdec_hevc_priv_tbl, scaling_list); 268 scaling_list_address = hevc_ctx->priv_tbl.dma + scaling_distance; 269 WRITE_PPS(scaling_list_address, SCALING_LIST_ADDRESS); 270 } 271 272 /* 273 * Creation of the Reference Picture Set memory blob for the hardware. 
274 * The layout looks like this: 275 * [0] 32 bits for L0 (6 references + 2 bits of the 7th reference) 276 * [1] 32 bits for L0 (remaining 3 bits of the 7th reference + 5 references 277 * + 4 bits of the 13th reference) 278 * [2] 11 bits for L0 (remaining bit for 13 and 2 references) and 279 * 21 bits for L1 (4 references + first bit of 5) 280 * [3] 32 bits of padding with 0s 281 * [4] 32 bits for L1 (remaining 4 bits for 5 + 5 references + 3 bits of 11) 282 * [5] 22 bits for L1 (remaining 2 bits of 11 and 4 references) 283 * lowdelay flag (bit 23), rps bit offset long term (bit 24 - 32) 284 * [6] rps bit offset long term (bit 1 - 3), rps bit offset short term (bit 4 - 12) 285 * number of references (bit 13 - 16), remaining 16 bits of padding with 0s 286 * [7] 32 bits of padding with 0s 287 * 288 * Thus we have to set up padding in between reference 5 of the L1 list. 289 */ 290 static void assemble_sw_rps(struct rkvdec_ctx *ctx, 291 struct rkvdec_hevc_run *run) 292 { 293 const struct v4l2_ctrl_hevc_decode_params *decode_params = run->decode_params; 294 const struct v4l2_ctrl_hevc_sps *sps = run->sps; 295 const struct v4l2_ctrl_hevc_slice_params *sl_params; 296 const struct v4l2_hevc_dpb_entry *dpb; 297 struct rkvdec_hevc_ctx *hevc_ctx = ctx->priv; 298 struct rkvdec_hevc_priv_tbl *priv_tbl = hevc_ctx->priv_tbl.cpu; 299 struct rkvdec_rps_packet *hw_ps; 300 int i, j; 301 unsigned int lowdelay; 302 303 #define WRITE_RPS(value, field) rkvdec_set_bw_field(hw_ps->info, field, value) 304 305 #define REF_PIC_LONG_TERM_L0(n) BW_FIELD((n) * 5, 1) 306 #define REF_PIC_IDX_L0(n) BW_FIELD(1 + ((n) * 5), 4) 307 #define REF_PIC_LONG_TERM_L1(n) BW_FIELD(((n) < 5 ? 75 : 132) + ((n) * 5), 1) 308 #define REF_PIC_IDX_L1(n) BW_FIELD(((n) < 4 ? 
76 : 128) + ((n) * 5), 4) 309 310 #define LOWDELAY BW_FIELD(182, 1) 311 #define LONG_TERM_RPS_BIT_OFFSET BW_FIELD(183, 10) 312 #define SHORT_TERM_RPS_BIT_OFFSET BW_FIELD(193, 9) 313 #define NUM_RPS_POC BW_FIELD(202, 4) 314 315 for (j = 0; j < run->num_slices; j++) { 316 uint st_bit_offset = 0; 317 uint num_l0_refs = 0; 318 uint num_l1_refs = 0; 319 320 sl_params = &run->slices_params[j]; 321 dpb = decode_params->dpb; 322 323 if (sl_params->slice_type != V4L2_HEVC_SLICE_TYPE_I) { 324 num_l0_refs = sl_params->num_ref_idx_l0_active_minus1 + 1; 325 326 if (sl_params->slice_type == V4L2_HEVC_SLICE_TYPE_B) 327 num_l1_refs = sl_params->num_ref_idx_l1_active_minus1 + 1; 328 329 lowdelay = 1; 330 } else { 331 lowdelay = 0; 332 } 333 334 hw_ps = &priv_tbl->rps[j]; 335 memset(hw_ps, 0, sizeof(*hw_ps)); 336 337 for (i = 0; i < num_l0_refs; i++) { 338 const struct v4l2_hevc_dpb_entry dpb_l0 = dpb[sl_params->ref_idx_l0[i]]; 339 340 WRITE_RPS(!!(dpb_l0.flags & V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE), 341 REF_PIC_LONG_TERM_L0(i)); 342 WRITE_RPS(sl_params->ref_idx_l0[i], REF_PIC_IDX_L0(i)); 343 344 if (dpb_l0.pic_order_cnt_val > sl_params->slice_pic_order_cnt) 345 lowdelay = 0; 346 } 347 348 for (i = 0; i < num_l1_refs; i++) { 349 const struct v4l2_hevc_dpb_entry dpb_l1 = dpb[sl_params->ref_idx_l1[i]]; 350 int is_long_term = 351 !!(dpb_l1.flags & V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE); 352 353 WRITE_RPS(is_long_term, REF_PIC_LONG_TERM_L1(i)); 354 WRITE_RPS(sl_params->ref_idx_l1[i], REF_PIC_IDX_L1(i)); 355 356 if (dpb_l1.pic_order_cnt_val > sl_params->slice_pic_order_cnt) 357 lowdelay = 0; 358 } 359 360 WRITE_RPS(lowdelay, LOWDELAY); 361 362 if (!(decode_params->flags & V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC)) { 363 if (sl_params->short_term_ref_pic_set_size) 364 st_bit_offset = sl_params->short_term_ref_pic_set_size; 365 else if (sps->num_short_term_ref_pic_sets > 1) 366 st_bit_offset = fls(sps->num_short_term_ref_pic_sets - 1); 367 } 368 369 WRITE_RPS(st_bit_offset + 
sl_params->long_term_ref_pic_set_size, 370 LONG_TERM_RPS_BIT_OFFSET); 371 WRITE_RPS(sl_params->short_term_ref_pic_set_size, 372 SHORT_TERM_RPS_BIT_OFFSET); 373 374 WRITE_RPS(decode_params->num_poc_st_curr_before + 375 decode_params->num_poc_st_curr_after + 376 decode_params->num_poc_lt_curr, 377 NUM_RPS_POC); 378 } 379 } 380 381 static void config_registers(struct rkvdec_ctx *ctx, 382 struct rkvdec_hevc_run *run) 383 { 384 struct rkvdec_dev *rkvdec = ctx->dev; 385 const struct v4l2_ctrl_hevc_decode_params *decode_params = run->decode_params; 386 const struct v4l2_ctrl_hevc_sps *sps = run->sps; 387 const struct v4l2_ctrl_hevc_slice_params *sl_params = &run->slices_params[0]; 388 const struct v4l2_hevc_dpb_entry *dpb = decode_params->dpb; 389 struct rkvdec_hevc_ctx *hevc_ctx = ctx->priv; 390 struct rkvdec_regs *regs = &hevc_ctx->regs; 391 dma_addr_t priv_start_addr = hevc_ctx->priv_tbl.dma; 392 const struct v4l2_pix_format_mplane *dst_fmt; 393 struct vb2_v4l2_buffer *src_buf = run->base.bufs.src; 394 struct vb2_v4l2_buffer *dst_buf = run->base.bufs.dst; 395 const struct v4l2_format *f; 396 dma_addr_t rlc_addr; 397 dma_addr_t refer_addr; 398 u32 rlc_len; 399 u32 hor_virstride; 400 u32 ver_virstride; 401 u32 y_virstride; 402 u32 yuv_virstride = 0; 403 u32 offset; 404 dma_addr_t dst_addr; 405 u32 reg, i; 406 407 memset(regs, 0, sizeof(*regs)); 408 409 regs->common.reg02.dec_mode = RKVDEC_MODE_HEVC; 410 411 f = &ctx->decoded_fmt; 412 dst_fmt = &f->fmt.pix_mp; 413 hor_virstride = dst_fmt->plane_fmt[0].bytesperline; 414 ver_virstride = dst_fmt->height; 415 y_virstride = hor_virstride * ver_virstride; 416 417 if (sps->chroma_format_idc == 0) 418 yuv_virstride = y_virstride; 419 else if (sps->chroma_format_idc == 1) 420 yuv_virstride = y_virstride + y_virstride / 2; 421 else if (sps->chroma_format_idc == 2) 422 yuv_virstride = 2 * y_virstride; 423 424 regs->common.reg03.slice_num_lowbits = run->num_slices; 425 regs->common.reg03.uv_hor_virstride = hor_virstride / 16; 426 
regs->common.reg03.y_hor_virstride = hor_virstride / 16; 427 428 /* config rlc base address */ 429 rlc_addr = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0); 430 regs->common.strm_rlc_base = rlc_addr; 431 432 rlc_len = vb2_get_plane_payload(&src_buf->vb2_buf, 0); 433 regs->common.stream_len = round_up(rlc_len, 16) + 64; 434 435 /* config cabac table */ 436 offset = offsetof(struct rkvdec_hevc_priv_tbl, cabac_table); 437 regs->common.cabactbl_base = priv_start_addr + offset; 438 439 /* config output base address */ 440 dst_addr = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0); 441 regs->common.decout_base = dst_addr; 442 443 regs->common.reg08.y_virstride = y_virstride / 16; 444 regs->common.reg09.yuv_virstride = yuv_virstride / 16; 445 446 /* config ref pic address */ 447 for (i = 0; i < 15; i++) { 448 struct vb2_buffer *vb_buf = get_ref_buf(ctx, run, i); 449 450 if (i < 4 && decode_params->num_active_dpb_entries) { 451 reg = GENMASK(decode_params->num_active_dpb_entries - 1, 0); 452 reg = (reg >> (i * 4)) & 0xf; 453 } else { 454 reg = 0; 455 } 456 457 refer_addr = vb2_dma_contig_plane_dma_addr(vb_buf, 0); 458 459 regs->h26x.ref0_14_base[i].base_addr = refer_addr >> 4; 460 regs->h26x.ref0_14_base[i].field_ref = !!(reg & 1); 461 regs->h26x.ref0_14_base[i].topfield_used_ref = !!(reg & 2); 462 regs->h26x.ref0_14_base[i].botfield_used_ref = !!(reg & 4); 463 regs->h26x.ref0_14_base[i].colmv_use_flag_ref = !!(reg & 8); 464 465 regs->h26x.ref0_14_poc[i] = i < decode_params->num_active_dpb_entries 466 ? 
dpb[i].pic_order_cnt_val 467 : 0; 468 } 469 470 regs->h26x.cur_poc = sl_params->slice_pic_order_cnt; 471 472 /* config hw pps address */ 473 offset = offsetof(struct rkvdec_hevc_priv_tbl, param_set); 474 regs->h26x.pps_base = priv_start_addr + offset; 475 476 /* config hw rps address */ 477 offset = offsetof(struct rkvdec_hevc_priv_tbl, rps); 478 regs->h26x.rps_base = priv_start_addr + offset; 479 480 rkvdec_memcpy_toio(rkvdec->regs, regs, 481 MIN(sizeof(*regs), sizeof(u32) * rkvdec->variant->num_regs)); 482 } 483 484 static int rkvdec_hevc_validate_sps(struct rkvdec_ctx *ctx, 485 const struct v4l2_ctrl_hevc_sps *sps) 486 { 487 if (sps->chroma_format_idc > 1) 488 /* Only 4:0:0 and 4:2:0 are supported */ 489 return -EINVAL; 490 if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8) 491 /* Luma and chroma bit depth mismatch */ 492 return -EINVAL; 493 if (sps->bit_depth_luma_minus8 != 0 && sps->bit_depth_luma_minus8 != 2) 494 /* Only 8-bit and 10-bit are supported */ 495 return -EINVAL; 496 497 if (sps->pic_width_in_luma_samples > ctx->coded_fmt.fmt.pix_mp.width || 498 sps->pic_height_in_luma_samples > ctx->coded_fmt.fmt.pix_mp.height) 499 return -EINVAL; 500 501 return 0; 502 } 503 504 static int rkvdec_hevc_start(struct rkvdec_ctx *ctx) 505 { 506 struct rkvdec_dev *rkvdec = ctx->dev; 507 struct rkvdec_hevc_priv_tbl *priv_tbl; 508 struct rkvdec_hevc_ctx *hevc_ctx; 509 510 hevc_ctx = kzalloc_obj(*hevc_ctx); 511 if (!hevc_ctx) 512 return -ENOMEM; 513 514 priv_tbl = dma_alloc_coherent(rkvdec->dev, sizeof(*priv_tbl), 515 &hevc_ctx->priv_tbl.dma, GFP_KERNEL); 516 if (!priv_tbl) { 517 kfree(hevc_ctx); 518 return -ENOMEM; 519 } 520 521 hevc_ctx->priv_tbl.size = sizeof(*priv_tbl); 522 hevc_ctx->priv_tbl.cpu = priv_tbl; 523 memcpy(priv_tbl->cabac_table, rkvdec_hevc_cabac_table, 524 sizeof(rkvdec_hevc_cabac_table)); 525 526 ctx->priv = hevc_ctx; 527 return 0; 528 } 529 530 static void rkvdec_hevc_stop(struct rkvdec_ctx *ctx) 531 { 532 struct rkvdec_hevc_ctx *hevc_ctx 
= ctx->priv; 533 struct rkvdec_dev *rkvdec = ctx->dev; 534 535 dma_free_coherent(rkvdec->dev, hevc_ctx->priv_tbl.size, 536 hevc_ctx->priv_tbl.cpu, hevc_ctx->priv_tbl.dma); 537 kfree(hevc_ctx); 538 } 539 540 static int rkvdec_hevc_run(struct rkvdec_ctx *ctx) 541 { 542 struct rkvdec_dev *rkvdec = ctx->dev; 543 struct rkvdec_hevc_run run; 544 struct rkvdec_hevc_ctx *hevc_ctx = ctx->priv; 545 struct rkvdec_hevc_priv_tbl *tbl = hevc_ctx->priv_tbl.cpu; 546 u32 reg; 547 548 rkvdec_hevc_run_preamble(ctx, &run); 549 550 rkvdec_hevc_assemble_hw_scaling_list(ctx, &run, &tbl->scaling_list, 551 &hevc_ctx->scaling_matrix_cache); 552 assemble_hw_pps(ctx, &run); 553 assemble_sw_rps(ctx, &run); 554 config_registers(ctx, &run); 555 556 rkvdec_run_postamble(ctx, &run.base); 557 558 schedule_delayed_work(&rkvdec->watchdog_work, msecs_to_jiffies(2000)); 559 560 writel(1, rkvdec->regs + RKVDEC_REG_PREF_LUMA_CACHE_COMMAND); 561 writel(1, rkvdec->regs + RKVDEC_REG_PREF_CHR_CACHE_COMMAND); 562 563 if (rkvdec->variant->quirks & RKVDEC_QUIRK_DISABLE_QOS) 564 rkvdec_quirks_disable_qos(ctx); 565 566 /* Start decoding! */ 567 reg = (run.pps->flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED) ? 568 0 : RKVDEC_WR_DDR_ALIGN_EN; 569 writel(RKVDEC_INTERRUPT_DEC_E | RKVDEC_CONFIG_DEC_CLK_GATE_E | 570 RKVDEC_TIMEOUT_E | RKVDEC_BUF_EMPTY_E | reg, 571 rkvdec->regs + RKVDEC_REG_INTERRUPT); 572 573 return 0; 574 } 575 576 static int rkvdec_hevc_try_ctrl(struct rkvdec_ctx *ctx, struct v4l2_ctrl *ctrl) 577 { 578 if (ctrl->id == V4L2_CID_STATELESS_HEVC_SPS) 579 return rkvdec_hevc_validate_sps(ctx, ctrl->p_new.p_hevc_sps); 580 581 return 0; 582 } 583 584 const struct rkvdec_coded_fmt_ops rkvdec_hevc_fmt_ops = { 585 .adjust_fmt = rkvdec_hevc_adjust_fmt, 586 .start = rkvdec_hevc_start, 587 .stop = rkvdec_hevc_stop, 588 .run = rkvdec_hevc_run, 589 .try_ctrl = rkvdec_hevc_try_ctrl, 590 .get_image_fmt = rkvdec_hevc_get_image_fmt, 591 }; 592