1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Cedrus VPU driver 4 * 5 * Copyright (c) 2013 Jens Kuske <jenskuske@gmail.com> 6 * Copyright (c) 2018 Bootlin 7 */ 8 9 #include <linux/delay.h> 10 #include <linux/types.h> 11 12 #include <media/videobuf2-dma-contig.h> 13 14 #include "cedrus.h" 15 #include "cedrus_hw.h" 16 #include "cedrus_regs.h" 17 18 enum cedrus_h264_sram_off { 19 CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE = 0x000, 20 CEDRUS_SRAM_H264_FRAMEBUFFER_LIST = 0x100, 21 CEDRUS_SRAM_H264_REF_LIST_0 = 0x190, 22 CEDRUS_SRAM_H264_REF_LIST_1 = 0x199, 23 CEDRUS_SRAM_H264_SCALING_LIST_8x8_0 = 0x200, 24 CEDRUS_SRAM_H264_SCALING_LIST_8x8_1 = 0x210, 25 CEDRUS_SRAM_H264_SCALING_LIST_4x4 = 0x220, 26 }; 27 28 struct cedrus_h264_sram_ref_pic { 29 __le32 top_field_order_cnt; 30 __le32 bottom_field_order_cnt; 31 __le32 frame_info; 32 __le32 luma_ptr; 33 __le32 chroma_ptr; 34 __le32 mv_col_top_ptr; 35 __le32 mv_col_bot_ptr; 36 __le32 reserved; 37 } __packed; 38 39 #define CEDRUS_H264_FRAME_NUM 18 40 41 #define CEDRUS_NEIGHBOR_INFO_BUF_SIZE (32 * SZ_1K) 42 #define CEDRUS_MIN_PIC_INFO_BUF_SIZE (130 * SZ_1K) 43 44 static void cedrus_h264_write_sram(struct cedrus_dev *dev, 45 enum cedrus_h264_sram_off off, 46 const void *data, size_t len) 47 { 48 const u32 *buffer = data; 49 size_t count = DIV_ROUND_UP(len, 4); 50 51 cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET, off << 2); 52 53 while (count--) 54 cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, *buffer++); 55 } 56 57 static dma_addr_t cedrus_h264_mv_col_buf_addr(struct cedrus_buffer *buf, 58 unsigned int field) 59 { 60 dma_addr_t addr = buf->codec.h264.mv_col_buf_dma; 61 62 /* Adjust for the field */ 63 addr += field * buf->codec.h264.mv_col_buf_size / 2; 64 65 return addr; 66 } 67 68 static void cedrus_fill_ref_pic(struct cedrus_ctx *ctx, 69 struct cedrus_buffer *buf, 70 unsigned int top_field_order_cnt, 71 unsigned int bottom_field_order_cnt, 72 struct cedrus_h264_sram_ref_pic *pic) 73 { 74 struct vb2_buffer *vbuf = &buf->m2m_buf.vb.vb2_buf; 75 76 pic->top_field_order_cnt = cpu_to_le32(top_field_order_cnt); 77 pic->bottom_field_order_cnt = cpu_to_le32(bottom_field_order_cnt); 78 pic->frame_info = cpu_to_le32(buf->codec.h264.pic_type << 8); 79 80 pic->luma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 0)); 81 pic->chroma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 1)); 82 pic->mv_col_top_ptr = cpu_to_le32(cedrus_h264_mv_col_buf_addr(buf, 0)); 83 pic->mv_col_bot_ptr = cpu_to_le32(cedrus_h264_mv_col_buf_addr(buf, 1)); 84 } 85 86 static int cedrus_write_frame_list(struct cedrus_ctx *ctx, 87 struct cedrus_run *run) 88 { 89 struct cedrus_h264_sram_ref_pic pic_list[CEDRUS_H264_FRAME_NUM]; 90 const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params; 91 const struct v4l2_ctrl_h264_sps *sps = run->h264.sps; 92 struct vb2_queue *cap_q; 93 struct cedrus_buffer *output_buf; 94 struct cedrus_dev *dev = ctx->dev; 95 unsigned long used_dpbs = 0; 96 unsigned int position; 97 int output = -1; 98 unsigned int i; 99 100 cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); 101 102 memset(pic_list, 0, sizeof(pic_list)); 103 104 for (i = 0; i < ARRAY_SIZE(decode->dpb); i++) { 105 const struct v4l2_h264_dpb_entry *dpb = &decode->dpb[i]; 106 struct cedrus_buffer *cedrus_buf; 107 struct vb2_buffer *buf; 108 109 if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_VALID)) 110 continue; 111 112 buf = vb2_find_buffer(cap_q, dpb->reference_ts); 113 if (!buf) 114 continue; 115 116 cedrus_buf = vb2_to_cedrus_buffer(buf); 117 position = cedrus_buf->codec.h264.position; 118 used_dpbs |= BIT(position); 119 120 if (run->dst->vb2_buf.timestamp == dpb->reference_ts) { 121 output = position; 122 continue; 123 } 124 125 if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) 126 continue; 127 128 cedrus_fill_ref_pic(ctx, cedrus_buf, 129 dpb->top_field_order_cnt, 130 dpb->bottom_field_order_cnt, 131 &pic_list[position]); 132 } 133 134 if (output >= 0) 135 position = output; 136 else 137 position = find_first_zero_bit(&used_dpbs, CEDRUS_H264_FRAME_NUM); 138 139 output_buf = vb2_to_cedrus_buffer(&run->dst->vb2_buf); 140 output_buf->codec.h264.position = position; 141 142 if (!output_buf->codec.h264.mv_col_buf_size) { 143 const struct v4l2_ctrl_h264_sps *sps = run->h264.sps; 144 unsigned int field_size; 145 146 field_size = DIV_ROUND_UP(ctx->src_fmt.width, 16) * 147 DIV_ROUND_UP(ctx->src_fmt.height, 16) * 16; 148 if (!(sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE)) 149 field_size = field_size * 2; 150 if (!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY)) 151 field_size = field_size * 2; 152 153 output_buf->codec.h264.mv_col_buf_size = field_size * 2; 154 /* Buffer is never accessed by CPU, so we can skip kernel mapping. */ 155 output_buf->codec.h264.mv_col_buf = 156 dma_alloc_attrs(dev->dev, 157 output_buf->codec.h264.mv_col_buf_size, 158 &output_buf->codec.h264.mv_col_buf_dma, 159 GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); 160 161 if (!output_buf->codec.h264.mv_col_buf) { 162 output_buf->codec.h264.mv_col_buf_size = 0; 163 return -ENOMEM; 164 } 165 } 166 167 if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) 168 output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FIELD; 169 else if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD) 170 output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_MBAFF; 171 else 172 output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FRAME; 173 174 cedrus_fill_ref_pic(ctx, output_buf, 175 decode->top_field_order_cnt, 176 decode->bottom_field_order_cnt, 177 &pic_list[position]); 178 179 cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_FRAMEBUFFER_LIST, 180 pic_list, sizeof(pic_list)); 181 182 cedrus_write(dev, VE_H264_OUTPUT_FRAME_IDX, position); 183 184 return 0; 185 } 186 187 #define CEDRUS_MAX_REF_IDX 32 188 189 static void _cedrus_write_ref_list(struct cedrus_ctx *ctx, 190 struct cedrus_run *run, 191 const struct v4l2_h264_reference *ref_list, 192 u8 num_ref, enum cedrus_h264_sram_off sram) 193 { 194 const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params; 195 struct vb2_queue *cap_q; 196 struct cedrus_dev *dev = ctx->dev; 197 u8 sram_array[CEDRUS_MAX_REF_IDX]; 198 unsigned int i; 199 size_t size; 200 201 cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); 202 203 memset(sram_array, 0, sizeof(sram_array)); 204 205 for (i = 0; i < num_ref; i++) { 206 const struct v4l2_h264_dpb_entry *dpb; 207 const struct cedrus_buffer *cedrus_buf; 208 unsigned int position; 209 struct vb2_buffer *buf; 210 u8 dpb_idx; 211 212 dpb_idx = ref_list[i].index; 213 if (dpb_idx >= V4L2_H264_NUM_DPB_ENTRIES) 214 continue; 215 216 dpb = &decode->dpb[dpb_idx]; 217 218 if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) 219 continue; 220 221 buf = vb2_find_buffer(cap_q, dpb->reference_ts); 222 if (!buf) 223 continue; 224 225 cedrus_buf = vb2_to_cedrus_buffer(buf); 226 position = cedrus_buf->codec.h264.position; 227 228 sram_array[i] |= position << 1; 229 if (ref_list[i].fields == V4L2_H264_BOTTOM_FIELD_REF) 230 sram_array[i] |= BIT(0); 231 } 232 233 size = min_t(size_t, ALIGN(num_ref, 4), sizeof(sram_array)); 234 cedrus_h264_write_sram(dev, sram, &sram_array, size); 235 } 236 237 static void cedrus_write_ref_list0(struct cedrus_ctx *ctx, 238 struct cedrus_run *run) 239 { 240 const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params; 241 242 _cedrus_write_ref_list(ctx, run, 243 slice->ref_pic_list0, 244 slice->num_ref_idx_l0_active_minus1 + 1, 245 CEDRUS_SRAM_H264_REF_LIST_0); 246 } 247 248 static void cedrus_write_ref_list1(struct cedrus_ctx *ctx, 249 struct cedrus_run *run) 250 { 251 const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params; 252 253 _cedrus_write_ref_list(ctx, run, 254 slice->ref_pic_list1, 255 slice->num_ref_idx_l1_active_minus1 + 1, 256 CEDRUS_SRAM_H264_REF_LIST_1); 257 } 258 259 static void cedrus_write_scaling_lists(struct cedrus_ctx *ctx, 260 struct cedrus_run *run) 261 { 262 const struct v4l2_ctrl_h264_scaling_matrix *scaling = 263 run->h264.scaling_matrix; 264 const struct v4l2_ctrl_h264_pps *pps = run->h264.pps; 265 struct cedrus_dev *dev = ctx->dev; 266 267 if (!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT)) 268 return; 269 270 cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_0, 271 scaling->scaling_list_8x8[0], 272 sizeof(scaling->scaling_list_8x8[0])); 273 274 cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_1, 275 scaling->scaling_list_8x8[1], 276 sizeof(scaling->scaling_list_8x8[1])); 277 278 cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_4x4, 279 scaling->scaling_list_4x4, 280 sizeof(scaling->scaling_list_4x4)); 281 } 282 283 static void cedrus_write_pred_weight_table(struct cedrus_ctx *ctx, 284 struct cedrus_run *run) 285 { 286 const struct v4l2_ctrl_h264_pred_weights *pred_weight = 287 run->h264.pred_weights; 288 struct cedrus_dev *dev = ctx->dev; 289 int i, j, k; 290 291 cedrus_write(dev, VE_H264_SHS_WP, 292 ((pred_weight->chroma_log2_weight_denom & 0x7) << 4) | 293 ((pred_weight->luma_log2_weight_denom & 0x7) << 0)); 294 295 cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET, 296 CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE << 2); 297 298 for (i = 0; i < ARRAY_SIZE(pred_weight->weight_factors); i++) { 299 const struct v4l2_h264_weight_factors *factors = 300 &pred_weight->weight_factors[i]; 301 302 for (j = 0; j < ARRAY_SIZE(factors->luma_weight); j++) { 303 u32 val; 304 305 val = (((u32)factors->luma_offset[j] & 0x1ff) << 16) | 306 (factors->luma_weight[j] & 0x1ff); 307 cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, val); 308 } 309 310 for (j = 0; j < ARRAY_SIZE(factors->chroma_weight); j++) { 311 for (k = 0; k < ARRAY_SIZE(factors->chroma_weight[0]); k++) { 312 u32 val; 313 314 val = (((u32)factors->chroma_offset[j][k] & 0x1ff) << 16) | 315 (factors->chroma_weight[j][k] & 0x1ff); 316 cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, val); 317 } 318 } 319 } 320 } 321 322 /* 323 * It turns out that using VE_H264_VLD_OFFSET to skip bits is not reliable. In 324 * rare cases frame is not decoded correctly. However, setting offset to 0 and 325 * skipping appropriate amount of bits with flush bits trigger always works. 326 */ 327 static void cedrus_skip_bits(struct cedrus_dev *dev, int num) 328 { 329 int count = 0; 330 331 while (count < num) { 332 int tmp = min(num - count, 32); 333 334 cedrus_write(dev, VE_H264_TRIGGER_TYPE, 335 VE_H264_TRIGGER_TYPE_FLUSH_BITS | 336 VE_H264_TRIGGER_TYPE_N_BITS(tmp)); 337 while (cedrus_read(dev, VE_H264_STATUS) & VE_H264_STATUS_VLD_BUSY) 338 udelay(1); 339 340 count += tmp; 341 } 342 } 343 344 static void cedrus_set_params(struct cedrus_ctx *ctx, 345 struct cedrus_run *run) 346 { 347 const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params; 348 const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params; 349 const struct v4l2_ctrl_h264_pps *pps = run->h264.pps; 350 const struct v4l2_ctrl_h264_sps *sps = run->h264.sps; 351 struct vb2_buffer *src_buf = &run->src->vb2_buf; 352 struct cedrus_dev *dev = ctx->dev; 353 dma_addr_t src_buf_addr; 354 size_t slice_bytes = vb2_get_plane_payload(src_buf, 0); 355 unsigned int pic_width_in_mbs; 356 bool mbaff_pic; 357 u32 reg; 358 359 cedrus_write(dev, VE_H264_VLD_LEN, slice_bytes * 8); 360 cedrus_write(dev, VE_H264_VLD_OFFSET, 0); 361 362 src_buf_addr = vb2_dma_contig_plane_dma_addr(src_buf, 0); 363 cedrus_write(dev, VE_H264_VLD_END, src_buf_addr + slice_bytes); 364 cedrus_write(dev, VE_H264_VLD_ADDR, 365 VE_H264_VLD_ADDR_VAL(src_buf_addr) | 366 VE_H264_VLD_ADDR_FIRST | VE_H264_VLD_ADDR_VALID | 367 VE_H264_VLD_ADDR_LAST); 368 369 if (ctx->src_fmt.width > 2048) { 370 cedrus_write(dev, VE_BUF_CTRL, 371 VE_BUF_CTRL_INTRAPRED_MIXED_RAM | 372 VE_BUF_CTRL_DBLK_MIXED_RAM); 373 cedrus_write(dev, VE_DBLK_DRAM_BUF_ADDR, 374 ctx->codec.h264.deblk_buf_dma); 375 cedrus_write(dev, VE_INTRAPRED_DRAM_BUF_ADDR, 376 ctx->codec.h264.intra_pred_buf_dma); 377 } else { 378 cedrus_write(dev, VE_BUF_CTRL, 379 VE_BUF_CTRL_INTRAPRED_INT_SRAM | 380 VE_BUF_CTRL_DBLK_INT_SRAM); 381 } 382 383 /* 384 * FIXME: Since the bitstream parsing is done in software, and 385 * in userspace, this shouldn't be needed anymore. But it 386 * turns out that removing it breaks the decoding process, 387 * without any clear indication why. 388 */ 389 cedrus_write(dev, VE_H264_TRIGGER_TYPE, 390 VE_H264_TRIGGER_TYPE_INIT_SWDEC); 391 392 cedrus_skip_bits(dev, slice->header_bit_size); 393 394 if (V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED(pps, slice)) 395 cedrus_write_pred_weight_table(ctx, run); 396 397 if ((slice->slice_type == V4L2_H264_SLICE_TYPE_P) || 398 (slice->slice_type == V4L2_H264_SLICE_TYPE_SP) || 399 (slice->slice_type == V4L2_H264_SLICE_TYPE_B)) 400 cedrus_write_ref_list0(ctx, run); 401 402 if (slice->slice_type == V4L2_H264_SLICE_TYPE_B) 403 cedrus_write_ref_list1(ctx, run); 404 405 // picture parameters 406 reg = 0; 407 /* 408 * FIXME: the kernel headers are allowing the default value to 409 * be passed, but the libva doesn't give us that. 410 */ 411 reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 10; 412 reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 5; 413 reg |= (pps->weighted_bipred_idc & 0x3) << 2; 414 if (pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE) 415 reg |= VE_H264_PPS_ENTROPY_CODING_MODE; 416 if (pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED) 417 reg |= VE_H264_PPS_WEIGHTED_PRED; 418 if (pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED) 419 reg |= VE_H264_PPS_CONSTRAINED_INTRA_PRED; 420 if (pps->flags & V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE) 421 reg |= VE_H264_PPS_TRANSFORM_8X8_MODE; 422 cedrus_write(dev, VE_H264_PPS, reg); 423 424 // sequence parameters 425 reg = 0; 426 reg |= (sps->chroma_format_idc & 0x7) << 19; 427 reg |= (sps->pic_width_in_mbs_minus1 & 0xff) << 8; 428 reg |= sps->pic_height_in_map_units_minus1 & 0xff; 429 if (sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY) 430 reg |= VE_H264_SPS_MBS_ONLY; 431 if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD) 432 reg |= VE_H264_SPS_MB_ADAPTIVE_FRAME_FIELD; 433 if (sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE) 434 reg |= VE_H264_SPS_DIRECT_8X8_INFERENCE; 435 cedrus_write(dev, VE_H264_SPS, reg); 436 437 mbaff_pic = !(decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) && 438 (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD); 439 pic_width_in_mbs = sps->pic_width_in_mbs_minus1 + 1; 440 441 // slice parameters 442 reg = 0; 443 reg |= ((slice->first_mb_in_slice % pic_width_in_mbs) & 0xff) << 24; 444 reg |= (((slice->first_mb_in_slice / pic_width_in_mbs) * 445 (mbaff_pic + 1)) & 0xff) << 16; 446 reg |= decode->nal_ref_idc ? BIT(12) : 0; 447 reg |= (slice->slice_type & 0xf) << 8; 448 reg |= slice->cabac_init_idc & 0x3; 449 if (ctx->fh.m2m_ctx->new_frame) 450 reg |= VE_H264_SHS_FIRST_SLICE_IN_PIC; 451 if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) 452 reg |= VE_H264_SHS_FIELD_PIC; 453 if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD) 454 reg |= VE_H264_SHS_BOTTOM_FIELD; 455 if (slice->flags & V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED) 456 reg |= VE_H264_SHS_DIRECT_SPATIAL_MV_PRED; 457 cedrus_write(dev, VE_H264_SHS, reg); 458 459 reg = 0; 460 reg |= VE_H264_SHS2_NUM_REF_IDX_ACTIVE_OVRD; 461 reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 24; 462 reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 16; 463 reg |= (slice->disable_deblocking_filter_idc & 0x3) << 8; 464 reg |= (slice->slice_alpha_c0_offset_div2 & 0xf) << 4; 465 reg |= slice->slice_beta_offset_div2 & 0xf; 466 cedrus_write(dev, VE_H264_SHS2, reg); 467 468 reg = 0; 469 reg |= (pps->second_chroma_qp_index_offset & 0x3f) << 16; 470 reg |= (pps->chroma_qp_index_offset & 0x3f) << 8; 471 reg |= (pps->pic_init_qp_minus26 + 26 + slice->slice_qp_delta) & 0x3f; 472 if (!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT)) 473 reg |= VE_H264_SHS_QP_SCALING_MATRIX_DEFAULT; 474 cedrus_write(dev, VE_H264_SHS_QP, reg); 475 476 // clear status flags 477 cedrus_write(dev, VE_H264_STATUS, cedrus_read(dev, VE_H264_STATUS)); 478 479 // enable int 480 cedrus_write(dev, VE_H264_CTRL, 481 VE_H264_CTRL_SLICE_DECODE_INT | 482 VE_H264_CTRL_DECODE_ERR_INT | 483 VE_H264_CTRL_VLD_DATA_REQ_INT); 484 } 485 486 static enum cedrus_irq_status 487 cedrus_h264_irq_status(struct cedrus_ctx *ctx) 488 { 489 struct cedrus_dev *dev = ctx->dev; 490 u32 reg = cedrus_read(dev, VE_H264_STATUS); 491 492 if (reg & (VE_H264_STATUS_DECODE_ERR_INT | 493 VE_H264_STATUS_VLD_DATA_REQ_INT)) 494 return CEDRUS_IRQ_ERROR; 495 496 if (reg & VE_H264_CTRL_SLICE_DECODE_INT) 497 return CEDRUS_IRQ_OK; 498 499 return CEDRUS_IRQ_NONE; 500 } 501 502 static void cedrus_h264_irq_clear(struct cedrus_ctx *ctx) 503 { 504 struct cedrus_dev *dev = ctx->dev; 505 506 cedrus_write(dev, VE_H264_STATUS, 507 VE_H264_STATUS_INT_MASK); 508 } 509 510 static void cedrus_h264_irq_disable(struct cedrus_ctx *ctx) 511 { 512 struct cedrus_dev *dev = ctx->dev; 513 u32 reg = cedrus_read(dev, VE_H264_CTRL); 514 515 cedrus_write(dev, VE_H264_CTRL, 516 reg & ~VE_H264_CTRL_INT_MASK); 517 } 518 519 static int cedrus_h264_setup(struct cedrus_ctx *ctx, struct cedrus_run *run) 520 { 521 struct cedrus_dev *dev = ctx->dev; 522 int ret; 523 524 cedrus_engine_enable(ctx); 525 526 cedrus_write(dev, VE_H264_SDROT_CTRL, 0); 527 cedrus_write(dev, VE_H264_EXTRA_BUFFER1, 528 ctx->codec.h264.pic_info_buf_dma); 529 cedrus_write(dev, VE_H264_EXTRA_BUFFER2, 530 ctx->codec.h264.neighbor_info_buf_dma); 531 532 cedrus_write_scaling_lists(ctx, run); 533 ret = cedrus_write_frame_list(ctx, run); 534 if (ret) 535 return ret; 536 537 cedrus_set_params(ctx, run); 538 539 return 0; 540 } 541 542 static int cedrus_h264_start(struct cedrus_ctx *ctx) 543 { 544 struct cedrus_dev *dev = ctx->dev; 545 unsigned int pic_info_size; 546 int ret; 547 548 /* 549 * NOTE: All buffers allocated here are only used by HW, so we 550 * can add DMA_ATTR_NO_KERNEL_MAPPING flag when allocating them. 551 */ 552 553 /* Formula for picture buffer size is taken from CedarX source. */ 554 555 if (ctx->src_fmt.width > 2048) 556 pic_info_size = CEDRUS_H264_FRAME_NUM * 0x4000; 557 else 558 pic_info_size = CEDRUS_H264_FRAME_NUM * 0x1000; 559 560 /* 561 * FIXME: If V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY is set, 562 * there is no need to multiply by 2. 563 */ 564 pic_info_size += ctx->src_fmt.height * 2 * 64; 565 566 if (pic_info_size < CEDRUS_MIN_PIC_INFO_BUF_SIZE) 567 pic_info_size = CEDRUS_MIN_PIC_INFO_BUF_SIZE; 568 569 ctx->codec.h264.pic_info_buf_size = pic_info_size; 570 ctx->codec.h264.pic_info_buf = 571 dma_alloc_attrs(dev->dev, ctx->codec.h264.pic_info_buf_size, 572 &ctx->codec.h264.pic_info_buf_dma, 573 GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); 574 if (!ctx->codec.h264.pic_info_buf) 575 return -ENOMEM; 576 577 /* 578 * That buffer is supposed to be 16kiB in size, and be aligned 579 * on 16kiB as well. However, dma_alloc_attrs provides the 580 * guarantee that we'll have a DMA address aligned on the 581 * smallest page order that is greater to the requested size, 582 * so we don't have to overallocate. 583 */ 584 ctx->codec.h264.neighbor_info_buf = 585 dma_alloc_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, 586 &ctx->codec.h264.neighbor_info_buf_dma, 587 GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); 588 if (!ctx->codec.h264.neighbor_info_buf) { 589 ret = -ENOMEM; 590 goto err_pic_buf; 591 } 592 593 if (ctx->src_fmt.width > 2048) { 594 /* 595 * Formulas for deblock and intra prediction buffer sizes 596 * are taken from CedarX source. 597 */ 598 599 ctx->codec.h264.deblk_buf_size = 600 ALIGN(ctx->src_fmt.width, 32) * 12; 601 ctx->codec.h264.deblk_buf = 602 dma_alloc_attrs(dev->dev, 603 ctx->codec.h264.deblk_buf_size, 604 &ctx->codec.h264.deblk_buf_dma, 605 GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); 606 if (!ctx->codec.h264.deblk_buf) { 607 ret = -ENOMEM; 608 goto err_neighbor_buf; 609 } 610 611 /* 612 * NOTE: Multiplying by two deviates from CedarX logic, but it 613 * is for some unknown reason needed for H264 4K decoding on H6. 614 */ 615 ctx->codec.h264.intra_pred_buf_size = 616 ALIGN(ctx->src_fmt.width, 64) * 5 * 2; 617 ctx->codec.h264.intra_pred_buf = 618 dma_alloc_attrs(dev->dev, 619 ctx->codec.h264.intra_pred_buf_size, 620 &ctx->codec.h264.intra_pred_buf_dma, 621 GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); 622 if (!ctx->codec.h264.intra_pred_buf) { 623 ret = -ENOMEM; 624 goto err_deblk_buf; 625 } 626 } 627 628 return 0; 629 630 err_deblk_buf: 631 dma_free_attrs(dev->dev, ctx->codec.h264.deblk_buf_size, 632 ctx->codec.h264.deblk_buf, 633 ctx->codec.h264.deblk_buf_dma, 634 DMA_ATTR_NO_KERNEL_MAPPING); 635 636 err_neighbor_buf: 637 dma_free_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, 638 ctx->codec.h264.neighbor_info_buf, 639 ctx->codec.h264.neighbor_info_buf_dma, 640 DMA_ATTR_NO_KERNEL_MAPPING); 641 642 err_pic_buf: 643 dma_free_attrs(dev->dev, ctx->codec.h264.pic_info_buf_size, 644 ctx->codec.h264.pic_info_buf, 645 ctx->codec.h264.pic_info_buf_dma, 646 DMA_ATTR_NO_KERNEL_MAPPING); 647 return ret; 648 } 649 650 static void cedrus_h264_stop(struct cedrus_ctx *ctx) 651 { 652 struct cedrus_dev *dev = ctx->dev; 653 struct cedrus_buffer *buf; 654 struct vb2_queue *vq; 655 unsigned int i; 656 657 vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); 658 659 for (i = 0; i < vb2_get_num_buffers(vq); i++) { 660 struct vb2_buffer *vb = vb2_get_buffer(vq, i); 661 662 if (!vb) 663 continue; 664 665 buf = vb2_to_cedrus_buffer(vb); 666 667 if (buf->codec.h264.mv_col_buf_size > 0) { 668 dma_free_attrs(dev->dev, 669 buf->codec.h264.mv_col_buf_size, 670 buf->codec.h264.mv_col_buf, 671 buf->codec.h264.mv_col_buf_dma, 672 DMA_ATTR_NO_KERNEL_MAPPING); 673 674 buf->codec.h264.mv_col_buf_size = 0; 675 } 676 } 677 678 dma_free_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, 679 ctx->codec.h264.neighbor_info_buf, 680 ctx->codec.h264.neighbor_info_buf_dma, 681 DMA_ATTR_NO_KERNEL_MAPPING); 682 dma_free_attrs(dev->dev, ctx->codec.h264.pic_info_buf_size, 683 ctx->codec.h264.pic_info_buf, 684 ctx->codec.h264.pic_info_buf_dma, 685 DMA_ATTR_NO_KERNEL_MAPPING); 686 if (ctx->codec.h264.deblk_buf_size) 687 dma_free_attrs(dev->dev, ctx->codec.h264.deblk_buf_size, 688 ctx->codec.h264.deblk_buf, 689 ctx->codec.h264.deblk_buf_dma, 690 DMA_ATTR_NO_KERNEL_MAPPING); 691 if (ctx->codec.h264.intra_pred_buf_size) 692 dma_free_attrs(dev->dev, ctx->codec.h264.intra_pred_buf_size, 693 ctx->codec.h264.intra_pred_buf, 694 ctx->codec.h264.intra_pred_buf_dma, 695 DMA_ATTR_NO_KERNEL_MAPPING); 696 } 697 698 static void cedrus_h264_trigger(struct cedrus_ctx *ctx) 699 { 700 struct cedrus_dev *dev = ctx->dev; 701 702 cedrus_write(dev, VE_H264_TRIGGER_TYPE, 703 VE_H264_TRIGGER_TYPE_AVC_SLICE_DECODE); 704 } 705 706 struct cedrus_dec_ops cedrus_dec_ops_h264 = { 707 .irq_clear = cedrus_h264_irq_clear, 708 .irq_disable = cedrus_h264_irq_disable, 709 .irq_status = cedrus_h264_irq_status, 710 .setup = cedrus_h264_setup, 711 .start = cedrus_h264_start, 712 .stop = cedrus_h264_stop, 713 .trigger = cedrus_h264_trigger, 714 }; 715