// SPDX-License-Identifier: GPL-2.0+
/*
 * NVIDIA Tegra Video decoder driver
 *
 * Copyright (C) 2016-2022 Dmitry Osipenko <digetx@gmail.com>
 *
 */

#include <linux/iopoll.h>
#include <linux/pm_runtime.h>
#include <linux/reset.h>
#include <linux/slab.h>

#include <media/v4l2-h264.h>

#include "trace.h"
#include "vde.h"

/* Per-frame flags stored in tegra_video_frame::flags by this file. */
#define FLAG_B_FRAME		0x1	/* set when the buffer is tagged as a B frame */
#define FLAG_REFERENCE		0x2	/* set for the current frame when nal_ref_idc != 0 */

/*
 * Decoding parameters for one frame.
 *
 * Filled by tegra_vde_h264_setup_context() from the V4L2 SPS, PPS and
 * decode-params controls, range-checked by tegra_vde_validate_h264_ctx()
 * and then packed into hardware registers by tegra_vde_setup_hw_context().
 */
struct tegra_vde_h264_decoder_ctx {
	/* Number of used vde->frames[] slots; slot 0 is the frame being decoded */
	unsigned int dpb_frames_nb;
	/* Count of references whose top_field_order_cnt is below the current POC */
	unsigned int dpb_ref_frames_with_earlier_poc_nb;
	/* 1 when sps->profile_idc == 66 */
	unsigned int baseline_profile;
	/* Hardware level code, see to_tegra_vde_h264_level_idc() */
	unsigned int level_idc;
	unsigned int log2_max_pic_order_cnt_lsb;
	unsigned int log2_max_frame_num;
	unsigned int pic_order_cnt_type;
	unsigned int direct_8x8_inference_flag;
	unsigned int pic_width_in_mbs;
	unsigned int pic_height_in_mbs;
	unsigned int pic_init_qp;
	unsigned int deblocking_filter_control_present_flag;
	unsigned int constrained_intra_pred_flag;
	/* Masked to 5 bits when copied from the PPS */
	unsigned int chroma_qp_index_offset;
	unsigned int pic_order_present_flag;
	unsigned int num_ref_idx_l0_active_minus1;
	unsigned int num_ref_idx_l1_active_minus1;
};

/*
 * Scratch storage for the reference lists produced by the V4L2 H.264
 * reflist builder: "p" for P frames, "b0"/"b1" for B frames.
 */
struct h264_reflists {
	struct v4l2_h264_reference p[V4L2_H264_NUM_DPB_ENTRIES];
	struct v4l2_h264_reference b0[V4L2_H264_NUM_DPB_ENTRIES];
	struct v4l2_h264_reference b1[V4L2_H264_NUM_DPB_ENTRIES];
};

/*
 * Poll the MBE register at offset 0x8C until it reads >= 0x10.
 *
 * Uses a 1 us delay between reads and a 100 us overall timeout. Returns
 * whatever readl_relaxed_poll_timeout() returns: 0 once the condition
 * holds, a negative error code on timeout.
 */
static int tegra_vde_wait_mbe(struct tegra_vde *vde)
{
	u32 tmp;

	return readl_relaxed_poll_timeout(vde->mbe + 0x8C, tmp,
					  tmp >= 0x10, 1, 100);
}

/*
 * Issue the MBE per-frame-index commands through MBE register 0x80.
 *
 * @refs_nb:    number of reference frames (frame indices 1..refs_nb)
 * @setup_refs: when false only frame index 0 is programmed
 *
 * Frame index 0 is always programmed first. For each reference a pair of
 * 0xD0000000/0xD0200000 commands is written, and the frame index is packed
 * into a mask using one 6-bit field per reference, four references per
 * group. A 0xC0000000 command carrying the group number and the mask is
 * written after every fourth reference and after the last one.
 *
 * Returns 0 on success or the error from tegra_vde_wait_mbe().
 */
static int tegra_vde_setup_mbe_frame_idx(struct tegra_vde *vde,
					 unsigned int refs_nb,
					 bool setup_refs)
{
	u32 value, frame_idx_enb_mask = 0;
	unsigned int frame_idx;
	unsigned int idx;
	int err;

	/* Frame index 0, i.e. the frame being decoded */
	tegra_vde_writel(vde, 0xD0000000 | (0 << 23), vde->mbe, 0x80);
	tegra_vde_writel(vde, 0xD0200000 | (0 << 23), vde->mbe, 0x80);

	err = tegra_vde_wait_mbe(vde);
	if (err)
		return err;

	if (!setup_refs)
		return 0;

	for (idx = 0, frame_idx = 1; idx < refs_nb; idx++, frame_idx++) {
		tegra_vde_writel(vde, 0xD0000000 | (frame_idx << 23),
				 vde->mbe, 0x80);
		tegra_vde_writel(vde, 0xD0200000 | (frame_idx << 23),
				 vde->mbe, 0x80);

		/* Position within the current group of four selects the field */
		frame_idx_enb_mask |= frame_idx << (6 * (idx % 4));

		/* Flush the group when it is full or when this is the last ref */
		if (idx % 4 == 3 || idx == refs_nb - 1) {
			value = 0xC0000000;
			value |= (idx >> 2) << 24;
			value |= frame_idx_enb_mask;

			tegra_vde_writel(vde, value, vde->mbe, 0x80);

			err = tegra_vde_wait_mbe(vde);
			if (err)
				return err;

			frame_idx_enb_mask = 0;
		}
	}

	return 0;
}

/*
 * Write a 32-bit value as two 0xA0000000 commands to MBE register 0x80:
 * the low 16 bits are addressed to @reg and the high 16 bits to @reg + 1.
 */
static void tegra_vde_mbe_set_0xa_reg(struct tegra_vde *vde, int reg, u32 val)
{
	tegra_vde_writel(vde, 0xA0000000 | (reg << 24) | (val & 0xFFFF),
			 vde->mbe, 0x80);
	tegra_vde_writel(vde, 0xA0000000 | ((reg + 1) << 24) | (val >> 16),
			 vde->mbe, 0x80);
}

/*
 * Wait for the BSEV unit by polling its INTR_STATUS register.
 *
 * Three conditions are awaited in order:
 *  1. bit 2 cleared (meaning of the bit is not known, see the log message),
 *  2. BSE_ICMDQUE_EMPTY set,
 *  3. only if @wait_dma: BSE_DMA_BUSY cleared (1000 us timeout instead of
 *     the 100 us used for the first two).
 *
 * Returns 0 on success or the polling error after logging which step
 * timed out.
 */
static int tegra_vde_wait_bsev(struct tegra_vde *vde, bool wait_dma)
{
	struct device *dev = vde->dev;
	u32 value;
	int err;

	err = readl_relaxed_poll_timeout(vde->bsev + INTR_STATUS, value,
					 !(value & BIT(2)), 1, 100);
	if (err) {
		dev_err(dev, "BSEV unknown bit timeout\n");
		return err;
	}

	err = readl_relaxed_poll_timeout(vde->bsev + INTR_STATUS, value,
					 (value & BSE_ICMDQUE_EMPTY), 1, 100);
	if (err) {
		dev_err(dev, "BSEV ICMDQUE flush timeout\n");
		return err;
	}

	if (!wait_dma)
		return 0;

	err = readl_relaxed_poll_timeout(vde->bsev + INTR_STATUS, value,
					 !(value & BSE_DMA_BUSY), 1, 1000);
	if (err) {
		dev_err(dev, "BSEV DMA timeout\n");
		return err;
	}

	return 0;
}

/*
 * Write one command word to the BSEV ICMDQUE_WR register and wait for the
 * unit via tegra_vde_wait_bsev(). Returns that function's result.
 */
static int tegra_vde_push_to_bsev_icmdqueue(struct tegra_vde *vde,
					    u32 value, bool wait_dma)
{
	tegra_vde_writel(vde, value, vde->bsev, ICMDQUE_WR);

	return tegra_vde_wait_bsev(vde, wait_dma);
}

/*
 * Program the FRAMEID registers of slot @frameid.
 *
 * With a non-NULL @frame the Y/Cb/Cr DMA addresses (shifted right by 8),
 * the luma pitch combined with @mbs_height and the chroma pitch are
 * written. With a NULL @frame the slot gets the placeholder address
 * 0x6CDEAD00 and zeroed pitch words.
 *
 * @mbs_width is accepted but not used by this function.
 */
static void tegra_vde_setup_frameid(struct tegra_vde *vde,
				    struct tegra_video_frame *frame,
				    unsigned int frameid,
				    u32 mbs_width, u32 mbs_height)
{
	u32 y_addr  = frame ? frame->y_addr  : 0x6CDEAD00;
	u32 cb_addr = frame ? frame->cb_addr : 0x6CDEAD00;
	u32 cr_addr = frame ? frame->cr_addr : 0x6CDEAD00;
	u32 value1 = frame ? ((frame->luma_atoms_pitch << 16) | mbs_height) : 0;
	u32 value2 = frame ? ((frame->chroma_atoms_pitch << 6) | 1) : 0;

	/* Each slot occupies one 32-bit register in each of the five banks */
	tegra_vde_writel(vde, y_addr  >> 8, vde->frameid, 0x000 + frameid * 4);
	tegra_vde_writel(vde, cb_addr >> 8, vde->frameid, 0x100 + frameid * 4);
	tegra_vde_writel(vde, cr_addr >> 8, vde->frameid, 0x180 + frameid * 4);
	tegra_vde_writel(vde, value1,       vde->frameid, 0x080 + frameid * 4);
	tegra_vde_writel(vde, value2,       vde->frameid, 0x280 + frameid * 4);
}

/*
 * Program all 17 FRAMEID slots: the first @frames_nb from @frames, the
 * remaining ones as unused (NULL frame).
 */
static void tegra_setup_frameidx(struct tegra_vde *vde,
				 struct tegra_video_frame *frames,
				 unsigned int frames_nb,
				 u32 mbs_width, u32 mbs_height)
{
	unsigned int idx;

	for (idx = 0; idx < frames_nb; idx++)
		tegra_vde_setup_frameid(vde, &frames[idx], idx,
					mbs_width, mbs_height);

	/* idx carries over: mark every slot that was not filled above */
	for (; idx < 17; idx++)
		tegra_vde_setup_frameid(vde, NULL, idx, 0, 0);
}

/*
 * Store one two-word row into an IRAM table and emit a trace event.
 *
 * Tables are laid out back to back, 0x20 u32 words apart, and each row
 * takes two consecutive words (@value1 then @value2).
 */
static void tegra_vde_setup_iram_entry(struct tegra_vde *vde,
				       unsigned int table,
				       unsigned int row,
				       u32 value1, u32 value2)
{
	u32 *iram_tables = vde->iram;

	trace_vde_setup_iram_entry(table, row, value1, value2);

	iram_tables[0x20 * table + row * 2 + 0] = value1;
	iram_tables[0x20 * table + row * 2 + 1] = value2;
}

/*
 * Fill the four 16-row IRAM reference tables.
 *
 * @dpb_frames:          frame array; entry 0 is the frame being decoded,
 *                       references start at entry 1
 * @ref_frames_nb:       number of reference frames
 * @with_earlier_poc_nb: how many of the leading references were counted
 *                       as having an earlier picture order count
 *
 * All four tables first receive the references in @dpb_frames order, with
 * unused rows set to value 0x3f and the placeholder aux address
 * 0x6ADEAD00. If the current frame is a B frame and not every reference
 * is "earlier", table 2 is then rewritten so that the remaining (later)
 * references come first, followed by the earlier ones.
 *
 * NOTE(review): table 2 presumably holds reference list L1, going by the
 * trace_vde_ref_l1() call; confirm against hardware documentation.
 */
static void tegra_vde_setup_iram_tables(struct tegra_vde *vde,
					struct tegra_video_frame *dpb_frames,
					unsigned int ref_frames_nb,
					unsigned int with_earlier_poc_nb)
{
	struct tegra_video_frame *frame;
	int with_later_poc_nb;
	u32 value, aux_addr;
	unsigned int i, k;

	trace_vde_ref_l0(dpb_frames[0].frame_num);

	for (i = 0; i < 16; i++) {
		if (i < ref_frames_nb) {
			frame = &dpb_frames[i + 1];

			aux_addr = frame->aux_addr;

			/* frame slot index | "not a B frame" | bit 24 | frame_num */
			value  = (i + 1) << 26;
			value |= !(frame->flags & FLAG_B_FRAME) << 25;
			value |= 1 << 24;
			value |= frame->frame_num;
		} else {
			aux_addr = 0x6ADEAD00;
			value = 0x3f;
		}

		tegra_vde_setup_iram_entry(vde, 0, i, value, aux_addr);
		tegra_vde_setup_iram_entry(vde, 1, i, value, aux_addr);
		tegra_vde_setup_iram_entry(vde, 2, i, value, aux_addr);
		tegra_vde_setup_iram_entry(vde, 3, i, value, aux_addr);
	}

	/* Reordering of table 2 only applies when decoding a B frame */
	if (!(dpb_frames[0].flags & FLAG_B_FRAME))
		return;

	/* Nothing to move to the front when no reference is "later" */
	if (with_earlier_poc_nb >= ref_frames_nb)
		return;

	with_later_poc_nb = ref_frames_nb - with_earlier_poc_nb;

	trace_vde_ref_l1(with_later_poc_nb, with_earlier_poc_nb);

	/* Rows 0..with_later_poc_nb-1: references after the "earlier" ones */
	for (i = 0, k = with_earlier_poc_nb; i < with_later_poc_nb; i++, k++) {
		frame = &dpb_frames[k + 1];

		aux_addr = frame->aux_addr;

		value  = (k + 1) << 26;
		value |= !(frame->flags & FLAG_B_FRAME) << 25;
		value |= 1 << 24;
		value |= frame->frame_num;

		tegra_vde_setup_iram_entry(vde, 2, i, value, aux_addr);
	}

	/* Remaining rows (i continues): the "earlier" references from the start */
	for (k = 0; i < ref_frames_nb; i++, k++) {
		frame = &dpb_frames[k + 1];

		aux_addr = frame->aux_addr;

		value  = (k + 1) << 26;
		value |= !(frame->flags & FLAG_B_FRAME) << 25;
		value |= 1 << 24;
		value |= frame->frame_num;

		tegra_vde_setup_iram_entry(vde, 2, i, value, aux_addr);
	}
}

/*
 * Program the decoder for one frame without starting it.
 *
 * @ctx:                 validated decoding parameters
 * @dpb_frames:          frame array, entry 0 is the frame being decoded
 * @bitstream_data_addr: DMA address of the bitstream
 * @bitstream_data_size: size of the bitstream in bytes
 * @macroblocks_nb:      picture width times height in macroblocks
 *
 * The statement order below is a hardware programming sequence; keep it
 * intact. Returns 0 on success or the first error reported by one of the
 * BSEV/MBE wait helpers.
 */
static int tegra_vde_setup_hw_context(struct tegra_vde *vde,
				      struct tegra_vde_h264_decoder_ctx *ctx,
				      struct tegra_video_frame *dpb_frames,
				      dma_addr_t bitstream_data_addr,
				      size_t bitstream_data_size,
				      unsigned int macroblocks_nb)
{
	struct device *dev = vde->dev;
	u32 value;
	int err;

	/* Set control bits in each hardware unit (bit meanings not shown here) */
	tegra_vde_set_bits(vde, 0x000A, vde->sxe, 0xF0);
	tegra_vde_set_bits(vde, 0x000B, vde->bsev, CMDQUE_CONTROL);
	tegra_vde_set_bits(vde, 0x8002, vde->mbe, 0x50);
	tegra_vde_set_bits(vde, 0x000A, vde->mbe, 0xA0);
	tegra_vde_set_bits(vde, 0x000A, vde->ppe, 0x14);
	tegra_vde_set_bits(vde, 0x000A, vde->ppe, 0x28);
	tegra_vde_set_bits(vde, 0x0A00, vde->mce, 0x08);
	tegra_vde_set_bits(vde, 0x000A, vde->tfe, 0x00);
	tegra_vde_set_bits(vde, 0x0005, vde->vdma, 0x04);

	/* Fixed register values written on every frame */
	tegra_vde_writel(vde, 0x00000000, vde->vdma, 0x1C);
	tegra_vde_writel(vde, 0x00000000, vde->vdma, 0x00);
	tegra_vde_writel(vde, 0x00000007, vde->vdma, 0x04);
	tegra_vde_writel(vde, 0x00000007, vde->frameid, 0x200);
	tegra_vde_writel(vde, 0x00000005, vde->tfe, 0x04);
	tegra_vde_writel(vde, 0x00000000, vde->mbe, 0x84);
	tegra_vde_writel(vde, 0x00000010, vde->sxe, 0x08);
	tegra_vde_writel(vde, 0x00000150, vde->sxe, 0x54);
	tegra_vde_writel(vde, 0x0000054C, vde->sxe, 0x58);
	tegra_vde_writel(vde, 0x00000E34, vde->sxe, 0x5C);
	tegra_vde_writel(vde, 0x063C063C, vde->mce, 0x10);
	tegra_vde_writel(vde, 0x0003FC00, vde->bsev, INTR_STATUS);
	tegra_vde_writel(vde, 0x0000150D, vde->bsev, BSE_CONFIG);
	tegra_vde_writel(vde, 0x00000100, vde->bsev, BSE_INT_ENB);
	tegra_vde_writel(vde, 0x00000000, vde->bsev, 0x98);
	tegra_vde_writel(vde, 0x00000060, vde->bsev, 0x9C);

	/*
	 * Clear macroblocks_nb / 2 bytes of IRAM starting at vde->iram + 128
	 * (the unit of that offset follows vde->iram's pointer type).
	 */
	memset(vde->iram + 128, 0, macroblocks_nb / 2);

	tegra_setup_frameidx(vde, dpb_frames, ctx->dpb_frames_nb,
			     ctx->pic_width_in_mbs, ctx->pic_height_in_mbs);

	/* Entry 0 is the current frame, hence one reference fewer */
	tegra_vde_setup_iram_tables(vde, dpb_frames,
				    ctx->dpb_frames_nb - 1,
				    ctx->dpb_ref_frames_with_earlier_poc_nb);

	/*
	 * The IRAM mapping is write-combine, ensure that CPU buffers have
	 * been flushed at this point.
	 */
	wmb();

	tegra_vde_writel(vde, 0x00000000, vde->bsev, 0x8C);
	/* Address one past the end of the bitstream */
	tegra_vde_writel(vde, bitstream_data_addr + bitstream_data_size,
			 vde->bsev, 0x54);

	/* Remembered for the error report in tegra_vde_decode_end() */
	vde->bitstream_data_addr = bitstream_data_addr;

	value = ctx->pic_width_in_mbs << 11 | ctx->pic_height_in_mbs << 3;

	tegra_vde_writel(vde, value, vde->bsev, 0x88);

	err = tegra_vde_wait_bsev(vde, false);
	if (err)
		return err;

	/* BSEV command queue sequence; commands with wait_dma=true also wait for DMA */
	err = tegra_vde_push_to_bsev_icmdqueue(vde, 0x800003FC, false);
	if (err)
		return err;

	value = 0x01500000;
	value |= ((vde->iram_lists_addr + 512) >> 2) & 0xFFFF;

	err = tegra_vde_push_to_bsev_icmdqueue(vde, value, true);
	if (err)
		return err;

	err = tegra_vde_push_to_bsev_icmdqueue(vde, 0x840F054C, false);
	if (err)
		return err;

	err = tegra_vde_push_to_bsev_icmdqueue(vde, 0x80000080, false);
	if (err)
		return err;

	value = 0x0E340000 | ((vde->iram_lists_addr >> 2) & 0xFFFF);

	err = tegra_vde_push_to_bsev_icmdqueue(vde, value, true);
	if (err)
		return err;

	/* SXE: picture dimensions in macroblocks */
	value = 0x00800005;
	value |= ctx->pic_width_in_mbs << 11;
	value |= ctx->pic_height_in_mbs << 3;

	tegra_vde_writel(vde, value, vde->sxe, 0x10);

	/* SXE: parameters that tegra_vde_h264_setup_context() takes from the SPS */
	value = !ctx->baseline_profile << 17;
	value |= ctx->level_idc << 13;
	value |= ctx->log2_max_pic_order_cnt_lsb << 7;
	value |= ctx->pic_order_cnt_type << 5;
	value |= ctx->log2_max_frame_num;

	tegra_vde_writel(vde, value, vde->sxe, 0x40);

	/* SXE: parameters taken from the PPS */
	value = ctx->pic_init_qp << 25;
	value |= !!(ctx->deblocking_filter_control_present_flag) << 2;
	value |= !!ctx->pic_order_present_flag;

	tegra_vde_writel(vde, value, vde->sxe, 0x44);

	value = ctx->chroma_qp_index_offset;
	value |= ctx->num_ref_idx_l0_active_minus1 << 5;
	value |= ctx->num_ref_idx_l1_active_minus1 << 10;
	value |= !!ctx->constrained_intra_pred_flag << 15;

	tegra_vde_writel(vde, value, vde->sxe, 0x48);

	/* Bit 24 tells the SXE whether the current frame is a B frame */
	value = 0x0C000000;
	value |= !!(dpb_frames[0].flags & FLAG_B_FRAME) << 24;

	tegra_vde_writel(vde, value, vde->sxe, 0x4C);

	/* Only bits 19:15 of the bitstream size are passed on */
	value = 0x03800000;
	value |= bitstream_data_size & GENMASK(19, 15);

	tegra_vde_writel(vde, value, vde->sxe, 0x68);

	tegra_vde_writel(vde, bitstream_data_addr, vde->sxe, 0x6C);

	/* SoCs flagged with supports_ref_pic_marking also get the secure_bo address */
	if (vde->soc->supports_ref_pic_marking)
		tegra_vde_writel(vde, vde->secure_bo->dma_addr, vde->sxe, 0x7c);

	/* MBE: picture dimensions in macroblocks */
	value = 0x10000005;
	value |= ctx->pic_width_in_mbs << 11;
	value |= ctx->pic_height_in_mbs << 3;

	tegra_vde_writel(vde, value, vde->mbe, 0x80);

	/* MBE: level, profile and direct_8x8_inference */
	value = 0x26800000;
	value |= ctx->level_idc << 4;
	value |= !ctx->baseline_profile << 1;
	value |= !!ctx->direct_8x8_inference_flag;

	tegra_vde_writel(vde, value, vde->mbe, 0x80);

	tegra_vde_writel(vde, 0xF4000001, vde->mbe, 0x80);
	tegra_vde_writel(vde, 0x20000000, vde->mbe, 0x80);
	tegra_vde_writel(vde, 0xF4000101, vde->mbe, 0x80);

	value = 0x20000000;
	value |= ctx->chroma_qp_index_offset << 8;

	tegra_vde_writel(vde, value, vde->mbe, 0x80);

	/* References are only programmed into the MBE for pic_order_cnt_type 0 */
	err = tegra_vde_setup_mbe_frame_idx(vde,
					    ctx->dpb_frames_nb - 1,
					    ctx->pic_order_cnt_type == 0);
	if (err) {
		dev_err(dev, "MBE frames setup failed %d\n", err);
		return err;
	}

	/* 0xA register pairs: fixed values, then the current frame's aux address */
	tegra_vde_mbe_set_0xa_reg(vde, 0, 0x000009FC);
	tegra_vde_mbe_set_0xa_reg(vde, 2, 0x61DEAD00);
	tegra_vde_mbe_set_0xa_reg(vde, 4, 0x62DEAD00);
	tegra_vde_mbe_set_0xa_reg(vde, 6, 0x63DEAD00);
	tegra_vde_mbe_set_0xa_reg(vde, 8, dpb_frames[0].aux_addr);

	value = 0xFC000000;
	value |= !!(dpb_frames[0].flags & FLAG_B_FRAME) << 2;

	/* The reference flag is only forwarded for non-baseline profiles */
	if (!ctx->baseline_profile)
		value |= !!(dpb_frames[0].flags & FLAG_REFERENCE) << 1;

	tegra_vde_writel(vde, value, vde->mbe, 0x80);

	err = tegra_vde_wait_mbe(vde);
	if (err) {
		dev_err(dev, "MBE programming failed %d\n", err);
		return err;
	}

	return 0;
}

/*
 * Re-arm vde->decode_completion and write the two registers that follow
 * the context setup: 1 to BSEV 0x8C, then the macroblock count minus one
 * (ORed with 0x20000000) to SXE 0x00.
 *
 * NOTE(review): the SXE write presumably starts the decode; the waiter is
 * tegra_vde_decode_end().
 */
static void tegra_vde_decode_frame(struct tegra_vde *vde,
				   unsigned int macroblocks_nb)
{
	reinit_completion(&vde->decode_completion);

	tegra_vde_writel(vde, 0x00000001, vde->bsev, 0x8C);
	tegra_vde_writel(vde, 0x20000000 | (macroblocks_nb - 1),
			 vde->sxe, 0x00);
}

/*
 * Range-check the decoding parameters before they reach the hardware.
 *
 * Returns 0 when every field is within bounds, otherwise logs the
 * offending field and returns -EINVAL. The DPB limit of 17 matches the
 * number of FRAMEID slots programmed by tegra_setup_frameidx().
 */
static int tegra_vde_validate_h264_ctx(struct device *dev,
				       struct tegra_vde_h264_decoder_ctx *ctx)
{
	if (ctx->dpb_frames_nb == 0 || ctx->dpb_frames_nb > 17) {
		dev_err(dev, "Bad DPB size %u\n", ctx->dpb_frames_nb);
		return -EINVAL;
	}

	if (ctx->level_idc > 15) {
		dev_err(dev, "Bad level value %u\n", ctx->level_idc);
		return -EINVAL;
	}

	if (ctx->pic_init_qp > 52) {
		dev_err(dev, "Bad pic_init_qp value %u\n", ctx->pic_init_qp);
		return -EINVAL;
	}

	if (ctx->log2_max_pic_order_cnt_lsb > 16) {
		dev_err(dev, "Bad log2_max_pic_order_cnt_lsb value %u\n",
			ctx->log2_max_pic_order_cnt_lsb);
		return -EINVAL;
	}

	if (ctx->log2_max_frame_num > 16) {
		dev_err(dev, "Bad log2_max_frame_num value %u\n",
			ctx->log2_max_frame_num);
		return -EINVAL;
	}

	if (ctx->chroma_qp_index_offset > 31) {
		dev_err(dev, "Bad chroma_qp_index_offset value %u\n",
			ctx->chroma_qp_index_offset);
		return -EINVAL;
	}

	if (ctx->pic_order_cnt_type > 2) {
		dev_err(dev, "Bad pic_order_cnt_type value %u\n",
			ctx->pic_order_cnt_type);
		return -EINVAL;
	}

	if (ctx->num_ref_idx_l0_active_minus1 > 15) {
		dev_err(dev, "Bad num_ref_idx_l0_active_minus1 value %u\n",
			ctx->num_ref_idx_l0_active_minus1);
		return -EINVAL;
	}

	if (ctx->num_ref_idx_l1_active_minus1 > 15) {
		dev_err(dev, "Bad num_ref_idx_l1_active_minus1 value %u\n",
			ctx->num_ref_idx_l1_active_minus1);
		return -EINVAL;
	}

	if (!ctx->pic_width_in_mbs || ctx->pic_width_in_mbs > 127) {
		dev_err(dev, "Bad pic_width_in_mbs value %u\n",
			ctx->pic_width_in_mbs);
		return -EINVAL;
	}

	if (!ctx->pic_height_in_mbs || ctx->pic_height_in_mbs > 127) {
		dev_err(dev, "Bad pic_height_in_mbs value %u\n",
			ctx->pic_height_in_mbs);
		return -EINVAL;
	}

	return 0;
}

/*
 * Take ownership of the hardware, reset it, program it and start decoding.
 *
 * On success (return 0) vde->lock stays held and the runtime-PM reference
 * stays taken; both are dropped by tegra_vde_decode_abort(), which
 * tegra_vde_decode_end() always calls. On failure everything acquired
 * here is released before returning the error.
 *
 * The mutex is taken interruptibly, so the error may be the one returned
 * by mutex_lock_interruptible().
 */
static int tegra_vde_decode_begin(struct tegra_vde *vde,
				  struct tegra_vde_h264_decoder_ctx *ctx,
				  struct tegra_video_frame *dpb_frames,
				  dma_addr_t bitstream_data_addr,
				  size_t bitstream_data_size)
{
	struct device *dev = vde->dev;
	unsigned int macroblocks_nb;
	int err;

	err = mutex_lock_interruptible(&vde->lock);
	if (err)
		return err;

	err = pm_runtime_resume_and_get(dev);
	if (err < 0)
		goto unlock;

	/*
	 * We rely on the VDE registers reset value, otherwise VDE
	 * causes bus lockup.
	 */
	err = reset_control_assert(vde->rst_mc);
	if (err) {
		dev_err(dev, "DEC start: Failed to assert MC reset: %d\n",
			err);
		goto put_runtime_pm;
	}

	err = reset_control_reset(vde->rst);
	if (err) {
		dev_err(dev, "DEC start: Failed to reset HW: %d\n", err);
		goto put_runtime_pm;
	}

	err = reset_control_deassert(vde->rst_mc);
	if (err) {
		dev_err(dev, "DEC start: Failed to deassert MC reset: %d\n",
			err);
		goto put_runtime_pm;
	}

	macroblocks_nb = ctx->pic_width_in_mbs * ctx->pic_height_in_mbs;

	err = tegra_vde_setup_hw_context(vde, ctx, dpb_frames,
					 bitstream_data_addr,
					 bitstream_data_size,
					 macroblocks_nb);
	if (err)
		goto put_runtime_pm;

	tegra_vde_decode_frame(vde, macroblocks_nb);

	/* Success: lock and PM reference are intentionally kept, see above */
	return 0;

put_runtime_pm:
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);

unlock:
	mutex_unlock(&vde->lock);

	return err;
}

/*
 * Stop the hardware and release what tegra_vde_decode_begin() acquired:
 * assert both resets, drop the runtime-PM reference and unlock vde->lock.
 *
 * Reset failures are logged but do not prevent the release steps.
 */
static void tegra_vde_decode_abort(struct tegra_vde *vde)
{
	struct device *dev = vde->dev;
	int err;

	/*
	 * At first reset memory client to avoid resetting VDE HW in the
	 * middle of DMA which could result into memory corruption or hang
	 * the whole system.
	 */
	err = reset_control_assert(vde->rst_mc);
	if (err)
		dev_err(dev, "DEC end: Failed to assert MC reset: %d\n", err);

	err = reset_control_assert(vde->rst);
	if (err)
		dev_err(dev, "DEC end: Failed to assert HW reset: %d\n", err);

	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);

	mutex_unlock(&vde->lock);
}

/*
 * Wait up to 1000 ms for vde->decode_completion, then shut the hardware
 * down via tegra_vde_decode_abort() regardless of the outcome.
 *
 * Returns 0 when the completion fired, the negative value from the wait
 * when it was interrupted, or -EIO on timeout. On timeout the BSEV read
 * pointer and the SXE macroblock counter are read back and logged.
 */
static int tegra_vde_decode_end(struct tegra_vde *vde)
{
	unsigned int read_bytes, macroblocks_nb;
	struct device *dev = vde->dev;
	dma_addr_t bsev_ptr;
	long time_left;
	int ret;

	time_left = wait_for_completion_interruptible_timeout(
			&vde->decode_completion, msecs_to_jiffies(1000));
	if (time_left < 0) {
		ret = time_left;
	} else if (time_left == 0) {
		bsev_ptr = tegra_vde_readl(vde, vde->bsev, 0x10);
		macroblocks_nb = tegra_vde_readl(vde, vde->sxe, 0xC8) & 0x1FFF;
		/* A zero pointer is reported as zero bytes read */
		read_bytes = bsev_ptr ? bsev_ptr - vde->bitstream_data_addr : 0;

		dev_err(dev, "Decoding failed: read 0x%X bytes, %u macroblocks parsed\n",
			read_bytes, macroblocks_nb);

		ret = -EIO;
	} else {
		ret = 0;
	}

	tegra_vde_decode_abort(vde);

	return ret;
}

/*
 * Look up the capture buffer backing DPB entry @dpb_idx.
 *
 * Only entries flagged V4L2_H264_DPB_ENTRY_FLAG_ACTIVE are searched for,
 * using the entry's reference_ts. Never returns NULL: @dst's buffer is
 * used as a fallback.
 */
static struct vb2_buffer *get_ref_buf(struct tegra_ctx *ctx,
				      struct vb2_v4l2_buffer *dst,
				      unsigned int dpb_idx)
{
	const struct v4l2_h264_dpb_entry *dpb = ctx->h264.decode_params->dpb;
	struct vb2_queue *cap_q = &ctx->fh.m2m_ctx->cap_q_ctx.q;
	struct vb2_buffer *vb = NULL;

	if (dpb[dpb_idx].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)
		vb = vb2_find_buffer(cap_q, dpb[dpb_idx].reference_ts);

	/*
	 * If a DPB entry is unused or invalid, address of current destination
	 * buffer is returned.
	 */
	if (!vb)
		return &dst->vb2_buf;

	return vb;
}

/*
 * Check that plane @plane_id of @vb can hold @min_size bytes after its
 * data_offset. Returns 0 if so, otherwise logs and returns -EINVAL.
 */
static int tegra_vde_validate_vb_size(struct tegra_ctx *ctx,
				      struct vb2_buffer *vb,
				      unsigned int plane_id,
				      size_t min_size)
{
	u64 offset = vb->planes[plane_id].data_offset;
	struct device *dev = ctx->vde->dev;

	if (offset + min_size > vb2_plane_size(vb, plane_id)) {
		dev_err(dev, "Too small plane[%u] size %lu @0x%llX, should be at least %zu\n",
			plane_id, vb2_plane_size(vb, plane_id), offset, min_size);
		return -EINVAL;
	}

	return 0;
}

/*
 * Validate buffer @vb and describe it in vde->frames[@id].
 *
 * @b:      reflist builder; only dereferenced when @id != 0, so the
 *          caller may pass NULL for the current frame
 * @ref_id: index into @b->refs supplying frame_num for a reference
 * @id:     destination slot; 0 means the frame being decoded, whose
 *          frame_num and reference flag come from the decode params
 *
 * The three planes must hold a full luma plane (16x16 bytes per
 * macroblock) and two chroma planes (8x8 bytes per macroblock), and the
 * aux buffer must exist and be at least one chroma plane in size.
 *
 * Returns 0 on success or -EINVAL if a buffer is too small.
 */
static int tegra_vde_h264_setup_frame(struct tegra_ctx *ctx,
				      struct tegra_vde_h264_decoder_ctx *h264,
				      struct v4l2_h264_reflist_builder *b,
				      struct vb2_buffer *vb,
				      unsigned int ref_id,
				      unsigned int id)
{
	struct v4l2_pix_format_mplane *pixfmt = &ctx->decoded_fmt.fmt.pix_mp;
	struct tegra_m2m_buffer *tb = vb_to_tegra_buf(vb);
	struct tegra_ctx_h264 *h = &ctx->h264;
	struct tegra_vde *vde = ctx->vde;
	struct device *dev = vde->dev;
	unsigned int cstride, lstride;
	unsigned int flags = 0;
	size_t lsize, csize;
	int err, frame_num;

	lsize = h264->pic_width_in_mbs * 16 * h264->pic_height_in_mbs * 16;
	csize = h264->pic_width_in_mbs *  8 * h264->pic_height_in_mbs *  8;
	lstride = pixfmt->plane_fmt[0].bytesperline;
	cstride = pixfmt->plane_fmt[1].bytesperline;

	err = tegra_vde_validate_vb_size(ctx, vb, 0, lsize);
	if (err)
		return err;

	err = tegra_vde_validate_vb_size(ctx, vb, 1, csize);
	if (err)
		return err;

	err = tegra_vde_validate_vb_size(ctx, vb, 2, csize);
	if (err)
		return err;

	/* A missing aux buffer is reported with size -1 */
	if (!tb->aux || tb->aux->size < csize) {
		dev_err(dev, "Too small aux size %zd, should be at least %zu\n",
			tb->aux ? tb->aux->size : -1, csize);
		return -EINVAL;
	}

	if (id == 0) {
		frame_num = h->decode_params->frame_num;

		if (h->decode_params->nal_ref_idc)
			flags |= FLAG_REFERENCE;
	} else {
		frame_num = b->refs[ref_id].frame_num;
	}

	if (tb->b_frame)
		flags |= FLAG_B_FRAME;

	vde->frames[id].flags = flags;
	vde->frames[id].y_addr = tb->dma_addr[0];
	vde->frames[id].cb_addr = tb->dma_addr[1];
	vde->frames[id].cr_addr = tb->dma_addr[2];
	vde->frames[id].aux_addr = tb->aux->dma_addr;
	/* frame_num is truncated to 23 bits */
	vde->frames[id].frame_num = frame_num & 0x7fffff;
	/* Pitches are stored in units of VDE_ATOM bytes */
	vde->frames[id].luma_atoms_pitch = lstride / VDE_ATOM;
	vde->frames[id].chroma_atoms_pitch = cstride / VDE_ATOM;

	return 0;
}

/*
 * Populate vde->frames[] for the current decode job.
 *
 * Slot 0 receives the destination buffer. For P and B frames the
 * reference list is built with the V4L2 reflist builder (the P list for P
 * frames, the B0 list for B frames) and each valid entry is appended as
 * the next slot. h264->dpb_frames_nb ends up as the number of filled
 * slots and h264->dpb_ref_frames_with_earlier_poc_nb as the number of
 * references whose top_field_order_cnt is below the current picture
 * order count.
 *
 * Returns 0 on success or the error from tegra_vde_h264_setup_frame().
 */
static int tegra_vde_h264_setup_frames(struct tegra_ctx *ctx,
				       struct tegra_vde_h264_decoder_ctx *h264)
{
	struct vb2_v4l2_buffer *src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
	struct vb2_v4l2_buffer *dst = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
	const struct v4l2_h264_dpb_entry *dpb = ctx->h264.decode_params->dpb;
	struct tegra_m2m_buffer *tb = vb_to_tegra_buf(&dst->vb2_buf);
	struct tegra_ctx_h264 *h = &ctx->h264;
	struct v4l2_h264_reflist_builder b;
	struct v4l2_h264_reference *dpb_id;
	struct h264_reflists reflists;
	struct vb2_buffer *ref;
	unsigned int i;
	int err;

	/*
	 * Tegra hardware requires information about frame's type, assuming
	 * that frame consists of the same type slices. Userspace must tag
	 * frame's type appropriately.
	 *
	 * Decoding of a non-uniform frames isn't supported by hardware and
	 * require software preprocessing that we don't implement. Decoding
	 * is expected to fail in this case. Such video streams are rare in
	 * practice, so not a big deal.
	 *
	 * If userspace doesn't tell us frame's type, then we will try decode
	 * as-is.
	 */
	v4l2_m2m_buf_copy_metadata(src, dst, true);

	if (h->decode_params->flags & V4L2_H264_DECODE_PARAM_FLAG_BFRAME)
		tb->b_frame = true;
	else
		tb->b_frame = false;

	/* Slot 0: the frame being decoded; no reflist builder is needed */
	err = tegra_vde_h264_setup_frame(ctx, h264, NULL, &dst->vb2_buf, 0,
					 h264->dpb_frames_nb++);
	if (err)
		return err;

	/* Frames tagged neither P nor B get no references */
	if (!(h->decode_params->flags & (V4L2_H264_DECODE_PARAM_FLAG_PFRAME |
					 V4L2_H264_DECODE_PARAM_FLAG_BFRAME)))
		return 0;

	v4l2_h264_init_reflist_builder(&b, h->decode_params, h->sps, dpb);

	if (h->decode_params->flags & V4L2_H264_DECODE_PARAM_FLAG_BFRAME) {
		v4l2_h264_build_b_ref_lists(&b, reflists.b0, reflists.b1);
		dpb_id = reflists.b0;
	} else {
		v4l2_h264_build_p_ref_list(&b, reflists.p);
		dpb_id = reflists.p;
	}

	for (i = 0; i < b.num_valid; i++) {
		int dpb_idx = dpb_id[i].index;

		ref = get_ref_buf(ctx, dst, dpb_idx);

		err = tegra_vde_h264_setup_frame(ctx, h264, &b, ref, dpb_idx,
						 h264->dpb_frames_nb++);
		if (err)
			return err;

		if (b.refs[dpb_idx].top_field_order_cnt < b.cur_pic_order_count)
			h264->dpb_ref_frames_with_earlier_poc_nb++;
	}

	return 0;
}

/*
 * Translate an H.264 level_idc value (11 for level 1.1 ... 50 for level
 * 5.0) into the code used in the hardware registers (2..14). Any value
 * not listed maps to 15.
 */
static unsigned int to_tegra_vde_h264_level_idc(unsigned int level_idc)
{
	switch (level_idc) {
	case 11:
		return 2;
	case 12:
		return 3;
	case 13:
		return 4;
	case 20:
		return 5;
	case 21:
		return 6;
	case 22:
		return 7;
	case 30:
		return 8;
	case 31:
		return 9;
	case 32:
		return 10;
	case 40:
		return 11;
	case 41:
		return 12;
	case 42:
		return 13;
	case 50:
		return 14;
	default:
		break;
	}

	return 15;
}

/*
 * Build the decoding context for the next job from the V4L2 controls.
 *
 * Zeroes @h264 and vde->frames, fetches the decode-params, SPS and PPS
 * controls, copies the relevant fields into @h264, sets up the frame
 * table and finally range-checks the result.
 *
 * Returns 0 on success, -EOPNOTSUPP for streams with the PPS
 * entropy-coding-mode flag set (CABAC) or for field pictures, or the
 * error from frame setup or validation.
 */
static int tegra_vde_h264_setup_context(struct tegra_ctx *ctx,
					struct tegra_vde_h264_decoder_ctx *h264)
{
	struct tegra_ctx_h264 *h = &ctx->h264;
	struct tegra_vde *vde = ctx->vde;
	struct device *dev = vde->dev;
	int err;

	memset(h264, 0, sizeof(*h264));
	memset(vde->frames, 0, sizeof(vde->frames));

	tegra_vde_prepare_control_data(ctx, V4L2_CID_STATELESS_H264_DECODE_PARAMS);
	tegra_vde_prepare_control_data(ctx, V4L2_CID_STATELESS_H264_SPS);
	tegra_vde_prepare_control_data(ctx, V4L2_CID_STATELESS_H264_PPS);

	/* CABAC unsupported by hardware, requires software preprocessing */
	if (h->pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE)
		return -EOPNOTSUPP;

	/* Field (interlaced) pictures are rejected as well */
	if (h->decode_params->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC)
		return -EOPNOTSUPP;

	/* The boolean fields below stay 0 from the memset unless set here */
	if (h->sps->profile_idc == 66)
		h264->baseline_profile = 1;

	if (h->sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE)
		h264->direct_8x8_inference_flag = 1;

	if (h->pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED)
		h264->constrained_intra_pred_flag = 1;

	if (h->pps->flags & V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT)
		h264->deblocking_filter_control_present_flag = 1;

	if (h->pps->flags & V4L2_H264_PPS_FLAG_BOTTOM_FIELD_PIC_ORDER_IN_FRAME_PRESENT)
		h264->pic_order_present_flag = 1;

	/* The "_minusN" control fields are converted to their actual values */
	h264->level_idc = to_tegra_vde_h264_level_idc(h->sps->level_idc);
	h264->log2_max_pic_order_cnt_lsb = h->sps->log2_max_pic_order_cnt_lsb_minus4 + 4;
	h264->log2_max_frame_num = h->sps->log2_max_frame_num_minus4 + 4;
	h264->pic_order_cnt_type = h->sps->pic_order_cnt_type;
	h264->pic_width_in_mbs = h->sps->pic_width_in_mbs_minus1 + 1;
	h264->pic_height_in_mbs = h->sps->pic_height_in_map_units_minus1 + 1;

	h264->num_ref_idx_l0_active_minus1 = h->pps->num_ref_idx_l0_default_active_minus1;
	h264->num_ref_idx_l1_active_minus1 = h->pps->num_ref_idx_l1_default_active_minus1;
	h264->chroma_qp_index_offset = h->pps->chroma_qp_index_offset & 0x1f;
	h264->pic_init_qp = h->pps->pic_init_qp_minus26 + 26;

	/* Frame setup fills dpb_frames_nb, so validation has to come after it */
	err = tegra_vde_h264_setup_frames(ctx, h264);
	if (err)
		return err;

	err = tegra_vde_validate_h264_ctx(dev, h264);
	if (err)
		return err;

	return 0;
}

/*
 * Start decoding the next queued source buffer.
 *
 * Builds the decoding context and hands it, together with the bitstream's
 * DMA address and payload size, to tegra_vde_decode_begin(). On success
 * the hardware is left running and the caller is expected to follow up
 * with tegra_vde_h264_decode_wait().
 *
 * Returns 0 on success or a negative error code.
 */
int tegra_vde_h264_decode_run(struct tegra_ctx *ctx)
{
	struct vb2_v4l2_buffer *src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
	struct tegra_m2m_buffer *bitstream = vb_to_tegra_buf(&src->vb2_buf);
	size_t bitstream_size = vb2_get_plane_payload(&src->vb2_buf, 0);
	struct tegra_vde_h264_decoder_ctx h264;
	struct tegra_vde *vde = ctx->vde;
	int err;

	err = tegra_vde_h264_setup_context(ctx, &h264);
	if (err)
		return err;

	err = tegra_vde_decode_begin(vde, &h264, vde->frames,
				     bitstream->dma_addr[0],
				     bitstream_size);
	if (err)
		return err;

	return 0;
}

/*
 * Wait for the decode started by tegra_vde_h264_decode_run() to finish
 * and release the hardware. Returns tegra_vde_decode_end()'s result.
 */
int tegra_vde_h264_decode_wait(struct tegra_ctx *ctx)
{
	return tegra_vde_decode_end(ctx->vde);
}