// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
 * Wave5 series multi-standard codec IP - decoder interface
 *
 * Copyright (C) 2021-2023 CHIPS&MEDIA INC
 */

#include <linux/pm_runtime.h>
#include "wave5-helper.h"

#define VPU_DEC_DEV_NAME "C&M Wave5 VPU decoder"
#define VPU_DEC_DRV_NAME "wave5-dec"

static const struct v4l2_frmsize_stepwise dec_hevc_frmsize = {
	.min_width = W5_MIN_DEC_PIC_8_WIDTH,
	.max_width = W5_MAX_DEC_PIC_WIDTH,
	.step_width = W5_DEC_CODEC_STEP_WIDTH,
	.min_height = W5_MIN_DEC_PIC_8_HEIGHT,
	.max_height = W5_MAX_DEC_PIC_HEIGHT,
	.step_height = W5_DEC_CODEC_STEP_HEIGHT,
};

static const struct v4l2_frmsize_stepwise dec_h264_frmsize = {
	.min_width = W5_MIN_DEC_PIC_32_WIDTH,
	.max_width = W5_MAX_DEC_PIC_WIDTH,
	.step_width = W5_DEC_CODEC_STEP_WIDTH,
	.min_height = W5_MIN_DEC_PIC_32_HEIGHT,
	.max_height = W5_MAX_DEC_PIC_HEIGHT,
	.step_height = W5_DEC_CODEC_STEP_HEIGHT,
};

static const struct v4l2_frmsize_stepwise dec_raw_frmsize = {
	.min_width = W5_MIN_DEC_PIC_8_WIDTH,
	.max_width = W5_MAX_DEC_PIC_WIDTH,
	.step_width = W5_DEC_RAW_STEP_WIDTH,
	.min_height = W5_MIN_DEC_PIC_8_HEIGHT,
	.max_height = W5_MAX_DEC_PIC_HEIGHT,
	.step_height = W5_DEC_RAW_STEP_HEIGHT,
};

static const struct vpu_format dec_fmt_list[FMT_TYPES][MAX_FMTS] = {
	[VPU_FMT_TYPE_CODEC] = {
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_HEVC,
			.v4l2_frmsize = &dec_hevc_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_H264,
			.v4l2_frmsize = &dec_h264_frmsize,
		},
	},
	[VPU_FMT_TYPE_RAW] = {
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV420,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV12,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV21,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV422P,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV16,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV61,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV420M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV12M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV21M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV422M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV16M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV61M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
	}
};

/*
 * Make sure that the state switch is allowed and add logging for debugging
 * purposes.
 */
static int switch_state(struct vpu_instance *inst, enum vpu_instance_state state)
{
	switch (state) {
	case VPU_INST_STATE_NONE:
		break;
	case VPU_INST_STATE_OPEN:
		if (inst->state != VPU_INST_STATE_NONE)
			goto invalid_state_switch;
		goto valid_state_switch;
	case VPU_INST_STATE_INIT_SEQ:
		if (inst->state != VPU_INST_STATE_OPEN && inst->state != VPU_INST_STATE_STOP)
			goto invalid_state_switch;
		goto valid_state_switch;
	case VPU_INST_STATE_PIC_RUN:
		if (inst->state != VPU_INST_STATE_INIT_SEQ)
			goto invalid_state_switch;
		goto valid_state_switch;
	case VPU_INST_STATE_STOP:
		goto valid_state_switch;
	}
invalid_state_switch:
	WARN(1, "Invalid state switch from %s to %s.\n",
	     state_to_str(inst->state), state_to_str(state));
	return -EINVAL;
valid_state_switch:
	dev_dbg(inst->dev->dev, "Switch state from %s to %s.\n",
		state_to_str(inst->state), state_to_str(state));
	inst->state = state;
	return 0;
}

static int wave5_vpu_dec_set_eos_on_firmware(struct vpu_instance *inst)
{
	int ret;

	ret = wave5_vpu_dec_update_bitstream_buffer(inst, 0);
	if (ret) {
		/*
		 * To set the EOS flag, a command is sent to the firmware.
		 * That command may never return (timeout) or may report an error.
		 */
		dev_err(inst->dev->dev,
			"Setting EOS for the bitstream, fail: %d\n", ret);
		return ret;
	}
	return 0;
}

static bool wave5_last_src_buffer_consumed(struct v4l2_m2m_ctx *m2m_ctx)
{
	struct vpu_src_buffer *vpu_buf;

	if (!m2m_ctx->last_src_buf)
		return false;

	vpu_buf = wave5_to_vpu_src_buf(m2m_ctx->last_src_buf);
	return vpu_buf->consumed;
}

static void wave5_handle_src_buffer(struct vpu_instance *inst, dma_addr_t rd_ptr)
{
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct v4l2_m2m_buffer *buf, *n;
	size_t consumed_bytes = 0;

	if (rd_ptr >= inst->last_rd_ptr) {
		consumed_bytes = rd_ptr - inst->last_rd_ptr;
	} else {
		size_t rd_offs = rd_ptr - inst->bitstream_vbuf.daddr;
		size_t last_rd_offs = inst->last_rd_ptr - inst->bitstream_vbuf.daddr;

		consumed_bytes = rd_offs + (inst->bitstream_vbuf.size - last_rd_offs);
	}

	inst->last_rd_ptr = rd_ptr;
	consumed_bytes += inst->remaining_consumed_bytes;

	dev_dbg(inst->dev->dev, "%s: %zu bytes of bitstream was consumed", __func__,
		consumed_bytes);

	v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buf, n) {
		struct vb2_v4l2_buffer *src_buf = &buf->vb;
		size_t src_size = vb2_get_plane_payload(&src_buf->vb2_buf, 0);

		if (src_size > consumed_bytes)
			break;

		dev_dbg(inst->dev->dev, "%s: removing src buffer %i",
			__func__, src_buf->vb2_buf.index);
		src_buf = v4l2_m2m_src_buf_remove(m2m_ctx);
		inst->timestamp = src_buf->vb2_buf.timestamp;
		v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
		consumed_bytes -= src_size;

		/* Handle the case where the last bitstream buffer has been picked */
		if (src_buf == m2m_ctx->last_src_buf) {
			int ret;

			m2m_ctx->last_src_buf = NULL;
			ret = wave5_vpu_dec_set_eos_on_firmware(inst);
			if (ret)
				dev_warn(inst->dev->dev,
					 "Setting EOS for the bitstream, fail: %d\n", ret);
			break;
		}
	}

	inst->remaining_consumed_bytes = consumed_bytes;
}

static int start_decode(struct vpu_instance *inst, u32 *fail_res)
{
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	int ret = 0;

	ret = wave5_vpu_dec_start_one_frame(inst, fail_res);
	if (ret) {
		struct vb2_v4l2_buffer *src_buf;

		src_buf = v4l2_m2m_src_buf_remove(m2m_ctx);
		if (src_buf)
			v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR);
		switch_state(inst, VPU_INST_STATE_STOP);

		dev_dbg(inst->dev->dev, "%s: pic run failed / finish job", __func__);
		v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
	}

	return ret;
}

static void flag_last_buffer_done(struct vpu_instance *inst)
{
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct vb2_v4l2_buffer *vb;
	int i;

	lockdep_assert_held(&inst->state_spinlock);

	vb = v4l2_m2m_dst_buf_remove(m2m_ctx);
	if (!vb) {
		m2m_ctx->is_draining = true;
		m2m_ctx->next_buf_last = true;
		return;
	}

	for (i = 0; i < vb->vb2_buf.num_planes; i++)
		vb2_set_plane_payload(&vb->vb2_buf, i, 0);
	vb->field = V4L2_FIELD_NONE;

	v4l2_m2m_last_buffer_done(m2m_ctx, vb);
}

static void send_eos_event(struct vpu_instance *inst)
{
	static const struct v4l2_event vpu_event_eos = {
		.type = V4L2_EVENT_EOS
	};

	lockdep_assert_held(&inst->state_spinlock);

	v4l2_event_queue_fh(&inst->v4l2_fh, &vpu_event_eos);
	inst->eos = false;
}

static int handle_dynamic_resolution_change(struct vpu_instance *inst)
{
	struct v4l2_fh *fh = &inst->v4l2_fh;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;

	static const struct v4l2_event vpu_event_src_ch = {
		.type = V4L2_EVENT_SOURCE_CHANGE,
		.u.src_change.changes = V4L2_EVENT_SRC_CH_RESOLUTION,
	};
	struct dec_info *p_dec_info = &inst->codec_info->dec_info;
	struct dec_initial_info *initial_info = &inst->codec_info->dec_info.initial_info;

	lockdep_assert_held(&inst->state_spinlock);

	dev_dbg(inst->dev->dev, "%s: rd_ptr %pad", __func__, &initial_info->rd_ptr);

	dev_dbg(inst->dev->dev, "%s: width: %u height: %u profile: %u | minbuffer: %u\n",
		__func__, initial_info->pic_width, initial_info->pic_height,
		initial_info->profile, initial_info->min_frame_buffer_count);

	inst->needs_reallocation = true;
	inst->fbc_buf_count = initial_info->min_frame_buffer_count + 1;
	if (inst->fbc_buf_count != v4l2_m2m_num_dst_bufs_ready(m2m_ctx)) {
		struct v4l2_ctrl *ctrl;

		ctrl = v4l2_ctrl_find(&inst->v4l2_ctrl_hdl,
				      V4L2_CID_MIN_BUFFERS_FOR_CAPTURE);
		if (ctrl)
			v4l2_ctrl_s_ctrl(ctrl, inst->fbc_buf_count);
	}

	if (p_dec_info->initial_info_obtained) {
		const struct vpu_format *vpu_fmt;

		inst->conf_win.left = initial_info->pic_crop_rect.left;
		inst->conf_win.top = initial_info->pic_crop_rect.top;
		inst->conf_win.width = initial_info->pic_width -
			initial_info->pic_crop_rect.left - initial_info->pic_crop_rect.right;
		inst->conf_win.height = initial_info->pic_height -
			initial_info->pic_crop_rect.top - initial_info->pic_crop_rect.bottom;

		vpu_fmt = wave5_find_vpu_fmt(inst->src_fmt.pixelformat,
					     dec_fmt_list[VPU_FMT_TYPE_CODEC]);
		if (!vpu_fmt)
			return -EINVAL;

		wave5_update_pix_fmt(&inst->src_fmt,
				     VPU_FMT_TYPE_CODEC,
				     initial_info->pic_width,
				     initial_info->pic_height,
				     vpu_fmt->v4l2_frmsize);

		vpu_fmt = wave5_find_vpu_fmt(inst->dst_fmt.pixelformat,
					     dec_fmt_list[VPU_FMT_TYPE_RAW]);
		if (!vpu_fmt)
			return -EINVAL;

		wave5_update_pix_fmt(&inst->dst_fmt,
				     VPU_FMT_TYPE_RAW,
				     initial_info->pic_width,
				     initial_info->pic_height,
				     vpu_fmt->v4l2_frmsize);
	}

	v4l2_event_queue_fh(fh, &vpu_event_src_ch);

	return 0;
}

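/*
 * Collect the results of one decode command: fetch the output info from the
 * firmware, return the consumed bitstream buffers, complete the displayable
 * capture buffer, and handle EOS or a dynamic resolution change before
 * deciding whether the m2m job can be finished.
 */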
static void wave5_vpu_dec_finish_decode(struct vpu_instance *inst)
{
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct dec_output_info dec_info;
	int ret;
	struct vb2_v4l2_buffer *dec_buf = NULL;
	struct vb2_v4l2_buffer *disp_buf = NULL;
	struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
	struct queue_status_info q_status;

	dev_dbg(inst->dev->dev, "%s: Fetch output info from firmware.", __func__);

	ret = wave5_vpu_dec_get_output_info(inst, &dec_info);
	if (ret) {
		dev_warn(inst->dev->dev, "%s: could not get output info.", __func__);
		v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
		return;
	}

	dev_dbg(inst->dev->dev, "%s: rd_ptr %pad wr_ptr %pad", __func__, &dec_info.rd_ptr,
		&dec_info.wr_ptr);
	wave5_handle_src_buffer(inst, dec_info.rd_ptr);

	dev_dbg(inst->dev->dev, "%s: dec_info dec_idx %i disp_idx %i", __func__,
		dec_info.index_frame_decoded, dec_info.index_frame_display);

	if (!vb2_is_streaming(dst_vq)) {
		dev_dbg(inst->dev->dev, "%s: capture is not streaming..", __func__);
		v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
		return;
	}

	/* Remove decoded buffer from the ready queue now that it has been
	 * decoded.
	 */
	if (dec_info.index_frame_decoded >= 0) {
		struct vb2_buffer *vb = vb2_get_buffer(dst_vq,
						       dec_info.index_frame_decoded);
		if (vb) {
			dec_buf = to_vb2_v4l2_buffer(vb);
			dec_buf->vb2_buf.timestamp = inst->timestamp;
		} else {
			dev_warn(inst->dev->dev, "%s: invalid decoded frame index %i",
				 __func__, dec_info.index_frame_decoded);
		}
	}

	if (dec_info.index_frame_display >= 0) {
		disp_buf = v4l2_m2m_dst_buf_remove_by_idx(m2m_ctx, dec_info.index_frame_display);
		if (!disp_buf)
			dev_warn(inst->dev->dev, "%s: invalid display frame index %i",
				 __func__, dec_info.index_frame_display);
	}

	/* If there is anything to display, do that now */
	if (disp_buf) {
		struct vpu_dst_buffer *dst_vpu_buf = wave5_to_vpu_dst_buf(disp_buf);

		if (inst->dst_fmt.num_planes == 1) {
			vb2_set_plane_payload(&disp_buf->vb2_buf, 0,
					      inst->dst_fmt.plane_fmt[0].sizeimage);
		} else if (inst->dst_fmt.num_planes == 2) {
			vb2_set_plane_payload(&disp_buf->vb2_buf, 0,
					      inst->dst_fmt.plane_fmt[0].sizeimage);
			vb2_set_plane_payload(&disp_buf->vb2_buf, 1,
					      inst->dst_fmt.plane_fmt[1].sizeimage);
		} else if (inst->dst_fmt.num_planes == 3) {
			vb2_set_plane_payload(&disp_buf->vb2_buf, 0,
					      inst->dst_fmt.plane_fmt[0].sizeimage);
			vb2_set_plane_payload(&disp_buf->vb2_buf, 1,
					      inst->dst_fmt.plane_fmt[1].sizeimage);
			vb2_set_plane_payload(&disp_buf->vb2_buf, 2,
					      inst->dst_fmt.plane_fmt[2].sizeimage);
		}

		/* TODO implement interlace support */
		disp_buf->field = V4L2_FIELD_NONE;
		dst_vpu_buf->display = true;
		v4l2_m2m_buf_done(disp_buf, VB2_BUF_STATE_DONE);

		dev_dbg(inst->dev->dev, "%s: frame_cycle %8u (payload %lu)\n",
			__func__, dec_info.frame_cycle,
			vb2_get_plane_payload(&disp_buf->vb2_buf, 0));
	}

	if ((dec_info.index_frame_display == DISPLAY_IDX_FLAG_SEQ_END ||
	     dec_info.sequence_changed)) {
		unsigned long flags;

		spin_lock_irqsave(&inst->state_spinlock, flags);
		if (!v4l2_m2m_has_stopped(m2m_ctx)) {
			switch_state(inst, VPU_INST_STATE_STOP);

			if (dec_info.sequence_changed)
				handle_dynamic_resolution_change(inst);
			else
				send_eos_event(inst);

			flag_last_buffer_done(inst);
		}
		spin_unlock_irqrestore(&inst->state_spinlock, flags);
	}

	/*
	 * During a resolution change and while draining, the firmware may flush
	 * the reorder queue regardless of having a matching decoding operation
	 * pending. Only terminate the job if there are no more IRQs coming.
	 */
	wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);
	if (q_status.report_queue_count == 0 &&
	    (q_status.instance_queue_count == 0 || dec_info.sequence_changed)) {
		dev_dbg(inst->dev->dev, "%s: finishing job.\n", __func__);
		pm_runtime_put_autosuspend(inst->dev->dev);
		v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
	}
}

static int wave5_vpu_dec_querycap(struct file *file, void *fh, struct v4l2_capability *cap)
{
	strscpy(cap->driver, VPU_DEC_DRV_NAME, sizeof(cap->driver));
	strscpy(cap->card, VPU_DEC_DRV_NAME, sizeof(cap->card));

	return 0;
}

static int wave5_vpu_dec_enum_framesizes(struct file *f, void *fh, struct v4l2_frmsizeenum *fsize)
{
	const struct vpu_format *vpu_fmt;

	if (fsize->index)
		return -EINVAL;

	vpu_fmt = wave5_find_vpu_fmt(fsize->pixel_format, dec_fmt_list[VPU_FMT_TYPE_CODEC]);
	if (!vpu_fmt) {
		vpu_fmt = wave5_find_vpu_fmt(fsize->pixel_format, dec_fmt_list[VPU_FMT_TYPE_RAW]);
		if (!vpu_fmt)
			return -EINVAL;
	}

	fsize->type = V4L2_FRMSIZE_TYPE_CONTINUOUS;
	fsize->stepwise.min_width = vpu_fmt->v4l2_frmsize->min_width;
	fsize->stepwise.max_width = vpu_fmt->v4l2_frmsize->max_width;
	fsize->stepwise.step_width = W5_DEC_CODEC_STEP_WIDTH;
	fsize->stepwise.min_height = vpu_fmt->v4l2_frmsize->min_height;
	fsize->stepwise.max_height = vpu_fmt->v4l2_frmsize->max_height;
	fsize->stepwise.step_height = W5_DEC_CODEC_STEP_HEIGHT;

	return 0;
}

static int wave5_vpu_dec_enum_fmt_cap(struct file *file, void *fh, struct v4l2_fmtdesc *f)
{
	const struct vpu_format *vpu_fmt;

	vpu_fmt = wave5_find_vpu_fmt_by_idx(f->index, dec_fmt_list[VPU_FMT_TYPE_RAW]);
	if (!vpu_fmt)
		return -EINVAL;

	f->pixelformat = vpu_fmt->v4l2_pix_fmt;
	f->flags = 0;

	return 0;
}

static int wave5_vpu_dec_try_fmt_cap(struct file *file, void *fh, struct v4l2_format *f)
{
	struct vpu_instance *inst = file_to_vpu_inst(file);
	struct dec_info *p_dec_info = &inst->codec_info->dec_info;
	const struct v4l2_frmsize_stepwise *frmsize;
	const struct vpu_format *vpu_fmt;
	int width, height;

	dev_dbg(inst->dev->dev,
		"%s: fourcc: %u width: %u height: %u nm planes: %u colorspace: %u field: %u\n",
		__func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
		f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);

	vpu_fmt = wave5_find_vpu_fmt(f->fmt.pix_mp.pixelformat, dec_fmt_list[VPU_FMT_TYPE_RAW]);
	if (!vpu_fmt) {
		width = inst->dst_fmt.width;
		height = inst->dst_fmt.height;
		f->fmt.pix_mp.pixelformat = inst->dst_fmt.pixelformat;
		frmsize = &dec_raw_frmsize;
	} else {
		width = f->fmt.pix_mp.width;
		height = f->fmt.pix_mp.height;
		f->fmt.pix_mp.pixelformat = vpu_fmt->v4l2_pix_fmt;
		frmsize = vpu_fmt->v4l2_frmsize;
	}

	if (p_dec_info->initial_info_obtained) {
		width = inst->dst_fmt.width;
		height = inst->dst_fmt.height;
	}

	wave5_update_pix_fmt(&f->fmt.pix_mp, VPU_FMT_TYPE_RAW,
			     width, height, frmsize);
	f->fmt.pix_mp.colorspace = inst->colorspace;
	f->fmt.pix_mp.ycbcr_enc = inst->ycbcr_enc;
	f->fmt.pix_mp.quantization = inst->quantization;
	f->fmt.pix_mp.xfer_func = inst->xfer_func;

	return 0;
}

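/*
 * Set the CAPTURE queue format and derive the chroma ordering
 * (cbcr_interleave/nv21) and the 4:2:0 vs. 4:2:2 output mapping from the
 * selected pixel format.
 */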
static int wave5_vpu_dec_s_fmt_cap(struct file *file, void *fh, struct v4l2_format *f)
{
	struct vpu_instance *inst = file_to_vpu_inst(file);
	int i, ret;

	dev_dbg(inst->dev->dev,
		"%s: fourcc: %u width: %u height: %u num_planes: %u colorspace: %u field: %u\n",
		__func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
		f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);

	ret = wave5_vpu_dec_try_fmt_cap(file, fh, f);
	if (ret)
		return ret;

	inst->dst_fmt.width = f->fmt.pix_mp.width;
	inst->dst_fmt.height = f->fmt.pix_mp.height;
	inst->dst_fmt.pixelformat = f->fmt.pix_mp.pixelformat;
	inst->dst_fmt.field = f->fmt.pix_mp.field;
	inst->dst_fmt.flags = f->fmt.pix_mp.flags;
	inst->dst_fmt.num_planes = f->fmt.pix_mp.num_planes;
	for (i = 0; i < inst->dst_fmt.num_planes; i++) {
		inst->dst_fmt.plane_fmt[i].bytesperline = f->fmt.pix_mp.plane_fmt[i].bytesperline;
		inst->dst_fmt.plane_fmt[i].sizeimage = f->fmt.pix_mp.plane_fmt[i].sizeimage;
	}

	if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV12 ||
	    inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV12M) {
		inst->cbcr_interleave = true;
		inst->nv21 = false;
		inst->output_format = FORMAT_420;
	} else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV21 ||
		   inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV21M) {
		inst->cbcr_interleave = true;
		inst->nv21 = true;
		inst->output_format = FORMAT_420;
	} else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV16 ||
		   inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV16M) {
		inst->cbcr_interleave = true;
		inst->nv21 = false;
		inst->output_format = FORMAT_422;
	} else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV61 ||
		   inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV61M) {
		inst->cbcr_interleave = true;
		inst->nv21 = true;
		inst->output_format = FORMAT_422;
	} else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_YUV422P ||
		   inst->dst_fmt.pixelformat == V4L2_PIX_FMT_YUV422M) {
		inst->cbcr_interleave = false;
		inst->nv21 = false;
		inst->output_format = FORMAT_422;
	} else {
		inst->cbcr_interleave = false;
		inst->nv21 = false;
		inst->output_format = FORMAT_420;
	}

	return 0;
}

static int wave5_vpu_dec_g_fmt_cap(struct file *file, void *fh, struct v4l2_format *f)
{
	struct vpu_instance *inst = file_to_vpu_inst(file);
	int i;

	f->fmt.pix_mp.width = inst->dst_fmt.width;
	f->fmt.pix_mp.height = inst->dst_fmt.height;
	f->fmt.pix_mp.pixelformat = inst->dst_fmt.pixelformat;
	f->fmt.pix_mp.field = inst->dst_fmt.field;
	f->fmt.pix_mp.flags = inst->dst_fmt.flags;
	f->fmt.pix_mp.num_planes = inst->dst_fmt.num_planes;
	for (i = 0; i < f->fmt.pix_mp.num_planes; i++) {
		f->fmt.pix_mp.plane_fmt[i].bytesperline = inst->dst_fmt.plane_fmt[i].bytesperline;
		f->fmt.pix_mp.plane_fmt[i].sizeimage = inst->dst_fmt.plane_fmt[i].sizeimage;
	}

	f->fmt.pix_mp.colorspace = inst->colorspace;
	f->fmt.pix_mp.ycbcr_enc = inst->ycbcr_enc;
	f->fmt.pix_mp.quantization = inst->quantization;
	f->fmt.pix_mp.xfer_func = inst->xfer_func;

	return 0;
}

static int wave5_vpu_dec_enum_fmt_out(struct file *file, void *fh, struct v4l2_fmtdesc *f)
{
	struct vpu_instance *inst = file_to_vpu_inst(file);
	const struct vpu_format *vpu_fmt;

	dev_dbg(inst->dev->dev, "%s: index: %u\n", __func__, f->index);

	vpu_fmt = wave5_find_vpu_fmt_by_idx(f->index, dec_fmt_list[VPU_FMT_TYPE_CODEC]);
	if (!vpu_fmt)
		return -EINVAL;

	f->pixelformat = vpu_fmt->v4l2_pix_fmt;
	f->flags = V4L2_FMT_FLAG_DYN_RESOLUTION | V4L2_FMT_FLAG_COMPRESSED;

	return 0;
}

static int wave5_vpu_dec_try_fmt_out(struct file *file, void *fh, struct v4l2_format *f)
{
	struct vpu_instance *inst = file_to_vpu_inst(file);
	const struct v4l2_frmsize_stepwise *frmsize;
	const struct vpu_format *vpu_fmt;
	int width, height;

	dev_dbg(inst->dev->dev,
		"%s: fourcc: %u width: %u height: %u num_planes: %u colorspace: %u field: %u\n",
		__func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
		f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);

	vpu_fmt = wave5_find_vpu_fmt(f->fmt.pix_mp.pixelformat, dec_fmt_list[VPU_FMT_TYPE_CODEC]);
	if (!vpu_fmt) {
		width = inst->src_fmt.width;
		height = inst->src_fmt.height;
		f->fmt.pix_mp.pixelformat = inst->src_fmt.pixelformat;
		frmsize = &dec_hevc_frmsize;
	} else {
		width = f->fmt.pix_mp.width;
		height = f->fmt.pix_mp.height;
		f->fmt.pix_mp.pixelformat = vpu_fmt->v4l2_pix_fmt;
		frmsize = vpu_fmt->v4l2_frmsize;
	}

	wave5_update_pix_fmt(&f->fmt.pix_mp, VPU_FMT_TYPE_CODEC,
			     width, height, frmsize);

	return 0;
}

static int wave5_vpu_dec_s_fmt_out(struct file *file, void *fh, struct v4l2_format *f)
{
	struct vpu_instance *inst = file_to_vpu_inst(file);
	const struct vpu_format *vpu_fmt;
	int i, ret;

	dev_dbg(inst->dev->dev,
		"%s: fourcc: %u width: %u height: %u num_planes: %u field: %u\n",
		__func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
		f->fmt.pix_mp.num_planes, f->fmt.pix_mp.field);

	ret = wave5_vpu_dec_try_fmt_out(file, fh, f);
	if (ret)
		return ret;

	inst->std = wave5_to_vpu_std(f->fmt.pix_mp.pixelformat, inst->type);
	if (inst->std == STD_UNKNOWN) {
		dev_warn(inst->dev->dev, "unsupported pixelformat: %.4s\n",
			 (char *)&f->fmt.pix_mp.pixelformat);
		return -EINVAL;
	}

	inst->src_fmt.width = f->fmt.pix_mp.width;
	inst->src_fmt.height = f->fmt.pix_mp.height;
	inst->src_fmt.pixelformat = f->fmt.pix_mp.pixelformat;
	inst->src_fmt.field = f->fmt.pix_mp.field;
	inst->src_fmt.flags = f->fmt.pix_mp.flags;
	inst->src_fmt.num_planes = f->fmt.pix_mp.num_planes;
	for (i = 0; i < inst->src_fmt.num_planes; i++) {
		inst->src_fmt.plane_fmt[i].bytesperline = f->fmt.pix_mp.plane_fmt[i].bytesperline;
		inst->src_fmt.plane_fmt[i].sizeimage = f->fmt.pix_mp.plane_fmt[i].sizeimage;
	}

	inst->colorspace = f->fmt.pix_mp.colorspace;
	inst->ycbcr_enc = f->fmt.pix_mp.ycbcr_enc;
	inst->quantization = f->fmt.pix_mp.quantization;
	inst->xfer_func = f->fmt.pix_mp.xfer_func;

	vpu_fmt = wave5_find_vpu_fmt(inst->dst_fmt.pixelformat, dec_fmt_list[VPU_FMT_TYPE_RAW]);
	if (!vpu_fmt)
		return -EINVAL;

	wave5_update_pix_fmt(&inst->dst_fmt, VPU_FMT_TYPE_RAW,
			     f->fmt.pix_mp.width, f->fmt.pix_mp.height,
			     vpu_fmt->v4l2_frmsize);

	return 0;
}

static int wave5_vpu_dec_g_selection(struct file *file, void *fh, struct v4l2_selection *s)
{
	struct vpu_instance *inst = file_to_vpu_inst(file);

	dev_dbg(inst->dev->dev, "%s: type: %u | target: %u\n", __func__, s->type, s->target);

	if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
		return -EINVAL;
	switch (s->target) {
	case V4L2_SEL_TGT_COMPOSE_BOUNDS:
	case V4L2_SEL_TGT_COMPOSE_PADDED:
		s->r.left = 0;
		s->r.top = 0;
		s->r.width = inst->dst_fmt.width;
		s->r.height = inst->dst_fmt.height;
		break;
	case V4L2_SEL_TGT_COMPOSE:
	case V4L2_SEL_TGT_COMPOSE_DEFAULT:
		s->r.left = 0;
		s->r.top = 0;
		if (inst->state > VPU_INST_STATE_OPEN) {
			s->r = inst->conf_win;
		} else {
			s->r.width = inst->src_fmt.width;
			s->r.height = inst->src_fmt.height;
		}
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static int wave5_vpu_dec_s_selection(struct file *file, void *fh, struct v4l2_selection *s)
{
	struct vpu_instance *inst = file_to_vpu_inst(file);

	if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
		return -EINVAL;

	if (s->target != V4L2_SEL_TGT_COMPOSE)
		return -EINVAL;

	dev_dbg(inst->dev->dev, "V4L2_SEL_TGT_COMPOSE w: %u h: %u\n",
		s->r.width, s->r.height);

	s->r.left = 0;
	s->r.top = 0;
	s->r.width = inst->dst_fmt.width;
	s->r.height = inst->dst_fmt.height;

	return 0;
}

static int wave5_vpu_dec_stop(struct vpu_instance *inst)
{
	int ret = 0;
	unsigned long flags;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;

	spin_lock_irqsave(&inst->state_spinlock, flags);

	if (m2m_ctx->is_draining) {
		ret = -EBUSY;
		goto unlock_and_return;
	}

	if (inst->state != VPU_INST_STATE_NONE) {
		/*
		 * Temporarily release the state_spinlock so that subsequent
		 * calls do not block on a mutex while inside this spinlock.
		 */
		spin_unlock_irqrestore(&inst->state_spinlock, flags);
		ret = wave5_vpu_dec_set_eos_on_firmware(inst);
		if (ret)
			return ret;

		spin_lock_irqsave(&inst->state_spinlock, flags);
		/*
		 * TODO eliminate this check by using a separate check for
		 * draining triggered by a resolution change.
		 */
		if (m2m_ctx->is_draining) {
			ret = -EBUSY;
			goto unlock_and_return;
		}
	}

	/*
	 * Used to remember the EOS state after the streamoff/on transition on
	 * the capture queue.
	 */
	inst->eos = true;

	if (m2m_ctx->has_stopped)
		goto unlock_and_return;

	m2m_ctx->last_src_buf = v4l2_m2m_last_src_buf(m2m_ctx);
	m2m_ctx->is_draining = true;

	/*
	 * Deferred to device run in case it wasn't in the ring buffer
	 * yet. Otherwise, we have to send the EOS signal to the
	 * firmware so that any pending PIC_RUN ends without a new
	 * bitstream buffer.
	 */
	if (m2m_ctx->last_src_buf)
		goto unlock_and_return;

	if (inst->state == VPU_INST_STATE_NONE) {
		send_eos_event(inst);
		flag_last_buffer_done(inst);
	}

unlock_and_return:
	spin_unlock_irqrestore(&inst->state_spinlock, flags);
	return ret;
}

static int wave5_vpu_dec_start(struct vpu_instance *inst)
{
	int ret = 0;
	unsigned long flags;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);

	spin_lock_irqsave(&inst->state_spinlock, flags);

	if (m2m_ctx->is_draining) {
		ret = -EBUSY;
		goto unlock_and_return;
	}

	if (m2m_ctx->has_stopped)
		m2m_ctx->has_stopped = false;

	vb2_clear_last_buffer_dequeued(dst_vq);
	inst->eos = false;

unlock_and_return:
	spin_unlock_irqrestore(&inst->state_spinlock, flags);
	return ret;
}

static int wave5_vpu_dec_decoder_cmd(struct file *file, void *fh, struct v4l2_decoder_cmd *dc)
{
	struct vpu_instance *inst = file_to_vpu_inst(file);
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	int ret;

	dev_dbg(inst->dev->dev, "decoder command: %u\n", dc->cmd);

	ret = v4l2_m2m_ioctl_try_decoder_cmd(file, fh, dc);
	if (ret)
		return ret;

	switch (dc->cmd) {
	case V4L2_DEC_CMD_STOP:
		ret = wave5_vpu_dec_stop(inst);
		/* Just in case we don't have anything to decode anymore */
		v4l2_m2m_try_schedule(m2m_ctx);
		break;
	case V4L2_DEC_CMD_START:
		ret = wave5_vpu_dec_start(inst);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

static const struct v4l2_ioctl_ops wave5_vpu_dec_ioctl_ops = {
	.vidioc_querycap = wave5_vpu_dec_querycap,
	.vidioc_enum_framesizes = wave5_vpu_dec_enum_framesizes,

	.vidioc_enum_fmt_vid_cap = wave5_vpu_dec_enum_fmt_cap,
	.vidioc_s_fmt_vid_cap_mplane = wave5_vpu_dec_s_fmt_cap,
	.vidioc_g_fmt_vid_cap_mplane = wave5_vpu_dec_g_fmt_cap,
	.vidioc_try_fmt_vid_cap_mplane = wave5_vpu_dec_try_fmt_cap,

	.vidioc_enum_fmt_vid_out = wave5_vpu_dec_enum_fmt_out,
	.vidioc_s_fmt_vid_out_mplane = wave5_vpu_dec_s_fmt_out,
	.vidioc_g_fmt_vid_out_mplane = wave5_vpu_g_fmt_out,
	.vidioc_try_fmt_vid_out_mplane = wave5_vpu_dec_try_fmt_out,

	.vidioc_g_selection = wave5_vpu_dec_g_selection,
	.vidioc_s_selection = wave5_vpu_dec_s_selection,

	.vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs,
	/*
	 * Firmware does not support CREATE_BUFS for CAPTURE queue. Since
	 * there is no immediate use-case for supporting CREATE_BUFS on
	 * just the OUTPUT queue, disable CREATE_BUFS altogether.
	 */
	.vidioc_querybuf = v4l2_m2m_ioctl_querybuf,
	.vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf,
	.vidioc_qbuf = v4l2_m2m_ioctl_qbuf,
	.vidioc_expbuf = v4l2_m2m_ioctl_expbuf,
	.vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf,
	.vidioc_streamon = v4l2_m2m_ioctl_streamon,
	.vidioc_streamoff = v4l2_m2m_ioctl_streamoff,

	.vidioc_try_decoder_cmd = v4l2_m2m_ioctl_try_decoder_cmd,
	.vidioc_decoder_cmd = wave5_vpu_dec_decoder_cmd,

	.vidioc_subscribe_event = wave5_vpu_subscribe_event,
	.vidioc_unsubscribe_event = v4l2_event_unsubscribe,
};

static int wave5_vpu_dec_queue_setup(struct vb2_queue *q, unsigned int *num_buffers,
				     unsigned int *num_planes, unsigned int sizes[],
				     struct device *alloc_devs[])
{
	struct vpu_instance *inst = vb2_get_drv_priv(q);
	struct v4l2_pix_format_mplane inst_format =
		(q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) ? inst->src_fmt : inst->dst_fmt;
	unsigned int i;

	dev_dbg(inst->dev->dev, "%s: num_buffers: %u | num_planes: %u | type: %u\n", __func__,
		*num_buffers, *num_planes, q->type);

	*num_planes = inst_format.num_planes;

	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
		sizes[0] = inst_format.plane_fmt[0].sizeimage;
		dev_dbg(inst->dev->dev, "%s: size[0]: %u\n", __func__, sizes[0]);
	} else if (q->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
		if (*num_buffers < inst->fbc_buf_count)
			*num_buffers = inst->fbc_buf_count;

		for (i = 0; i < *num_planes; i++) {
			sizes[i] = inst_format.plane_fmt[i].sizeimage;
			dev_dbg(inst->dev->dev, "%s: size[%u]: %u\n", __func__, i, sizes[i]);
		}
	}

	return 0;
}

static int wave5_prepare_fb(struct vpu_instance *inst)
{
	int linear_num;
	int non_linear_num;
	int fb_stride = 0, fb_height = 0;
	int luma_size, chroma_size;
	int ret, i;
	struct v4l2_m2m_buffer *buf, *n;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	u32 bitdepth = inst->codec_info->dec_info.initial_info.luma_bitdepth;

	switch (bitdepth) {
	case 8:
		break;
	case 10:
		if (inst->std == W_HEVC_DEC &&
		    inst->dev->attr.support_hevc10bit_dec)
			break;

		fallthrough;
	default:
		dev_err(inst->dev->dev, "no support for %d bit depth\n", bitdepth);

		return -EINVAL;
	}

	linear_num = v4l2_m2m_num_dst_bufs_ready(m2m_ctx);
	non_linear_num = inst->fbc_buf_count;

	for (i = 0; i < non_linear_num; i++) {
		struct frame_buffer *frame = &inst->frame_buf[i];
		struct vpu_buf *vframe = &inst->frame_vbuf[i];

		fb_stride = ALIGN(inst->dst_fmt.width * bitdepth / 8, 32);
		fb_height = ALIGN(inst->dst_fmt.height, 32);
		luma_size = fb_stride * fb_height;

		chroma_size = ALIGN(fb_stride / 2, 16) * fb_height;

		if (vframe->size == (luma_size + chroma_size))
			continue;

		if (vframe->size)
			wave5_vpu_dec_reset_framebuffer(inst, i);

		vframe->size = luma_size + chroma_size;
		ret = wave5_vdi_allocate_dma_memory(inst->dev, vframe);
		if (ret) {
			dev_dbg(inst->dev->dev,
				"%s: Allocating FBC buf of size %zu, fail: %d\n",
				__func__, vframe->size, ret);
			return ret;
		}

		frame->buf_y = vframe->daddr;
		frame->buf_cb = vframe->daddr + luma_size;
		frame->buf_cr = (dma_addr_t)-1;
		frame->size = vframe->size;
		frame->width = inst->src_fmt.width;
		frame->stride = fb_stride;
		frame->map_type = COMPRESSED_FRAME_MAP;
		frame->update_fb_info = true;
	}
	/* In case the count has reduced, clean up leftover framebuffer memory */
	for (i = non_linear_num; i < MAX_REG_FRAME; i++) {
		ret = wave5_vpu_dec_reset_framebuffer(inst, i);
		if (ret)
			break;
	}

	for (i = 0; i < linear_num; i++) {
		struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
		struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
		struct vb2_buffer *vb = vb2_get_buffer(dst_vq, i);
		struct frame_buffer *frame = &inst->frame_buf[non_linear_num + i];
		dma_addr_t buf_addr_y = 0, buf_addr_cb = 0, buf_addr_cr = 0;
		u32 buf_size = 0;
		u32 fb_stride = inst->dst_fmt.width;
		u32 luma_size = fb_stride * inst->dst_fmt.height;
		u32 chroma_size;

		if (inst->output_format == FORMAT_422)
			chroma_size = fb_stride * inst->dst_fmt.height / 2;
		else
			chroma_size = fb_stride * inst->dst_fmt.height / 4;

		if (inst->dst_fmt.num_planes == 1) {
			buf_size = vb2_plane_size(vb, 0);
			buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0);
			buf_addr_cb = buf_addr_y + luma_size;
			buf_addr_cr = buf_addr_cb + chroma_size;
		} else if (inst->dst_fmt.num_planes == 2) {
			buf_size = vb2_plane_size(vb, 0) +
				vb2_plane_size(vb, 1);
			buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0);
			buf_addr_cb = vb2_dma_contig_plane_dma_addr(vb, 1);
			buf_addr_cr = buf_addr_cb + chroma_size;
		} else if (inst->dst_fmt.num_planes == 3) {
			buf_size = vb2_plane_size(vb, 0) +
				vb2_plane_size(vb, 1) +
				vb2_plane_size(vb, 2);
			buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0);
			buf_addr_cb = vb2_dma_contig_plane_dma_addr(vb, 1);
			buf_addr_cr = vb2_dma_contig_plane_dma_addr(vb, 2);
		}

		frame->buf_y = buf_addr_y;
		frame->buf_cb = buf_addr_cb;
		frame->buf_cr = buf_addr_cr;
		frame->size = buf_size;
		frame->width = inst->src_fmt.width;
		frame->stride = fb_stride;
		frame->map_type = LINEAR_FRAME_MAP;
		frame->update_fb_info = true;
	}

	ret = wave5_vpu_dec_register_frame_buffer_ex(inst, non_linear_num, linear_num,
						     fb_stride, inst->dst_fmt.height);
	if (ret) {
		dev_dbg(inst->dev->dev, "%s: vpu_dec_register_frame_buffer_ex fail: %d",
			__func__, ret);
		return ret;
	}

	/*
	 * Mark all frame buffers as out of display, to avoid using them before
	 * the application has them queued.
	 */
	for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) {
		ret = wave5_vpu_dec_set_disp_flag(inst, i);
		if (ret) {
			dev_dbg(inst->dev->dev,
				"%s: Setting display flag of buf index: %u, fail: %d\n",
				__func__, i, ret);
		}
	}

	v4l2_m2m_for_each_dst_buf_safe(m2m_ctx, buf, n) {
		struct vb2_v4l2_buffer *vbuf = &buf->vb;

		ret = wave5_vpu_dec_clr_disp_flag(inst, vbuf->vb2_buf.index);
		if (ret)
			dev_dbg(inst->dev->dev,
				"%s: Clearing display flag of buf index: %u, fail: %d\n",
				__func__, i, ret);
	}

	return 0;
}

static int write_to_ringbuffer(struct vpu_instance *inst, void *buffer, size_t buffer_size,
			       struct vpu_buf *ring_buffer, dma_addr_t wr_ptr)
{
	size_t size;
	size_t offset = wr_ptr - ring_buffer->daddr;
	int ret;

	if (wr_ptr + buffer_size > ring_buffer->daddr + ring_buffer->size) {
		size = ring_buffer->daddr + ring_buffer->size - wr_ptr;
		ret = wave5_vdi_write_memory(inst->dev, ring_buffer, offset, (u8 *)buffer, size);
		if (ret < 0)
			return ret;

		ret = wave5_vdi_write_memory(inst->dev, ring_buffer, 0, (u8 *)buffer + size,
					     buffer_size - size);
		if (ret < 0)
			return ret;
	} else {
		ret = wave5_vdi_write_memory(inst->dev, ring_buffer, offset, (u8 *)buffer,
					     buffer_size);
		if (ret < 0)
			return ret;
	}

	return 0;
}

static int fill_ringbuffer(struct vpu_instance *inst)
{
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct v4l2_m2m_buffer *buf, *n;
	int ret;

	if (m2m_ctx->last_src_buf) {
		struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(m2m_ctx->last_src_buf);

		if (vpu_buf->consumed) {
			dev_dbg(inst->dev->dev, "last src buffer already written\n");
			return 0;
		}
	}

	v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buf, n) {
		struct vb2_v4l2_buffer *vbuf = &buf->vb;
		struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(vbuf);
		struct vpu_buf *ring_buffer = &inst->bitstream_vbuf;
		size_t src_size = vb2_get_plane_payload(&vbuf->vb2_buf, 0);
		void *src_buf = vb2_plane_vaddr(&vbuf->vb2_buf, 0);
		dma_addr_t rd_ptr = 0;
		dma_addr_t wr_ptr = 0;
		size_t remain_size = 0;

		if (vpu_buf->consumed) {
			dev_dbg(inst->dev->dev, "already copied src buf (%u) to the ring buffer\n",
				vbuf->vb2_buf.index);
			continue;
		}

		if (!src_buf) {
			dev_dbg(inst->dev->dev,
				"%s: Acquiring kernel pointer to src buf (%u), fail\n",
				__func__, vbuf->vb2_buf.index);
			break;
		}

		ret = wave5_vpu_dec_get_bitstream_buffer(inst, &rd_ptr, &wr_ptr, &remain_size);
		if (ret) {
			/* Unable to acquire the mutex */
			dev_err(inst->dev->dev, "Getting the bitstream buffer, fail: %d\n",
				ret);
			return ret;
		}

		dev_dbg(inst->dev->dev, "%s: rd_ptr %pad wr_ptr %pad", __func__, &rd_ptr, &wr_ptr);

		if (remain_size < src_size) {
			dev_dbg(inst->dev->dev,
				"%s: remaining size: %zu < source size: %zu for src buf (%u)\n",
				__func__, remain_size, src_size, vbuf->vb2_buf.index);
			break;
		}

		ret = write_to_ringbuffer(inst, src_buf, src_size, ring_buffer, wr_ptr);
		if (ret) {
			dev_err(inst->dev->dev, "Write src buf (%u) to ring buffer, fail: %d\n",
				vbuf->vb2_buf.index, ret);
			return ret;
		}

		ret = wave5_vpu_dec_update_bitstream_buffer(inst, src_size);
		if (ret) {
			dev_dbg(inst->dev->dev,
				"update_bitstream_buffer fail: %d for src buf (%u)\n",
				ret, vbuf->vb2_buf.index);
			break;
		}

		vpu_buf->consumed = true;

		/* Don't write buffers past the last one while draining. */
		if (v4l2_m2m_is_last_draining_src_buf(m2m_ctx, vbuf)) {
			dev_dbg(inst->dev->dev, "last src buffer written to the ring buffer\n");
			break;
		}
	}

	return 0;
}

static void wave5_vpu_dec_buf_queue_src(struct vb2_buffer *vb)
{
	struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
	struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(vbuf);

	vpu_buf->consumed = false;
	vbuf->sequence = inst->queued_src_buf_num++;

	v4l2_m2m_buf_queue(m2m_ctx, vbuf);
}

static void wave5_vpu_dec_buf_queue_dst(struct vb2_buffer *vb)
{
	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
	struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;

	vbuf->sequence = inst->queued_dst_buf_num++;

	if (inst->state == VPU_INST_STATE_PIC_RUN) {
		struct vpu_dst_buffer *vpu_buf = wave5_to_vpu_dst_buf(vbuf);
		int ret;

		/*
		 * The buffer is already registered, just clear the display flag
		 * to let the firmware know it can be used.
		 */
		vpu_buf->display = false;
		ret = wave5_vpu_dec_clr_disp_flag(inst, vb->index);
		if (ret) {
			dev_dbg(inst->dev->dev,
				"%s: Clearing the display flag of buffer index: %u, fail: %d\n",
				__func__, vb->index, ret);
		}
	}

	if (vb2_is_streaming(vb->vb2_queue) && v4l2_m2m_dst_buf_is_last(m2m_ctx)) {
		unsigned int i;

		for (i = 0; i < vb->num_planes; i++)
			vb2_set_plane_payload(vb, i, 0);

		vbuf->field = V4L2_FIELD_NONE;

		send_eos_event(inst);
		v4l2_m2m_last_buffer_done(m2m_ctx, vbuf);
	} else {
		v4l2_m2m_buf_queue(m2m_ctx, vbuf);
	}
}

static void wave5_vpu_dec_buf_queue(struct vb2_buffer *vb)
{
	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
	struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);

	dev_dbg(inst->dev->dev, "%s: type: %4u index: %4u size: ([0]=%4lu, [1]=%4lu, [2]=%4lu)\n",
		__func__, vb->type, vb->index, vb2_plane_size(&vbuf->vb2_buf, 0),
		vb2_plane_size(&vbuf->vb2_buf, 1), vb2_plane_size(&vbuf->vb2_buf, 2));

	if (vb->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
		wave5_vpu_dec_buf_queue_src(vb);
	else if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE)
		wave5_vpu_dec_buf_queue_dst(vb);
}

static int wave5_vpu_dec_allocate_ring_buffer(struct vpu_instance *inst)
{
	int ret;
	struct vpu_buf *ring_buffer = &inst->bitstream_vbuf;

	ring_buffer->size = ALIGN(inst->src_fmt.plane_fmt[0].sizeimage, 1024) * 4;
	ret = wave5_vdi_allocate_dma_memory(inst->dev, ring_buffer);
	if (ret) {
		dev_dbg(inst->dev->dev, "%s: allocate ring buffer of size %zu fail: %d\n",
			__func__, ring_buffer->size, ret);
		return ret;
	}

	inst->last_rd_ptr = ring_buffer->daddr;

	return 0;
}

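/*
 * On the first OUTPUT streamon, allocate the bitstream ring buffer and open
 * the decoder instance on the firmware; on CAPTURE streamon, re-enter the
 * INIT_SEQ state when a stopped sequence is being restarted.
 */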
static int wave5_vpu_dec_start_streaming(struct vb2_queue *q, unsigned int count)
{
	struct vpu_instance *inst = vb2_get_drv_priv(q);
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	int ret = 0;

	dev_dbg(inst->dev->dev, "%s: type: %u\n", __func__, q->type);
	pm_runtime_resume_and_get(inst->dev->dev);

	v4l2_m2m_update_start_streaming_state(m2m_ctx, q);

	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE && inst->state == VPU_INST_STATE_NONE) {
		struct dec_open_param open_param;

		memset(&open_param, 0, sizeof(struct dec_open_param));

		ret = wave5_vpu_dec_allocate_ring_buffer(inst);
		if (ret)
			goto return_buffers;

		open_param.bitstream_buffer = inst->bitstream_vbuf.daddr;
		open_param.bitstream_buffer_size = inst->bitstream_vbuf.size;

		ret = wave5_vpu_dec_open(inst, &open_param);
		if (ret) {
			dev_dbg(inst->dev->dev, "%s: decoder opening, fail: %d\n",
				__func__, ret);
			goto free_bitstream_vbuf;
		}

		ret = switch_state(inst, VPU_INST_STATE_OPEN);
		if (ret)
			goto free_bitstream_vbuf;
	} else if (q->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
		struct dec_initial_info *initial_info =
			&inst->codec_info->dec_info.initial_info;

		if (inst->state == VPU_INST_STATE_STOP)
			ret = switch_state(inst, VPU_INST_STATE_INIT_SEQ);
		if (ret)
			goto return_buffers;

		if (inst->state == VPU_INST_STATE_INIT_SEQ &&
		    inst->dev->product_code == WAVE521C_CODE) {
			if (initial_info->luma_bitdepth != 8) {
				dev_info(inst->dev->dev, "%s: no support for %d bit depth",
					 __func__, initial_info->luma_bitdepth);
				ret = -EINVAL;
				goto return_buffers;
			}
		}

	}
	pm_runtime_put_autosuspend(inst->dev->dev);
	return ret;

free_bitstream_vbuf:
	wave5_vdi_free_dma_memory(inst->dev, &inst->bitstream_vbuf);
return_buffers:
	wave5_return_bufs(q, VB2_BUF_STATE_QUEUED);
	pm_runtime_put_autosuspend(inst->dev->dev);
	return ret;
}

static int streamoff_output(struct vb2_queue *q)
{
	struct vpu_instance *inst = vb2_get_drv_priv(q);
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct vb2_v4l2_buffer *buf;
	int ret;
	dma_addr_t new_rd_ptr;
	struct dec_output_info dec_info;
	unsigned int i;

	for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) {
		ret = wave5_vpu_dec_set_disp_flag(inst, i);
		if (ret)
			dev_dbg(inst->dev->dev,
				"%s: Setting display flag of buf index: %u, fail: %d\n",
				__func__, i, ret);
	}

	while ((buf = v4l2_m2m_src_buf_remove(m2m_ctx))) {
		dev_dbg(inst->dev->dev, "%s: (Multiplanar) buf type %4u | index %4u\n",
			__func__, buf->vb2_buf.type, buf->vb2_buf.index);
		v4l2_m2m_buf_done(buf, VB2_BUF_STATE_ERROR);
	}

	while (wave5_vpu_dec_get_output_info(inst, &dec_info) == 0) {
		if (dec_info.index_frame_display >= 0)
			wave5_vpu_dec_set_disp_flag(inst, dec_info.index_frame_display);
	}

	ret = wave5_vpu_flush_instance(inst);
	if (ret)
		return ret;

	/* Reset the ring buffer information */
	new_rd_ptr = wave5_vpu_dec_get_rd_ptr(inst);
	inst->last_rd_ptr = new_rd_ptr;
	inst->codec_info->dec_info.stream_rd_ptr = new_rd_ptr;
	inst->codec_info->dec_info.stream_wr_ptr = new_rd_ptr;

	if (v4l2_m2m_has_stopped(m2m_ctx))
		send_eos_event(inst);

	/* streamoff on output cancels any draining operation */
	inst->eos = false;

	return 0;
}

static int streamoff_capture(struct vb2_queue *q)
{
	struct vpu_instance *inst = vb2_get_drv_priv(q);
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct vb2_v4l2_buffer *buf;
	unsigned int i;
	int ret = 0;

	for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) {
		ret = wave5_vpu_dec_set_disp_flag(inst, i);
		if (ret)
			dev_dbg(inst->dev->dev,
				"%s: Setting display flag of buf index: %u, fail: %d\n",
				__func__, i, ret);
	}

	while ((buf = v4l2_m2m_dst_buf_remove(m2m_ctx))) {
		u32 plane;

		dev_dbg(inst->dev->dev, "%s: buf type %4u | index %4u\n",
			__func__, buf->vb2_buf.type, buf->vb2_buf.index);

		for (plane = 0; plane < inst->dst_fmt.num_planes; plane++)
			vb2_set_plane_payload(&buf->vb2_buf, plane, 0);

		v4l2_m2m_buf_done(buf, VB2_BUF_STATE_ERROR);
	}

	if (inst->needs_reallocation) {
		wave5_vpu_dec_give_command(inst, DEC_RESET_FRAMEBUF_INFO, NULL);
		inst->needs_reallocation = false;
	}

	if (v4l2_m2m_has_stopped(m2m_ctx)) {
		ret = switch_state(inst, VPU_INST_STATE_INIT_SEQ);
		if (ret)
			return ret;
	}

	return 0;
}

static void wave5_vpu_dec_stop_streaming(struct vb2_queue *q)
{
	struct vpu_instance *inst = vb2_get_drv_priv(q);
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	bool check_cmd = TRUE;

	dev_dbg(inst->dev->dev, "%s: type: %u\n", __func__, q->type);
	pm_runtime_resume_and_get(inst->dev->dev);

	while (check_cmd) {
		struct queue_status_info q_status;
		struct dec_output_info dec_output_info;

		wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);

		if (q_status.report_queue_count == 0)
			break;

		if (wave5_vpu_wait_interrupt(inst, VPU_DEC_TIMEOUT) < 0)
			break;

		if (wave5_vpu_dec_get_output_info(inst, &dec_output_info))
			dev_dbg(inst->dev->dev, "there is no output info\n");
	}

	v4l2_m2m_update_stop_streaming_state(m2m_ctx, q);

	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
		streamoff_output(q);
	else
		streamoff_capture(q);

	pm_runtime_put_autosuspend(inst->dev->dev);
}

static const struct vb2_ops wave5_vpu_dec_vb2_ops = {
	.queue_setup = wave5_vpu_dec_queue_setup,
	.buf_queue = wave5_vpu_dec_buf_queue,
	.start_streaming = wave5_vpu_dec_start_streaming,
	.stop_streaming = wave5_vpu_dec_stop_streaming,
};

static void wave5_set_default_format(struct v4l2_pix_format_mplane *src_fmt,
				     struct v4l2_pix_format_mplane *dst_fmt)
{
	src_fmt->pixelformat = dec_fmt_list[VPU_FMT_TYPE_CODEC][0].v4l2_pix_fmt;
	wave5_update_pix_fmt(src_fmt, VPU_FMT_TYPE_CODEC,
			     W5_DEF_DEC_PIC_WIDTH, W5_DEF_DEC_PIC_HEIGHT,
			     &dec_hevc_frmsize);

	dst_fmt->pixelformat = dec_fmt_list[VPU_FMT_TYPE_RAW][0].v4l2_pix_fmt;
	wave5_update_pix_fmt(dst_fmt, VPU_FMT_TYPE_RAW,
			     W5_DEF_DEC_PIC_WIDTH, W5_DEF_DEC_PIC_HEIGHT,
			     &dec_raw_frmsize);
}

static int wave5_vpu_dec_queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)
{
	return wave5_vpu_queue_init(priv, src_vq, dst_vq, &wave5_vpu_dec_vb2_ops);
}

static const struct vpu_instance_ops wave5_vpu_dec_inst_ops = {
	.finish_process = wave5_vpu_dec_finish_decode,
};

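/*
 * Issue SEQ_INIT to the firmware and wait for its completion so that the
 * stream parameters (resolution, crop rectangle, minimum number of frame
 * buffers) become available for setting up the CAPTURE queue.
 */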
static int initialize_sequence(struct vpu_instance *inst)
{
	struct dec_initial_info initial_info;
	int ret = 0;

	memset(&initial_info, 0, sizeof(struct dec_initial_info));

	ret = wave5_vpu_dec_issue_seq_init(inst);
	if (ret) {
		dev_dbg(inst->dev->dev, "%s: wave5_vpu_dec_issue_seq_init, fail: %d\n",
			__func__, ret);
		return ret;
	}

	if (wave5_vpu_wait_interrupt(inst, VPU_DEC_TIMEOUT) < 0)
		dev_dbg(inst->dev->dev, "%s: failed to call vpu_wait_interrupt()\n", __func__);

	ret = wave5_vpu_dec_complete_seq_init(inst, &initial_info);
	if (ret) {
		dev_dbg(inst->dev->dev, "%s: vpu_dec_complete_seq_init, fail: %d, reason: %u\n",
			__func__, ret, initial_info.seq_init_err_reason);
		wave5_handle_src_buffer(inst, initial_info.rd_ptr);
		return ret;
	}

	handle_dynamic_resolution_change(inst);

	return 0;
}

static bool wave5_is_draining_or_eos(struct vpu_instance *inst)
{
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;

	lockdep_assert_held(&inst->state_spinlock);
	return m2m_ctx->is_draining || inst->eos;
}

static void wave5_vpu_dec_device_run(void *priv)
{
	struct vpu_instance *inst = priv;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct queue_status_info q_status;
	u32 fail_res = 0;
	int ret = 0;

	dev_dbg(inst->dev->dev, "%s: Fill the ring buffer with new bitstream data", __func__);
	pm_runtime_resume_and_get(inst->dev->dev);
	ret = fill_ringbuffer(inst);
	if (ret) {
		dev_warn(inst->dev->dev, "Filling ring buffer failed\n");
		goto finish_job_and_return;
	}

	switch (inst->state) {
	case VPU_INST_STATE_OPEN:
		ret = initialize_sequence(inst);
		if (ret) {
			unsigned long flags;

			spin_lock_irqsave(&inst->state_spinlock, flags);
			if (wave5_is_draining_or_eos(inst) &&
			    wave5_last_src_buffer_consumed(m2m_ctx)) {
				struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);

				switch_state(inst, VPU_INST_STATE_STOP);

				if (vb2_is_streaming(dst_vq))
					send_eos_event(inst);
				else
					handle_dynamic_resolution_change(inst);

				flag_last_buffer_done(inst);
			}
			spin_unlock_irqrestore(&inst->state_spinlock, flags);
		} else {
			switch_state(inst, VPU_INST_STATE_INIT_SEQ);
		}

		break;

	case VPU_INST_STATE_INIT_SEQ:
		/*
		 * Do this early, preparing the fb can trigger an IRQ before
		 * we had a chance to switch, which leads to an invalid state
		 * change.
		 */
		switch_state(inst, VPU_INST_STATE_PIC_RUN);

		/*
		 * During DRC, the picture decoding remains pending, so just leave the job
		 * active until this decode operation completes.
		 */
		wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);

		/*
		 * The sequence must be analyzed first to calculate the proper
		 * size of the auxiliary buffers.
		 */
		ret = wave5_prepare_fb(inst);
		if (ret) {
			dev_warn(inst->dev->dev, "Framebuffer preparation, fail: %d\n", ret);
			switch_state(inst, VPU_INST_STATE_STOP);
			break;
		}

		if (q_status.instance_queue_count) {
			dev_dbg(inst->dev->dev, "%s: leave with active job", __func__);
			return;
		}

		fallthrough;
	case VPU_INST_STATE_PIC_RUN:
		ret = start_decode(inst, &fail_res);
		if (ret) {
			dev_err(inst->dev->dev,
				"Frame decoding on m2m context (%p), fail: %d (result: %d)\n",
				m2m_ctx, ret, fail_res);
			break;
		}
		/* Return so that we leave this job active */
		dev_dbg(inst->dev->dev, "%s: leave with active job", __func__);
		return;
	default:
		WARN(1, "Execution of a job in state %s illegal.\n", state_to_str(inst->state));
		break;
	}

finish_job_and_return:
	dev_dbg(inst->dev->dev, "%s: leave and finish job", __func__);
	pm_runtime_put_autosuspend(inst->dev->dev);
	v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
}

static void wave5_vpu_dec_job_abort(void *priv)
{
	struct vpu_instance *inst = priv;
	int ret;

	ret = switch_state(inst, VPU_INST_STATE_STOP);
	if (ret)
		return;

	ret = wave5_vpu_dec_set_eos_on_firmware(inst);
	if (ret)
		dev_warn(inst->dev->dev,
			 "Setting EOS for the bitstream, fail: %d\n", ret);
}

static int wave5_vpu_dec_job_ready(void *priv)
{
	struct vpu_instance *inst = priv;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&inst->state_spinlock, flags);

	switch (inst->state) {
	case VPU_INST_STATE_NONE:
		dev_dbg(inst->dev->dev, "Decoder must be open to start queueing M2M jobs!\n");
		break;
	case VPU_INST_STATE_OPEN:
		if (wave5_is_draining_or_eos(inst) || !v4l2_m2m_has_stopped(m2m_ctx) ||
		    v4l2_m2m_num_src_bufs_ready(m2m_ctx) > 0) {
			ret = 1;
			break;
		}

		dev_dbg(inst->dev->dev,
			"Decoder must be draining or >= 1 OUTPUT queue buffer must be queued!\n");
		break;
	case VPU_INST_STATE_INIT_SEQ:
	case VPU_INST_STATE_PIC_RUN:
		if (!m2m_ctx->cap_q_ctx.q.streaming) {
			dev_dbg(inst->dev->dev, "CAPTURE queue must be streaming to queue jobs!\n");
			break;
		} else if (v4l2_m2m_num_dst_bufs_ready(m2m_ctx) < (inst->fbc_buf_count - 1)) {
			dev_dbg(inst->dev->dev,
				"No capture buffer ready to decode!\n");
			break;
		} else if (!wave5_is_draining_or_eos(inst) &&
			   !v4l2_m2m_num_src_bufs_ready(m2m_ctx)) {
			dev_dbg(inst->dev->dev,
				"No bitstream data to decode!\n");
			break;
		}
		ret = 1;
		break;
	case VPU_INST_STATE_STOP:
		dev_dbg(inst->dev->dev, "Decoder is stopped, not running.\n");
		break;
	}

	spin_unlock_irqrestore(&inst->state_spinlock, flags);

	return ret;
}

static const struct v4l2_m2m_ops wave5_vpu_dec_m2m_ops = {
	.device_run = wave5_vpu_dec_device_run,
	.job_abort = wave5_vpu_dec_job_abort,
	.job_ready = wave5_vpu_dec_job_ready,
};

static int wave5_vpu_open_dec(struct file *filp)
{
	struct video_device *vdev = video_devdata(filp);
	struct vpu_device *dev = video_drvdata(filp);
	struct vpu_instance *inst = NULL;
	struct v4l2_m2m_ctx *m2m_ctx;
	int ret = 0;

	inst = kzalloc(sizeof(*inst), GFP_KERNEL);
	if (!inst)
		return -ENOMEM;

	inst->dev = dev;
	inst->type = VPU_INST_TYPE_DEC;
	inst->ops = &wave5_vpu_dec_inst_ops;

	spin_lock_init(&inst->state_spinlock);

	inst->codec_info = kzalloc(sizeof(*inst->codec_info), GFP_KERNEL);
	if (!inst->codec_info)
		return -ENOMEM;

	v4l2_fh_init(&inst->v4l2_fh, vdev);
	v4l2_fh_add(&inst->v4l2_fh, filp);

	INIT_LIST_HEAD(&inst->list);

	inst->v4l2_m2m_dev = inst->dev->v4l2_m2m_dec_dev;
	inst->v4l2_fh.m2m_ctx =
		v4l2_m2m_ctx_init(inst->v4l2_m2m_dev, inst, wave5_vpu_dec_queue_init);
	if (IS_ERR(inst->v4l2_fh.m2m_ctx)) {
		ret = PTR_ERR(inst->v4l2_fh.m2m_ctx);
		goto cleanup_inst;
	}
	m2m_ctx = inst->v4l2_fh.m2m_ctx;

	v4l2_m2m_set_src_buffered(m2m_ctx, true);
	v4l2_m2m_set_dst_buffered(m2m_ctx, true);
	/*
	 * We use the M2M job queue to ensure synchronization of steps where
	 * needed, as IOCTLs can occur at any time and we need to run commands on
	 * the firmware in a specified order.
	 * In order to initialize the sequence on the firmware within an M2M
	 * job, the M2M framework needs to be able to queue jobs before
	 * the CAPTURE queue has been started, because we need the results of the
	 * initialization to properly prepare the CAPTURE queue with the correct
	 * amount of buffers.
	 * By setting ignore_cap_streaming to true the m2m framework will call
	 * job_ready as soon as the OUTPUT queue is streaming, instead of
	 * waiting until both the CAPTURE and OUTPUT queues are streaming.
	 */
	m2m_ctx->ignore_cap_streaming = true;

	v4l2_ctrl_handler_init(&inst->v4l2_ctrl_hdl, 10);
	v4l2_ctrl_new_std(&inst->v4l2_ctrl_hdl, NULL,
			  V4L2_CID_MIN_BUFFERS_FOR_CAPTURE, 1, 32, 1, 1);

	if (inst->v4l2_ctrl_hdl.error) {
		ret = -ENODEV;
		goto cleanup_inst;
	}

	inst->v4l2_fh.ctrl_handler = &inst->v4l2_ctrl_hdl;
	v4l2_ctrl_handler_setup(&inst->v4l2_ctrl_hdl);

	wave5_set_default_format(&inst->src_fmt, &inst->dst_fmt);
	inst->colorspace = V4L2_COLORSPACE_REC709;
	inst->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
	inst->quantization = V4L2_QUANTIZATION_DEFAULT;
	inst->xfer_func = V4L2_XFER_FUNC_DEFAULT;

	init_completion(&inst->irq_done);

	inst->id = ida_alloc(&inst->dev->inst_ida, GFP_KERNEL);
	if (inst->id < 0) {
		dev_warn(inst->dev->dev, "Allocating instance ID, fail: %d\n", inst->id);
		ret = inst->id;
		goto cleanup_inst;
	}

	/*
	 * For Wave515 SRAM memory was already allocated
	 * at wave5_vpu_dec_register_device()
	 */
	if (inst->dev->product_code != WAVE515_CODE)
		wave5_vdi_allocate_sram(inst->dev);

	ret = mutex_lock_interruptible(&dev->dev_lock);
	if (ret)
		goto cleanup_inst;

	if (list_empty(&dev->instances))
		pm_runtime_use_autosuspend(inst->dev->dev);

	list_add_tail(&inst->list, &dev->instances);

	mutex_unlock(&dev->dev_lock);

	return 0;

cleanup_inst:
	wave5_cleanup_instance(inst, filp);
	return ret;
}

static int wave5_vpu_dec_release(struct file *filp)
{
	return wave5_vpu_release_device(filp, wave5_vpu_dec_close, "decoder");
}

static const struct v4l2_file_operations wave5_vpu_dec_fops = {
	.owner = THIS_MODULE,
	.open = wave5_vpu_open_dec,
	.release = wave5_vpu_dec_release,
	.unlocked_ioctl = video_ioctl2,
	.poll = v4l2_m2m_fop_poll,
	.mmap = v4l2_m2m_fop_mmap,
};

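/*
 * Register the decoder video device with the V4L2 core and set up its m2m
 * device; for Wave515 the SRAM memory is also allocated here, ahead of
 * firmware initialization.
 */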
int wave5_vpu_dec_register_device(struct vpu_device *dev)
{
	struct video_device *vdev_dec;
	int ret;

	/*
	 * Secondary AXI setup for Wave515 is done by INIT_VPU command,
	 * i.e. wave5_vpu_init(), that's why we allocate SRAM memory early.
	 */
	if (dev->product_code == WAVE515_CODE)
		wave5_vdi_allocate_sram(dev);

	vdev_dec = devm_kzalloc(dev->v4l2_dev.dev, sizeof(*vdev_dec), GFP_KERNEL);
	if (!vdev_dec)
		return -ENOMEM;

	dev->v4l2_m2m_dec_dev = v4l2_m2m_init(&wave5_vpu_dec_m2m_ops);
	if (IS_ERR(dev->v4l2_m2m_dec_dev)) {
		ret = PTR_ERR(dev->v4l2_m2m_dec_dev);
		dev_err(dev->dev, "v4l2_m2m_init, fail: %d\n", ret);
		return -EINVAL;
	}

	dev->video_dev_dec = vdev_dec;

	strscpy(vdev_dec->name, VPU_DEC_DEV_NAME, sizeof(vdev_dec->name));
	vdev_dec->fops = &wave5_vpu_dec_fops;
	vdev_dec->ioctl_ops = &wave5_vpu_dec_ioctl_ops;
	vdev_dec->release = video_device_release_empty;
	vdev_dec->v4l2_dev = &dev->v4l2_dev;
	vdev_dec->vfl_dir = VFL_DIR_M2M;
	vdev_dec->device_caps = V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_STREAMING;
	vdev_dec->lock = &dev->dev_lock;

	ret = video_register_device(vdev_dec, VFL_TYPE_VIDEO, -1);
	if (ret)
		return ret;

	video_set_drvdata(vdev_dec, dev);

	return 0;
}

void wave5_vpu_dec_unregister_device(struct vpu_device *dev)
{
	/*
	 * This is the freeing pair for the Wave515 SRAM memory allocation
	 * done at wave5_vpu_dec_register_device().
	 */
	if (dev->product_code == WAVE515_CODE)
		wave5_vdi_free_sram(dev);

	video_unregister_device(dev->video_dev_dec);
	if (dev->v4l2_m2m_dec_dev)
		v4l2_m2m_release(dev->v4l2_m2m_dec_dev);
}