// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
 * Wave5 series multi-standard codec IP - decoder interface
 *
 * Copyright (C) 2021-2023 CHIPS&MEDIA INC
 */

#include <linux/pm_runtime.h>
#include "wave5-helper.h"

#define VPU_DEC_DEV_NAME "C&M Wave5 VPU decoder"
#define VPU_DEC_DRV_NAME "wave5-dec"

/* Frame size limits for HEVC bitstreams (8-pixel aligned minimum). */
static const struct v4l2_frmsize_stepwise dec_hevc_frmsize = {
	.min_width = W5_MIN_DEC_PIC_8_WIDTH,
	.max_width = W5_MAX_DEC_PIC_WIDTH,
	.step_width = W5_DEC_CODEC_STEP_WIDTH,
	.min_height = W5_MIN_DEC_PIC_8_HEIGHT,
	.max_height = W5_MAX_DEC_PIC_HEIGHT,
	.step_height = W5_DEC_CODEC_STEP_HEIGHT,
};

/* Frame size limits for H.264 bitstreams (32-pixel aligned minimum). */
static const struct v4l2_frmsize_stepwise dec_h264_frmsize = {
	.min_width = W5_MIN_DEC_PIC_32_WIDTH,
	.max_width = W5_MAX_DEC_PIC_WIDTH,
	.step_width = W5_DEC_CODEC_STEP_WIDTH,
	.min_height = W5_MIN_DEC_PIC_32_HEIGHT,
	.max_height = W5_MAX_DEC_PIC_HEIGHT,
	.step_height = W5_DEC_CODEC_STEP_HEIGHT,
};

/* Frame size limits shared by all raw (decoded) capture formats. */
static const struct v4l2_frmsize_stepwise dec_raw_frmsize = {
	.min_width = W5_MIN_DEC_PIC_8_WIDTH,
	.max_width = W5_MAX_DEC_PIC_WIDTH,
	.step_width = W5_DEC_RAW_STEP_WIDTH,
	.min_height = W5_MIN_DEC_PIC_8_HEIGHT,
	.max_height = W5_MAX_DEC_PIC_HEIGHT,
	.step_height = W5_DEC_RAW_STEP_HEIGHT,
};

/*
 * Formats supported by the decoder: compressed (codec) formats on the
 * OUTPUT queue, raw formats on the CAPTURE queue.
 */
static const struct vpu_format dec_fmt_list[FMT_TYPES][MAX_FMTS] = {
	[VPU_FMT_TYPE_CODEC] = {
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_HEVC,
			.v4l2_frmsize = &dec_hevc_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_H264,
			.v4l2_frmsize = &dec_h264_frmsize,
		},
	},
	[VPU_FMT_TYPE_RAW] = {
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV420,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV12,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV21,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV422P,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV16,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV61,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV420M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV12M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV21M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV422M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV16M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV61M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
	}
};

/*
 * Make sure that the state switch is allowed and add logging for debugging
 * purposes
 */
static int switch_state(struct vpu_instance *inst, enum vpu_instance_state state)
{
	switch (state) {
	case VPU_INST_STATE_NONE:
		/*
		 * Switching (back) to NONE is never valid: the break drops
		 * out of the switch and into the WARN below.
		 */
		break;
	case VPU_INST_STATE_OPEN:
		if (inst->state != VPU_INST_STATE_NONE)
			goto invalid_state_switch;
		goto valid_state_switch;
	case VPU_INST_STATE_INIT_SEQ:
		/* Re-entering INIT_SEQ from STOP happens on a seek/restart. */
		if (inst->state != VPU_INST_STATE_OPEN && inst->state != VPU_INST_STATE_STOP)
			goto invalid_state_switch;
		goto valid_state_switch;
	case VPU_INST_STATE_PIC_RUN:
		if (inst->state != VPU_INST_STATE_INIT_SEQ)
			goto invalid_state_switch;
		goto valid_state_switch;
	case VPU_INST_STATE_STOP:
		/* STOP may be entered from any state (error or drain). */
		goto valid_state_switch;
	}
invalid_state_switch:
	WARN(1, "Invalid state switch from %s to %s.\n",
	     state_to_str(inst->state), state_to_str(state));
	return -EINVAL;
valid_state_switch:
	dev_dbg(inst->dev->dev, "Switch state from %s to %s.\n",
		state_to_str(inst->state), state_to_str(state));
	inst->state = state;
	return 0;
}

/* Perform a checked state transition under the state spinlock. */
static int set_instance_state(struct vpu_instance *inst, enum vpu_instance_state state)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&inst->state_spinlock, flags);
	ret =
switch_state(inst, state);
	spin_unlock_irqrestore(&inst->state_spinlock, flags);

	return ret;
}

/*
 * Tell the firmware that no more bitstream data will arrive by pushing a
 * zero-sized update. Returns 0 on success or a negative error code.
 */
static int wave5_vpu_dec_set_eos_on_firmware(struct vpu_instance *inst)
{
	int ret;

	ret = wave5_vpu_dec_update_bitstream_buffer(inst, 0);
	if (ret) {
		/*
		 * To set the EOS flag, a command is sent to the firmware.
		 * That command may never return (timeout) or may report an error.
		 */
		dev_err(inst->dev->dev,
			"Setting EOS for the bitstream, fail: %d\n", ret);
		return ret;
	}
	return 0;
}

/* True when the buffer marked as last on the OUTPUT queue was fully consumed. */
static bool wave5_last_src_buffer_consumed(struct v4l2_m2m_ctx *m2m_ctx)
{
	struct vpu_src_buffer *vpu_buf;

	if (!m2m_ctx->last_src_buf)
		return false;

	vpu_buf = wave5_to_vpu_src_buf(m2m_ctx->last_src_buf);
	return vpu_buf->consumed;
}

/*
 * Account for the bitstream bytes the firmware consumed (derived from the
 * advance of the read pointer in the ring buffer) and return the source
 * buffers that are now fully decoded. Partially consumed bytes are carried
 * over in inst->remaining_consumed_bytes for the next call.
 */
static void wave5_handle_src_buffer(struct vpu_instance *inst, dma_addr_t rd_ptr)
{
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct v4l2_m2m_buffer *buf, *n;
	size_t consumed_bytes = 0;

	if (rd_ptr >= inst->last_rd_ptr) {
		consumed_bytes = rd_ptr - inst->last_rd_ptr;
	} else {
		/* The read pointer wrapped around the ring buffer. */
		size_t rd_offs = rd_ptr - inst->bitstream_vbuf.daddr;
		size_t last_rd_offs = inst->last_rd_ptr - inst->bitstream_vbuf.daddr;

		consumed_bytes = rd_offs + (inst->bitstream_vbuf.size - last_rd_offs);
	}

	inst->last_rd_ptr = rd_ptr;
	consumed_bytes += inst->remaining_consumed_bytes;

	dev_dbg(inst->dev->dev, "%s: %zu bytes of bitstream was consumed", __func__,
		consumed_bytes);

	v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buf, n) {
		struct vb2_v4l2_buffer *src_buf = &buf->vb;
		size_t src_size = vb2_get_plane_payload(&src_buf->vb2_buf, 0);

		if (src_size > consumed_bytes)
			break;

		dev_dbg(inst->dev->dev, "%s: removing src buffer %i",
			__func__, src_buf->vb2_buf.index);
		src_buf = v4l2_m2m_src_buf_remove(m2m_ctx);
		/* Remember the timestamp for the matching decoded frame. */
		inst->timestamp = src_buf->vb2_buf.timestamp;
		v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
		consumed_bytes -= src_size;

		/* Handle the case the last bitstream buffer has been picked */
		if (src_buf == m2m_ctx->last_src_buf) {
			int ret;

			m2m_ctx->last_src_buf = NULL;
			ret = wave5_vpu_dec_set_eos_on_firmware(inst);
			if (ret)
				dev_warn(inst->dev->dev,
					 "Setting EOS for the bitstream, fail: %d\n", ret);
			break;
		}
	}

	inst->remaining_consumed_bytes = consumed_bytes;
}

/*
 * Kick off decoding of one frame. On failure the pending source buffer is
 * returned with an error, the instance is moved to STOP and the m2m job is
 * finished.
 */
static int start_decode(struct vpu_instance *inst, u32 *fail_res)
{
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	int ret = 0;

	ret = wave5_vpu_dec_start_one_frame(inst, fail_res);
	if (ret) {
		struct vb2_v4l2_buffer *src_buf;

		src_buf = v4l2_m2m_src_buf_remove(m2m_ctx);
		if (src_buf)
			v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR);
		set_instance_state(inst, VPU_INST_STATE_STOP);

		dev_dbg(inst->dev->dev, "%s: pic run failed / finish job", __func__);
		v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
	}

	return ret;
}

/*
 * Complete the drain sequence by returning an empty capture buffer flagged
 * LAST. If none is queued, arm next_buf_last so the next queued capture
 * buffer will be flagged instead. Caller must hold state_spinlock.
 */
static void flag_last_buffer_done(struct vpu_instance *inst)
{
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct vb2_v4l2_buffer *vb;
	int i;

	lockdep_assert_held(&inst->state_spinlock);

	vb = v4l2_m2m_dst_buf_remove(m2m_ctx);
	if (!vb) {
		m2m_ctx->is_draining = true;
		m2m_ctx->next_buf_last = true;
		return;
	}

	/* The LAST buffer carries no payload. */
	for (i = 0; i < vb->vb2_buf.num_planes; i++)
		vb2_set_plane_payload(&vb->vb2_buf, i, 0);
	vb->field = V4L2_FIELD_NONE;

	v4l2_m2m_last_buffer_done(m2m_ctx, vb);
}

/*
 * Queue a V4L2_EVENT_EOS to userspace and record that it was sent.
 * Caller must hold state_spinlock.
 */
static void send_eos_event(struct vpu_instance *inst)
{
	static const struct v4l2_event vpu_event_eos = {
		.type = V4L2_EVENT_EOS
	};

	lockdep_assert_held(&inst->state_spinlock);

	v4l2_event_queue_fh(&inst->v4l2_fh, &vpu_event_eos);
	inst->eos = false;
	inst->sent_eos = true;
}

static int
handle_dynamic_resolution_change(struct vpu_instance *inst) 287 { 288 struct v4l2_fh *fh = &inst->v4l2_fh; 289 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx; 290 291 static const struct v4l2_event vpu_event_src_ch = { 292 .type = V4L2_EVENT_SOURCE_CHANGE, 293 .u.src_change.changes = V4L2_EVENT_SRC_CH_RESOLUTION, 294 }; 295 struct dec_info *p_dec_info = &inst->codec_info->dec_info; 296 struct dec_initial_info *initial_info = &inst->codec_info->dec_info.initial_info; 297 298 lockdep_assert_held(&inst->state_spinlock); 299 300 dev_dbg(inst->dev->dev, "%s: rd_ptr %pad", __func__, &initial_info->rd_ptr); 301 302 dev_dbg(inst->dev->dev, "%s: width: %u height: %u profile: %u | minbuffer: %u\n", 303 __func__, initial_info->pic_width, initial_info->pic_height, 304 initial_info->profile, initial_info->min_frame_buffer_count); 305 306 inst->needs_reallocation = true; 307 inst->fbc_buf_count = initial_info->min_frame_buffer_count + 1; 308 if (inst->fbc_buf_count != v4l2_m2m_num_dst_bufs_ready(m2m_ctx)) { 309 struct v4l2_ctrl *ctrl; 310 311 ctrl = v4l2_ctrl_find(&inst->v4l2_ctrl_hdl, 312 V4L2_CID_MIN_BUFFERS_FOR_CAPTURE); 313 if (ctrl) 314 v4l2_ctrl_s_ctrl(ctrl, inst->fbc_buf_count); 315 } 316 317 if (p_dec_info->initial_info_obtained) { 318 const struct vpu_format *vpu_fmt; 319 320 inst->conf_win.left = initial_info->pic_crop_rect.left; 321 inst->conf_win.top = initial_info->pic_crop_rect.top; 322 inst->conf_win.width = initial_info->pic_width - 323 initial_info->pic_crop_rect.left - initial_info->pic_crop_rect.right; 324 inst->conf_win.height = initial_info->pic_height - 325 initial_info->pic_crop_rect.top - initial_info->pic_crop_rect.bottom; 326 327 vpu_fmt = wave5_find_vpu_fmt(inst->src_fmt.pixelformat, 328 dec_fmt_list[VPU_FMT_TYPE_CODEC]); 329 if (!vpu_fmt) 330 return -EINVAL; 331 332 wave5_update_pix_fmt(&inst->src_fmt, 333 VPU_FMT_TYPE_CODEC, 334 initial_info->pic_width, 335 initial_info->pic_height, 336 vpu_fmt->v4l2_frmsize); 337 338 vpu_fmt = 
wave5_find_vpu_fmt(inst->dst_fmt.pixelformat, 339 dec_fmt_list[VPU_FMT_TYPE_RAW]); 340 if (!vpu_fmt) 341 return -EINVAL; 342 343 wave5_update_pix_fmt(&inst->dst_fmt, 344 VPU_FMT_TYPE_RAW, 345 initial_info->pic_width, 346 initial_info->pic_height, 347 vpu_fmt->v4l2_frmsize); 348 } 349 350 v4l2_event_queue_fh(fh, &vpu_event_src_ch); 351 352 return 0; 353 } 354 355 static void wave5_vpu_dec_finish_decode(struct vpu_instance *inst) 356 { 357 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx; 358 struct dec_output_info dec_info; 359 int ret; 360 struct vb2_v4l2_buffer *dec_buf = NULL; 361 struct vb2_v4l2_buffer *disp_buf = NULL; 362 struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx); 363 364 dev_dbg(inst->dev->dev, "%s: Fetch output info from firmware.", __func__); 365 366 ret = wave5_vpu_dec_get_output_info(inst, &dec_info); 367 if (ret) { 368 dev_dbg(inst->dev->dev, "%s: could not get output info.", __func__); 369 v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx); 370 return; 371 } 372 373 dev_dbg(inst->dev->dev, "%s: rd_ptr %pad wr_ptr %pad", __func__, &dec_info.rd_ptr, 374 &dec_info.wr_ptr); 375 wave5_handle_src_buffer(inst, dec_info.rd_ptr); 376 377 dev_dbg(inst->dev->dev, "%s: dec_info dec_idx %i disp_idx %i", __func__, 378 dec_info.index_frame_decoded, dec_info.index_frame_display); 379 380 if (!vb2_is_streaming(dst_vq)) { 381 dev_dbg(inst->dev->dev, "%s: capture is not streaming..", __func__); 382 v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx); 383 return; 384 } 385 386 /* Remove decoded buffer from the ready queue now that it has been 387 * decoded. 
388 */ 389 if (dec_info.index_frame_decoded >= 0) { 390 struct vb2_buffer *vb = vb2_get_buffer(dst_vq, 391 dec_info.index_frame_decoded); 392 if (vb) { 393 dec_buf = to_vb2_v4l2_buffer(vb); 394 dec_buf->vb2_buf.timestamp = inst->timestamp; 395 } else { 396 dev_warn(inst->dev->dev, "%s: invalid decoded frame index %i", 397 __func__, dec_info.index_frame_decoded); 398 } 399 } 400 401 if (dec_info.index_frame_display >= 0) { 402 disp_buf = v4l2_m2m_dst_buf_remove_by_idx(m2m_ctx, dec_info.index_frame_display); 403 if (!disp_buf) 404 dev_warn(inst->dev->dev, "%s: invalid display frame index %i", 405 __func__, dec_info.index_frame_display); 406 } 407 408 /* If there is anything to display, do that now */ 409 if (disp_buf) { 410 struct vpu_dst_buffer *dst_vpu_buf = wave5_to_vpu_dst_buf(disp_buf); 411 412 if (inst->dst_fmt.num_planes == 1) { 413 vb2_set_plane_payload(&disp_buf->vb2_buf, 0, 414 inst->dst_fmt.plane_fmt[0].sizeimage); 415 } else if (inst->dst_fmt.num_planes == 2) { 416 vb2_set_plane_payload(&disp_buf->vb2_buf, 0, 417 inst->dst_fmt.plane_fmt[0].sizeimage); 418 vb2_set_plane_payload(&disp_buf->vb2_buf, 1, 419 inst->dst_fmt.plane_fmt[1].sizeimage); 420 } else if (inst->dst_fmt.num_planes == 3) { 421 vb2_set_plane_payload(&disp_buf->vb2_buf, 0, 422 inst->dst_fmt.plane_fmt[0].sizeimage); 423 vb2_set_plane_payload(&disp_buf->vb2_buf, 1, 424 inst->dst_fmt.plane_fmt[1].sizeimage); 425 vb2_set_plane_payload(&disp_buf->vb2_buf, 2, 426 inst->dst_fmt.plane_fmt[2].sizeimage); 427 } 428 429 /* TODO implement interlace support */ 430 disp_buf->field = V4L2_FIELD_NONE; 431 dst_vpu_buf->display = true; 432 v4l2_m2m_buf_done(disp_buf, VB2_BUF_STATE_DONE); 433 434 dev_dbg(inst->dev->dev, "%s: frame_cycle %8u (payload %lu)\n", 435 __func__, dec_info.frame_cycle, 436 vb2_get_plane_payload(&disp_buf->vb2_buf, 0)); 437 } 438 439 if ((dec_info.index_frame_display == DISPLAY_IDX_FLAG_SEQ_END || 440 dec_info.sequence_changed)) { 441 unsigned long flags; 442 443 
spin_lock_irqsave(&inst->state_spinlock, flags); 444 if (!v4l2_m2m_has_stopped(m2m_ctx)) { 445 switch_state(inst, VPU_INST_STATE_STOP); 446 447 if (dec_info.sequence_changed) 448 handle_dynamic_resolution_change(inst); 449 else 450 send_eos_event(inst); 451 452 flag_last_buffer_done(inst); 453 } 454 spin_unlock_irqrestore(&inst->state_spinlock, flags); 455 } 456 457 if (inst->sent_eos && 458 v4l2_m2m_get_curr_priv(inst->v4l2_m2m_dev)) { 459 struct queue_status_info q_status; 460 461 wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status); 462 if (q_status.report_queue_count == 0 && 463 q_status.instance_queue_count == 0) 464 v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx); 465 } 466 467 inst->queuing_fail = false; 468 } 469 470 static int wave5_vpu_dec_querycap(struct file *file, void *fh, struct v4l2_capability *cap) 471 { 472 strscpy(cap->driver, VPU_DEC_DRV_NAME, sizeof(cap->driver)); 473 strscpy(cap->card, VPU_DEC_DRV_NAME, sizeof(cap->card)); 474 475 return 0; 476 } 477 478 static int wave5_vpu_dec_enum_framesizes(struct file *f, void *fh, struct v4l2_frmsizeenum *fsize) 479 { 480 const struct vpu_format *vpu_fmt; 481 482 if (fsize->index) 483 return -EINVAL; 484 485 vpu_fmt = wave5_find_vpu_fmt(fsize->pixel_format, dec_fmt_list[VPU_FMT_TYPE_CODEC]); 486 if (!vpu_fmt) { 487 vpu_fmt = wave5_find_vpu_fmt(fsize->pixel_format, dec_fmt_list[VPU_FMT_TYPE_RAW]); 488 if (!vpu_fmt) 489 return -EINVAL; 490 } 491 492 fsize->type = V4L2_FRMSIZE_TYPE_CONTINUOUS; 493 fsize->stepwise.min_width = vpu_fmt->v4l2_frmsize->min_width; 494 fsize->stepwise.max_width = vpu_fmt->v4l2_frmsize->max_width; 495 fsize->stepwise.step_width = W5_DEC_CODEC_STEP_WIDTH; 496 fsize->stepwise.min_height = vpu_fmt->v4l2_frmsize->min_height; 497 fsize->stepwise.max_height = vpu_fmt->v4l2_frmsize->max_height; 498 fsize->stepwise.step_height = W5_DEC_CODEC_STEP_HEIGHT; 499 500 return 0; 501 } 502 503 static int wave5_vpu_dec_enum_fmt_cap(struct file *file, void *fh, struct v4l2_fmtdesc *f) 
504 { 505 const struct vpu_format *vpu_fmt; 506 507 vpu_fmt = wave5_find_vpu_fmt_by_idx(f->index, dec_fmt_list[VPU_FMT_TYPE_RAW]); 508 if (!vpu_fmt) 509 return -EINVAL; 510 511 f->pixelformat = vpu_fmt->v4l2_pix_fmt; 512 f->flags = 0; 513 514 return 0; 515 } 516 517 static int wave5_vpu_dec_try_fmt_cap(struct file *file, void *fh, struct v4l2_format *f) 518 { 519 struct vpu_instance *inst = file_to_vpu_inst(file); 520 struct dec_info *p_dec_info = &inst->codec_info->dec_info; 521 const struct v4l2_frmsize_stepwise *frmsize; 522 const struct vpu_format *vpu_fmt; 523 int width, height; 524 525 dev_dbg(inst->dev->dev, 526 "%s: fourcc: %u width: %u height: %u nm planes: %u colorspace: %u field: %u\n", 527 __func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height, 528 f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field); 529 530 vpu_fmt = wave5_find_vpu_fmt(f->fmt.pix_mp.pixelformat, dec_fmt_list[VPU_FMT_TYPE_RAW]); 531 if (!vpu_fmt) { 532 width = inst->dst_fmt.width; 533 height = inst->dst_fmt.height; 534 f->fmt.pix_mp.pixelformat = inst->dst_fmt.pixelformat; 535 frmsize = &dec_raw_frmsize; 536 } else { 537 width = f->fmt.pix_mp.width; 538 height = f->fmt.pix_mp.height; 539 f->fmt.pix_mp.pixelformat = vpu_fmt->v4l2_pix_fmt; 540 frmsize = vpu_fmt->v4l2_frmsize; 541 } 542 543 if (p_dec_info->initial_info_obtained) { 544 width = inst->dst_fmt.width; 545 height = inst->dst_fmt.height; 546 } 547 548 wave5_update_pix_fmt(&f->fmt.pix_mp, VPU_FMT_TYPE_RAW, 549 width, height, frmsize); 550 f->fmt.pix_mp.colorspace = inst->colorspace; 551 f->fmt.pix_mp.ycbcr_enc = inst->ycbcr_enc; 552 f->fmt.pix_mp.quantization = inst->quantization; 553 f->fmt.pix_mp.xfer_func = inst->xfer_func; 554 555 return 0; 556 } 557 558 static int wave5_vpu_dec_s_fmt_cap(struct file *file, void *fh, struct v4l2_format *f) 559 { 560 struct vpu_instance *inst = file_to_vpu_inst(file); 561 int i, ret; 562 563 dev_dbg(inst->dev->dev, 564 "%s: fourcc: %u width: %u 
height: %u num_planes: %u colorspace: %u field: %u\n", 565 __func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height, 566 f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field); 567 568 ret = wave5_vpu_dec_try_fmt_cap(file, fh, f); 569 if (ret) 570 return ret; 571 572 inst->dst_fmt.width = f->fmt.pix_mp.width; 573 inst->dst_fmt.height = f->fmt.pix_mp.height; 574 inst->dst_fmt.pixelformat = f->fmt.pix_mp.pixelformat; 575 inst->dst_fmt.field = f->fmt.pix_mp.field; 576 inst->dst_fmt.flags = f->fmt.pix_mp.flags; 577 inst->dst_fmt.num_planes = f->fmt.pix_mp.num_planes; 578 for (i = 0; i < inst->dst_fmt.num_planes; i++) { 579 inst->dst_fmt.plane_fmt[i].bytesperline = f->fmt.pix_mp.plane_fmt[i].bytesperline; 580 inst->dst_fmt.plane_fmt[i].sizeimage = f->fmt.pix_mp.plane_fmt[i].sizeimage; 581 } 582 583 if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV12 || 584 inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV12M) { 585 inst->cbcr_interleave = true; 586 inst->nv21 = false; 587 inst->output_format = FORMAT_420; 588 } else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV21 || 589 inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV21M) { 590 inst->cbcr_interleave = true; 591 inst->nv21 = true; 592 inst->output_format = FORMAT_420; 593 } else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV16 || 594 inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV16M) { 595 inst->cbcr_interleave = true; 596 inst->nv21 = false; 597 inst->output_format = FORMAT_422; 598 } else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV61 || 599 inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV61M) { 600 inst->cbcr_interleave = true; 601 inst->nv21 = true; 602 inst->output_format = FORMAT_422; 603 } else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_YUV422P || 604 inst->dst_fmt.pixelformat == V4L2_PIX_FMT_YUV422M) { 605 inst->cbcr_interleave = false; 606 inst->nv21 = false; 607 inst->output_format = FORMAT_422; 608 } else { 609 inst->cbcr_interleave = false; 610 inst->nv21 = false; 611 
inst->output_format = FORMAT_420; 612 } 613 614 return 0; 615 } 616 617 static int wave5_vpu_dec_g_fmt_cap(struct file *file, void *fh, struct v4l2_format *f) 618 { 619 struct vpu_instance *inst = file_to_vpu_inst(file); 620 int i; 621 622 f->fmt.pix_mp.width = inst->dst_fmt.width; 623 f->fmt.pix_mp.height = inst->dst_fmt.height; 624 f->fmt.pix_mp.pixelformat = inst->dst_fmt.pixelformat; 625 f->fmt.pix_mp.field = inst->dst_fmt.field; 626 f->fmt.pix_mp.flags = inst->dst_fmt.flags; 627 f->fmt.pix_mp.num_planes = inst->dst_fmt.num_planes; 628 for (i = 0; i < f->fmt.pix_mp.num_planes; i++) { 629 f->fmt.pix_mp.plane_fmt[i].bytesperline = inst->dst_fmt.plane_fmt[i].bytesperline; 630 f->fmt.pix_mp.plane_fmt[i].sizeimage = inst->dst_fmt.plane_fmt[i].sizeimage; 631 } 632 633 f->fmt.pix_mp.colorspace = inst->colorspace; 634 f->fmt.pix_mp.ycbcr_enc = inst->ycbcr_enc; 635 f->fmt.pix_mp.quantization = inst->quantization; 636 f->fmt.pix_mp.xfer_func = inst->xfer_func; 637 638 return 0; 639 } 640 641 static int wave5_vpu_dec_enum_fmt_out(struct file *file, void *fh, struct v4l2_fmtdesc *f) 642 { 643 struct vpu_instance *inst = file_to_vpu_inst(file); 644 const struct vpu_format *vpu_fmt; 645 646 dev_dbg(inst->dev->dev, "%s: index: %u\n", __func__, f->index); 647 648 vpu_fmt = wave5_find_vpu_fmt_by_idx(f->index, dec_fmt_list[VPU_FMT_TYPE_CODEC]); 649 if (!vpu_fmt) 650 return -EINVAL; 651 652 f->pixelformat = vpu_fmt->v4l2_pix_fmt; 653 f->flags = V4L2_FMT_FLAG_DYN_RESOLUTION | V4L2_FMT_FLAG_COMPRESSED; 654 655 return 0; 656 } 657 658 static int wave5_vpu_dec_try_fmt_out(struct file *file, void *fh, struct v4l2_format *f) 659 { 660 struct vpu_instance *inst = file_to_vpu_inst(file); 661 const struct v4l2_frmsize_stepwise *frmsize; 662 const struct vpu_format *vpu_fmt; 663 int width, height; 664 665 dev_dbg(inst->dev->dev, 666 "%s: fourcc: %u width: %u height: %u num_planes: %u colorspace: %u field: %u\n", 667 __func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, 
f->fmt.pix_mp.height,
		f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);

	vpu_fmt = wave5_find_vpu_fmt(f->fmt.pix_mp.pixelformat, dec_fmt_list[VPU_FMT_TYPE_CODEC]);
	if (!vpu_fmt) {
		/* Unknown codec format: fall back to the currently set one. */
		width = inst->src_fmt.width;
		height = inst->src_fmt.height;
		f->fmt.pix_mp.pixelformat = inst->src_fmt.pixelformat;
		frmsize = &dec_hevc_frmsize;
	} else {
		width = f->fmt.pix_mp.width;
		height = f->fmt.pix_mp.height;
		f->fmt.pix_mp.pixelformat = vpu_fmt->v4l2_pix_fmt;
		frmsize = vpu_fmt->v4l2_frmsize;
	}

	wave5_update_pix_fmt(&f->fmt.pix_mp, VPU_FMT_TYPE_CODEC,
			     width, height, frmsize);

	return 0;
}

/*
 * VIDIOC_S_FMT on the OUTPUT queue: select the codec standard from the
 * fourcc, store the bitstream format and colorimetry, and propagate the
 * coded size to the capture (raw) format.
 */
static int wave5_vpu_dec_s_fmt_out(struct file *file, void *fh, struct v4l2_format *f)
{
	struct vpu_instance *inst = file_to_vpu_inst(file);
	const struct vpu_format *vpu_fmt;
	int i, ret;

	dev_dbg(inst->dev->dev,
		"%s: fourcc: %u width: %u height: %u num_planes: %u field: %u\n",
		__func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
		f->fmt.pix_mp.num_planes, f->fmt.pix_mp.field);

	ret = wave5_vpu_dec_try_fmt_out(file, fh, f);
	if (ret)
		return ret;

	inst->std = wave5_to_vpu_std(f->fmt.pix_mp.pixelformat, inst->type);
	if (inst->std == STD_UNKNOWN) {
		dev_warn(inst->dev->dev, "unsupported pixelformat: %.4s\n",
			 (char *)&f->fmt.pix_mp.pixelformat);
		return -EINVAL;
	}

	inst->src_fmt.width = f->fmt.pix_mp.width;
	inst->src_fmt.height = f->fmt.pix_mp.height;
	inst->src_fmt.pixelformat = f->fmt.pix_mp.pixelformat;
	inst->src_fmt.field = f->fmt.pix_mp.field;
	inst->src_fmt.flags = f->fmt.pix_mp.flags;
	inst->src_fmt.num_planes = f->fmt.pix_mp.num_planes;
	for (i = 0; i < inst->src_fmt.num_planes; i++) {
		inst->src_fmt.plane_fmt[i].bytesperline = f->fmt.pix_mp.plane_fmt[i].bytesperline;
		inst->src_fmt.plane_fmt[i].sizeimage = f->fmt.pix_mp.plane_fmt[i].sizeimage;
	}

	/* Colorimetry comes from the bitstream side and is mirrored on capture. */
	inst->colorspace = f->fmt.pix_mp.colorspace;
	inst->ycbcr_enc = f->fmt.pix_mp.ycbcr_enc;
	inst->quantization = f->fmt.pix_mp.quantization;
	inst->xfer_func = f->fmt.pix_mp.xfer_func;

	vpu_fmt = wave5_find_vpu_fmt(inst->dst_fmt.pixelformat, dec_fmt_list[VPU_FMT_TYPE_RAW]);
	if (!vpu_fmt)
		return -EINVAL;

	wave5_update_pix_fmt(&inst->dst_fmt, VPU_FMT_TYPE_RAW,
			     f->fmt.pix_mp.width, f->fmt.pix_mp.height,
			     vpu_fmt->v4l2_frmsize);

	return 0;
}

/*
 * VIDIOC_G_SELECTION: report the compose rectangle. After the sequence
 * header has been parsed (state > OPEN), the visible rectangle from the
 * bitstream (conf_win) is reported.
 */
static int wave5_vpu_dec_g_selection(struct file *file, void *fh, struct v4l2_selection *s)
{
	struct vpu_instance *inst = file_to_vpu_inst(file);

	dev_dbg(inst->dev->dev, "%s: type: %u | target: %u\n", __func__, s->type, s->target);

	if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
		return -EINVAL;
	switch (s->target) {
	case V4L2_SEL_TGT_COMPOSE_BOUNDS:
	case V4L2_SEL_TGT_COMPOSE_PADDED:
		s->r.left = 0;
		s->r.top = 0;
		s->r.width = inst->dst_fmt.width;
		s->r.height = inst->dst_fmt.height;
		break;
	case V4L2_SEL_TGT_COMPOSE:
	case V4L2_SEL_TGT_COMPOSE_DEFAULT:
		s->r.left = 0;
		s->r.top = 0;
		if (inst->state > VPU_INST_STATE_OPEN) {
			s->r = inst->conf_win;
		} else {
			s->r.width = inst->src_fmt.width;
			s->r.height = inst->src_fmt.height;
		}
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

/*
 * VIDIOC_S_SELECTION: the compose rectangle is fixed to the full capture
 * frame; the request is answered with that rectangle rather than stored.
 */
static int wave5_vpu_dec_s_selection(struct file *file, void *fh, struct v4l2_selection *s)
{
	struct vpu_instance *inst = file_to_vpu_inst(file);

	if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
		return -EINVAL;

	if (s->target != V4L2_SEL_TGT_COMPOSE)
		return -EINVAL;

	dev_dbg(inst->dev->dev, "V4L2_SEL_TGT_COMPOSE w: %u h: %u\n",
		s->r.width, s->r.height);

	s->r.left = 0;
	s->r.top = 0;
	s->r.width = inst->dst_fmt.width;
	s->r.height = inst->dst_fmt.height;

	return 0;
}

static int wave5_vpu_dec_stop(struct vpu_instance
*inst)
{
	int ret = 0;
	unsigned long flags;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;

	spin_lock_irqsave(&inst->state_spinlock, flags);

	/* A drain is already in progress; a second STOP is not allowed. */
	if (m2m_ctx->is_draining) {
		ret = -EBUSY;
		goto unlock_and_return;
	}

	if (inst->state != VPU_INST_STATE_NONE) {
		/*
		 * Temporarily release the state_spinlock so that subsequent
		 * calls do not block on a mutex while inside this spinlock.
		 */
		spin_unlock_irqrestore(&inst->state_spinlock, flags);
		ret = wave5_vpu_dec_set_eos_on_firmware(inst);
		if (ret)
			return ret;

		spin_lock_irqsave(&inst->state_spinlock, flags);
		/*
		 * TODO eliminate this check by using a separate check for
		 * draining triggered by a resolution change.
		 */
		if (m2m_ctx->is_draining) {
			ret = -EBUSY;
			goto unlock_and_return;
		}
	}

	/*
	 * Used to remember the EOS state after the streamoff/on transition on
	 * the capture queue.
	 */
	inst->eos = true;

	if (m2m_ctx->has_stopped)
		goto unlock_and_return;

	m2m_ctx->last_src_buf = v4l2_m2m_last_src_buf(m2m_ctx);
	m2m_ctx->is_draining = true;

	/*
	 * Deferred to device run in case it wasn't in the ring buffer
	 * yet. In other case, we have to send the EOS signal to the
	 * firmware so that any pending PIC_RUN ends without new
	 * bitstream buffer.
	 */
	if (m2m_ctx->last_src_buf)
		goto unlock_and_return;

	/* Nothing was ever queued: complete the drain immediately. */
	if (inst->state == VPU_INST_STATE_NONE) {
		send_eos_event(inst);
		flag_last_buffer_done(inst);
	}

unlock_and_return:
	spin_unlock_irqrestore(&inst->state_spinlock, flags);
	return ret;
}

/*
 * V4L2_DEC_CMD_START: resume after a drain by clearing the stopped/EOS
 * state and re-arming the capture queue's last-buffer flag.
 */
static int wave5_vpu_dec_start(struct vpu_instance *inst)
{
	int ret = 0;
	unsigned long flags;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);

	spin_lock_irqsave(&inst->state_spinlock, flags);

	if (m2m_ctx->is_draining) {
		ret = -EBUSY;
		goto unlock_and_return;
	}

	if (m2m_ctx->has_stopped)
		m2m_ctx->has_stopped = false;

	vb2_clear_last_buffer_dequeued(dst_vq);
	inst->eos = false;

unlock_and_return:
	spin_unlock_irqrestore(&inst->state_spinlock, flags);
	return ret;
}

/* VIDIOC_DECODER_CMD: dispatch START/STOP after validating the command. */
static int wave5_vpu_dec_decoder_cmd(struct file *file, void *fh, struct v4l2_decoder_cmd *dc)
{
	struct vpu_instance *inst = file_to_vpu_inst(file);
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	int ret;

	dev_dbg(inst->dev->dev, "decoder command: %u\n", dc->cmd);

	ret = v4l2_m2m_ioctl_try_decoder_cmd(file, fh, dc);
	if (ret)
		return ret;

	switch (dc->cmd) {
	case V4L2_DEC_CMD_STOP:
		ret = wave5_vpu_dec_stop(inst);
		/* Just in case we don't have anything to decode anymore */
		v4l2_m2m_try_schedule(m2m_ctx);
		break;
	case V4L2_DEC_CMD_START:
		ret = wave5_vpu_dec_start(inst);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

static const struct v4l2_ioctl_ops wave5_vpu_dec_ioctl_ops = {
	.vidioc_querycap = wave5_vpu_dec_querycap,
	.vidioc_enum_framesizes = wave5_vpu_dec_enum_framesizes,

	.vidioc_enum_fmt_vid_cap = wave5_vpu_dec_enum_fmt_cap,
	.vidioc_s_fmt_vid_cap_mplane = wave5_vpu_dec_s_fmt_cap,
	.vidioc_g_fmt_vid_cap_mplane =
wave5_vpu_dec_g_fmt_cap,
	.vidioc_try_fmt_vid_cap_mplane = wave5_vpu_dec_try_fmt_cap,

	.vidioc_enum_fmt_vid_out = wave5_vpu_dec_enum_fmt_out,
	.vidioc_s_fmt_vid_out_mplane = wave5_vpu_dec_s_fmt_out,
	.vidioc_g_fmt_vid_out_mplane = wave5_vpu_g_fmt_out,
	.vidioc_try_fmt_vid_out_mplane = wave5_vpu_dec_try_fmt_out,

	.vidioc_g_selection = wave5_vpu_dec_g_selection,
	.vidioc_s_selection = wave5_vpu_dec_s_selection,

	.vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs,
	/*
	 * Firmware does not support CREATE_BUFS for CAPTURE queue. Since
	 * there is no immediate use-case for supporting CREATE_BUFS on
	 * just the OUTPUT queue, disable CREATE_BUFS altogether.
	 */
	.vidioc_querybuf = v4l2_m2m_ioctl_querybuf,
	.vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf,
	.vidioc_qbuf = v4l2_m2m_ioctl_qbuf,
	.vidioc_expbuf = v4l2_m2m_ioctl_expbuf,
	.vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf,
	.vidioc_streamon = v4l2_m2m_ioctl_streamon,
	.vidioc_streamoff = v4l2_m2m_ioctl_streamoff,

	.vidioc_try_decoder_cmd = v4l2_m2m_ioctl_try_decoder_cmd,
	.vidioc_decoder_cmd = wave5_vpu_dec_decoder_cmd,

	.vidioc_subscribe_event = wave5_vpu_subscribe_event,
	.vidioc_unsubscribe_event = v4l2_event_unsubscribe,
};

/*
 * vb2 queue_setup: report plane count and sizes from the negotiated format.
 * The CAPTURE queue additionally enforces the firmware's minimum frame
 * buffer count (fbc_buf_count).
 */
static int wave5_vpu_dec_queue_setup(struct vb2_queue *q, unsigned int *num_buffers,
				     unsigned int *num_planes, unsigned int sizes[],
				     struct device *alloc_devs[])
{
	struct vpu_instance *inst = vb2_get_drv_priv(q);
	struct v4l2_pix_format_mplane inst_format =
		(q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) ? inst->src_fmt : inst->dst_fmt;
	unsigned int i;

	dev_dbg(inst->dev->dev, "%s: num_buffers: %u | num_planes: %u | type: %u\n", __func__,
		*num_buffers, *num_planes, q->type);

	*num_planes = inst_format.num_planes;

	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
		/* The bitstream input is always a single plane. */
		sizes[0] = inst_format.plane_fmt[0].sizeimage;
		dev_dbg(inst->dev->dev, "%s: size[0]: %u\n", __func__, sizes[0]);
	} else if (q->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
		if (*num_buffers < inst->fbc_buf_count)
			*num_buffers = inst->fbc_buf_count;

		for (i = 0; i < *num_planes; i++) {
			sizes[i] = inst_format.plane_fmt[i].sizeimage;
			dev_dbg(inst->dev->dev, "%s: size[%u]: %u\n", __func__, i, sizes[i]);
		}
	}

	return 0;
}

/*
 * Allocate/register the frame buffers the firmware decodes into: the
 * compressed (FBC) reference buffers owned by the driver plus the linear
 * buffers backed by the application's capture buffers.
 */
static int wave5_prepare_fb(struct vpu_instance *inst)
{
	int linear_num;
	int non_linear_num;
	int fb_stride = 0, fb_height = 0;
	int luma_size, chroma_size;
	int ret, i;
	struct v4l2_m2m_buffer *buf, *n;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	u32 bitdepth = inst->codec_info->dec_info.initial_info.luma_bitdepth;

	switch (bitdepth) {
	case 8:
		break;
	case 10:
		/* 10-bit is only possible for HEVC, and only if the IP supports it. */
		if (inst->std == W_HEVC_DEC &&
		    inst->dev->attr.support_hevc10bit_dec)
			break;

		fallthrough;
	default:
		dev_err(inst->dev->dev, "no support for %d bit depth\n", bitdepth);

		return -EINVAL;
	}

	linear_num = v4l2_m2m_num_dst_bufs_ready(m2m_ctx);
	non_linear_num = inst->fbc_buf_count;

	/* Allocate (or reuse) the driver-owned compressed reference buffers. */
	for (i = 0; i < non_linear_num; i++) {
		struct frame_buffer *frame = &inst->frame_buf[i];
		struct vpu_buf *vframe = &inst->frame_vbuf[i];

		fb_stride = ALIGN(inst->dst_fmt.width * bitdepth / 8, 32);
		fb_height = ALIGN(inst->dst_fmt.height, 32);
		luma_size = fb_stride * fb_height;

		chroma_size = ALIGN(fb_stride / 2, 16) * fb_height;

		/* Already allocated with the right size: keep it. */
		if (vframe->size == (luma_size + chroma_size))
			continue;

		if
(vframe->size) 1022 wave5_vpu_dec_reset_framebuffer(inst, i); 1023 1024 vframe->size = luma_size + chroma_size; 1025 ret = wave5_vdi_allocate_dma_memory(inst->dev, vframe); 1026 if (ret) { 1027 dev_dbg(inst->dev->dev, 1028 "%s: Allocating FBC buf of size %zu, fail: %d\n", 1029 __func__, vframe->size, ret); 1030 return ret; 1031 } 1032 1033 frame->buf_y = vframe->daddr; 1034 frame->buf_cb = vframe->daddr + luma_size; 1035 frame->buf_cr = (dma_addr_t)-1; 1036 frame->size = vframe->size; 1037 frame->width = inst->src_fmt.width; 1038 frame->stride = fb_stride; 1039 frame->map_type = COMPRESSED_FRAME_MAP; 1040 frame->update_fb_info = true; 1041 } 1042 /* In case the count has reduced, clean up leftover framebuffer memory */ 1043 for (i = non_linear_num; i < MAX_REG_FRAME; i++) { 1044 ret = wave5_vpu_dec_reset_framebuffer(inst, i); 1045 if (ret) 1046 break; 1047 } 1048 1049 for (i = 0; i < linear_num; i++) { 1050 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx; 1051 struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx); 1052 struct vb2_buffer *vb = vb2_get_buffer(dst_vq, i); 1053 struct frame_buffer *frame = &inst->frame_buf[non_linear_num + i]; 1054 dma_addr_t buf_addr_y = 0, buf_addr_cb = 0, buf_addr_cr = 0; 1055 u32 buf_size = 0; 1056 u32 fb_stride = inst->dst_fmt.width; 1057 u32 luma_size = fb_stride * inst->dst_fmt.height; 1058 u32 chroma_size; 1059 1060 if (inst->output_format == FORMAT_422) 1061 chroma_size = fb_stride * inst->dst_fmt.height / 2; 1062 else 1063 chroma_size = fb_stride * inst->dst_fmt.height / 4; 1064 1065 if (inst->dst_fmt.num_planes == 1) { 1066 buf_size = vb2_plane_size(vb, 0); 1067 buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0); 1068 buf_addr_cb = buf_addr_y + luma_size; 1069 buf_addr_cr = buf_addr_cb + chroma_size; 1070 } else if (inst->dst_fmt.num_planes == 2) { 1071 buf_size = vb2_plane_size(vb, 0) + 1072 vb2_plane_size(vb, 1); 1073 buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0); 1074 buf_addr_cb = 
vb2_dma_contig_plane_dma_addr(vb, 1); 1075 buf_addr_cr = buf_addr_cb + chroma_size; 1076 } else if (inst->dst_fmt.num_planes == 3) { 1077 buf_size = vb2_plane_size(vb, 0) + 1078 vb2_plane_size(vb, 1) + 1079 vb2_plane_size(vb, 2); 1080 buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0); 1081 buf_addr_cb = vb2_dma_contig_plane_dma_addr(vb, 1); 1082 buf_addr_cr = vb2_dma_contig_plane_dma_addr(vb, 2); 1083 } 1084 1085 frame->buf_y = buf_addr_y; 1086 frame->buf_cb = buf_addr_cb; 1087 frame->buf_cr = buf_addr_cr; 1088 frame->size = buf_size; 1089 frame->width = inst->src_fmt.width; 1090 frame->stride = fb_stride; 1091 frame->map_type = LINEAR_FRAME_MAP; 1092 frame->update_fb_info = true; 1093 } 1094 1095 ret = wave5_vpu_dec_register_frame_buffer_ex(inst, non_linear_num, linear_num, 1096 fb_stride, inst->dst_fmt.height); 1097 if (ret) { 1098 dev_dbg(inst->dev->dev, "%s: vpu_dec_register_frame_buffer_ex fail: %d", 1099 __func__, ret); 1100 return ret; 1101 } 1102 1103 /* 1104 * Mark all frame buffers as out of display, to avoid using them before 1105 * the application have them queued. 
1106 */ 1107 for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) { 1108 ret = wave5_vpu_dec_set_disp_flag(inst, i); 1109 if (ret) { 1110 dev_dbg(inst->dev->dev, 1111 "%s: Setting display flag of buf index: %u, fail: %d\n", 1112 __func__, i, ret); 1113 } 1114 } 1115 1116 v4l2_m2m_for_each_dst_buf_safe(m2m_ctx, buf, n) { 1117 struct vb2_v4l2_buffer *vbuf = &buf->vb; 1118 1119 ret = wave5_vpu_dec_clr_disp_flag(inst, vbuf->vb2_buf.index); 1120 if (ret) 1121 dev_dbg(inst->dev->dev, 1122 "%s: Clearing display flag of buf index: %u, fail: %d\n", 1123 __func__, i, ret); 1124 } 1125 1126 return 0; 1127 } 1128 1129 static int write_to_ringbuffer(struct vpu_instance *inst, void *buffer, size_t buffer_size, 1130 struct vpu_buf *ring_buffer, dma_addr_t wr_ptr) 1131 { 1132 size_t size; 1133 size_t offset = wr_ptr - ring_buffer->daddr; 1134 int ret; 1135 1136 if (wr_ptr + buffer_size > ring_buffer->daddr + ring_buffer->size) { 1137 size = ring_buffer->daddr + ring_buffer->size - wr_ptr; 1138 ret = wave5_vdi_write_memory(inst->dev, ring_buffer, offset, (u8 *)buffer, size); 1139 if (ret < 0) 1140 return ret; 1141 1142 ret = wave5_vdi_write_memory(inst->dev, ring_buffer, 0, (u8 *)buffer + size, 1143 buffer_size - size); 1144 if (ret < 0) 1145 return ret; 1146 } else { 1147 ret = wave5_vdi_write_memory(inst->dev, ring_buffer, offset, (u8 *)buffer, 1148 buffer_size); 1149 if (ret < 0) 1150 return ret; 1151 } 1152 1153 return 0; 1154 } 1155 1156 static struct vpu_src_buffer *inst_src_buf_remove(struct vpu_instance *inst) 1157 { 1158 struct vpu_src_buffer *b; 1159 int ret; 1160 1161 ret = mutex_lock_interruptible(&inst->feed_lock); 1162 if (ret) 1163 return NULL; 1164 1165 if (list_empty(&inst->avail_src_bufs)) { 1166 mutex_unlock(&inst->feed_lock); 1167 return NULL; 1168 } 1169 b = list_first_entry(&inst->avail_src_bufs, struct vpu_src_buffer, list); 1170 list_del_init(&b->list); 1171 mutex_unlock(&inst->feed_lock); 1172 return b; 1173 } 1174 1175 static int 
fill_ringbuffer(struct vpu_instance *inst)
{
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct vpu_src_buffer *vpu_buf;
	int ret = 0;

	/* While draining, nothing may be written past the last source buffer. */
	if (m2m_ctx->last_src_buf) {
		struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(m2m_ctx->last_src_buf);

		if (vpu_buf->consumed) {
			dev_dbg(inst->dev->dev, "last src buffer already written\n");
			return 0;
		}
	}

	/*
	 * Feed at most one source buffer into the bitstream ring buffer per
	 * call (note the unconditional break at the bottom); already-consumed
	 * buffers are skipped via 'continue'.
	 */
	while ((vpu_buf = inst_src_buf_remove(inst)) != NULL) {
		struct vb2_v4l2_buffer *vbuf = &vpu_buf->v4l2_m2m_buf.vb;
		struct vpu_buf *ring_buffer = &inst->bitstream_vbuf;
		size_t src_size = vb2_get_plane_payload(&vbuf->vb2_buf, 0);
		void *src_buf = vb2_plane_vaddr(&vbuf->vb2_buf, 0);
		dma_addr_t rd_ptr = 0;
		dma_addr_t wr_ptr = 0;
		size_t remain_size = 0;

		if (vpu_buf->consumed) {
			dev_dbg(inst->dev->dev, "already copied src buf (%u) to the ring buffer\n",
				vbuf->vb2_buf.index);
			continue;
		}

		if (!src_buf) {
			dev_dbg(inst->dev->dev,
				"%s: Acquiring kernel pointer to src buf (%u), fail\n",
				__func__, vbuf->vb2_buf.index);
			break;
		}

		ret = wave5_vpu_dec_get_bitstream_buffer(inst, &rd_ptr, &wr_ptr, &remain_size);
		if (ret) {
			/* Unable to acquire the mutex */
			dev_err(inst->dev->dev, "Getting the bitstream buffer, fail: %d\n",
				ret);
			return ret;
		}

		dev_dbg(inst->dev->dev, "%s: rd_ptr %pad wr_ptr %pad", __func__, &rd_ptr, &wr_ptr);

		/* Not enough free ring space yet; retry on a later run. */
		if (remain_size < src_size) {
			dev_dbg(inst->dev->dev,
				"%s: remaining size: %zu < source size: %zu for src buf (%u)\n",
				__func__, remain_size, src_size, vbuf->vb2_buf.index);
			break;
		}

		ret = write_to_ringbuffer(inst, src_buf, src_size, ring_buffer, wr_ptr);
		if (ret) {
			dev_err(inst->dev->dev, "Write src buf (%u) to ring buffer, fail: %d\n",
				vbuf->vb2_buf.index, ret);
			return ret;
		}

		ret = wave5_vpu_dec_update_bitstream_buffer(inst, src_size);
		if (ret) {
			dev_dbg(inst->dev->dev,
				"update_bitstream_buffer fail: %d for src buf (%u)\n",
				ret, vbuf->vb2_buf.index);
			break;
		}

		vpu_buf->consumed = true;

		/* Don't write buffers passed the last one while draining. */
		if (v4l2_m2m_is_last_draining_src_buf(m2m_ctx, vbuf)) {
			dev_dbg(inst->dev->dev, "last src buffer written to the ring buffer\n");
			break;
		}

		inst->queuing_num++;
		break;
	}

	return ret;
}

/*
 * Queue an OUTPUT (bitstream) buffer: put it on the avail_src_bufs feed list
 * (consumed by fill_ringbuffer()) and hand it to the m2m framework.
 */
static void wave5_vpu_dec_buf_queue_src(struct vb2_buffer *vb)
{
	struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
	struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(vbuf);
	int ret;

	vpu_buf->consumed = false;
	vbuf->sequence = inst->queued_src_buf_num++;
	ret = mutex_lock_interruptible(&inst->feed_lock);
	if (ret)
		/*
		 * NOTE(review): on interrupted lock the buffer is neither put
		 * on the feed list nor queued to m2m — confirm this is the
		 * intended best-effort behavior.
		 */
		return;
	INIT_LIST_HEAD(&vpu_buf->list);
	list_add_tail(&vpu_buf->list, &inst->avail_src_bufs);
	mutex_unlock(&inst->feed_lock);
	v4l2_m2m_buf_queue(m2m_ctx, vbuf);
}

/*
 * Queue a CAPTURE (display) buffer. Once decoding runs, re-queuing a buffer
 * only requires clearing its display flag in the firmware. A buffer queued
 * after the last one during draining is returned empty immediately.
 */
static void wave5_vpu_dec_buf_queue_dst(struct vb2_buffer *vb)
{
	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
	struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;

	pm_runtime_resume_and_get(inst->dev->dev);
	vbuf->sequence = inst->queued_dst_buf_num++;

	if (inst->state == VPU_INST_STATE_PIC_RUN) {
		struct vpu_dst_buffer *vpu_buf = wave5_to_vpu_dst_buf(vbuf);
		int ret;

		/*
		 * The buffer is already registered just clear the display flag
		 * to let the firmware know it can be used.
		 */
		vpu_buf->display = false;
		ret = wave5_vpu_dec_clr_disp_flag(inst, vb->index);
		if (ret) {
			dev_dbg(inst->dev->dev,
				"%s: Clearing the display flag of buffer index: %u, fail: %d\n",
				__func__, vb->index, ret);
		}
	}

	if (vb2_is_streaming(vb->vb2_queue) && v4l2_m2m_dst_buf_is_last(m2m_ctx)) {
		/* Draining finished: return this buffer empty, flagged LAST. */
		unsigned int i;

		for (i = 0; i < vb->num_planes; i++)
			vb2_set_plane_payload(vb, i, 0);

		vbuf->field = V4L2_FIELD_NONE;

		send_eos_event(inst);
		v4l2_m2m_last_buffer_done(m2m_ctx, vbuf);
	} else {
		v4l2_m2m_buf_queue(m2m_ctx, vbuf);
	}
	pm_runtime_put_autosuspend(inst->dev->dev);
}

/* vb2 buf_queue callback: dispatch to the OUTPUT or CAPTURE handler. */
static void wave5_vpu_dec_buf_queue(struct vb2_buffer *vb)
{
	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
	struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);

	dev_dbg(inst->dev->dev, "%s: type: %4u index: %4u size: ([0]=%4lu, [1]=%4lu, [2]=%4lu)\n",
		__func__, vb->type, vb->index, vb2_plane_size(&vbuf->vb2_buf, 0),
		vb2_plane_size(&vbuf->vb2_buf, 1), vb2_plane_size(&vbuf->vb2_buf, 2));

	if (vb->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
		wave5_vpu_dec_buf_queue_src(vb);
		/* New bitstream arrived: the queue is no longer empty. */
		if (inst->empty_queue)
			inst->empty_queue = false;
	} else if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
		wave5_vpu_dec_buf_queue_dst(vb);
	}
}

/*
 * Allocate the bitstream ring buffer (4x the aligned OUTPUT sizeimage) and
 * initialize the cached read pointer to its base.
 */
static int wave5_vpu_dec_allocate_ring_buffer(struct vpu_instance *inst)
{
	int ret;
	struct vpu_buf *ring_buffer = &inst->bitstream_vbuf;

	ring_buffer->size = ALIGN(inst->src_fmt.plane_fmt[0].sizeimage, 1024) * 4;
	ret = wave5_vdi_allocate_dma_memory(inst->dev, ring_buffer);
	if (ret) {
		dev_dbg(inst->dev->dev, "%s: allocate ring buffer of size %zu fail: %d\n",
			__func__, ring_buffer->size, ret);
		return ret;
	}

	inst->last_rd_ptr = ring_buffer->daddr;

	return 0;
}

static int
wave5_vpu_dec_start_streaming(struct vb2_queue *q, unsigned int count)
{
	struct vpu_instance *inst = vb2_get_drv_priv(q);
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	int ret = 0;

	dev_dbg(inst->dev->dev, "%s: type: %u\n", __func__, q->type);
	pm_runtime_resume_and_get(inst->dev->dev);

	v4l2_m2m_update_start_streaming_state(m2m_ctx, q);

	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE && inst->state == VPU_INST_STATE_NONE) {
		/*
		 * First OUTPUT streamon: allocate the bitstream ring buffer,
		 * open a firmware decoder instance on it and move to OPEN.
		 */
		struct dec_open_param open_param;

		memset(&open_param, 0, sizeof(struct dec_open_param));

		ret = wave5_vpu_dec_allocate_ring_buffer(inst);
		if (ret)
			goto return_buffers;

		open_param.bitstream_buffer = inst->bitstream_vbuf.daddr;
		open_param.bitstream_buffer_size = inst->bitstream_vbuf.size;

		ret = wave5_vpu_dec_open(inst, &open_param);
		if (ret) {
			dev_dbg(inst->dev->dev, "%s: decoder opening, fail: %d\n",
				__func__, ret);
			goto free_bitstream_vbuf;
		}

		ret = switch_state(inst, VPU_INST_STATE_OPEN);
		if (ret)
			goto free_bitstream_vbuf;
	} else if (q->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
		struct dec_initial_info *initial_info =
			&inst->codec_info->dec_info.initial_info;

		/* Restarting CAPTURE after a stop re-enters INIT_SEQ. */
		if (inst->state == VPU_INST_STATE_STOP)
			ret = switch_state(inst, VPU_INST_STATE_INIT_SEQ);
		if (ret)
			goto return_buffers;

		/* Wave521C cannot decode 10-bit streams at all. */
		if (inst->state == VPU_INST_STATE_INIT_SEQ &&
		    inst->dev->product_code == WAVE521C_CODE) {
			if (initial_info->luma_bitdepth != 8) {
				dev_info(inst->dev->dev, "%s: no support for %d bit depth",
					 __func__, initial_info->luma_bitdepth);
				ret = -EINVAL;
				goto return_buffers;
			}
		}

	}
	pm_runtime_put_autosuspend(inst->dev->dev);
	return ret;

free_bitstream_vbuf:
	wave5_vdi_free_dma_memory(inst->dev, &inst->bitstream_vbuf);
return_buffers:
	wave5_return_bufs(q, VB2_BUF_STATE_QUEUED);
	pm_runtime_put_autosuspend(inst->dev->dev);
	return ret;
}

/*
 * OUTPUT streamoff: drop all pending source buffers, return them to
 * userspace as errors, drain pending firmware output, flush the instance
 * and reset the ring-buffer pointers. Cancels any ongoing drain.
 */
static int streamoff_output(struct vb2_queue *q)
{
	struct vpu_instance *inst = vb2_get_drv_priv(q);
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct vb2_v4l2_buffer *buf;
	int ret;
	dma_addr_t new_rd_ptr;
	struct dec_output_info dec_info;
	unsigned int i;
	struct vpu_src_buffer *vpu_buf;

	inst->retry = false;
	inst->queuing_num = 0;
	/* Empty the feed list; the buffers are returned via m2m below. */
	while ((vpu_buf = inst_src_buf_remove(inst)) != NULL)
		;

	for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) {
		ret = wave5_vpu_dec_set_disp_flag(inst, i);
		if (ret)
			dev_dbg(inst->dev->dev,
				"%s: Setting display flag of buf index: %u, fail: %d\n",
				__func__, i, ret);
	}

	while ((buf = v4l2_m2m_src_buf_remove(m2m_ctx))) {
		dev_dbg(inst->dev->dev, "%s: (Multiplanar) buf type %4u | index %4u\n",
			__func__, buf->vb2_buf.type, buf->vb2_buf.index);
		v4l2_m2m_buf_done(buf, VB2_BUF_STATE_ERROR);
	}

	/* Consume any decode results still queued in the firmware. */
	while (wave5_vpu_dec_get_output_info(inst, &dec_info) == 0) {
		if (dec_info.index_frame_display >= 0)
			wave5_vpu_dec_set_disp_flag(inst, dec_info.index_frame_display);
	}

	ret = wave5_vpu_flush_instance(inst);
	if (ret)
		return ret;

	/* Reset the ring buffer information */
	new_rd_ptr = wave5_vpu_dec_get_rd_ptr(inst);
	inst->last_rd_ptr = new_rd_ptr;
	inst->codec_info->dec_info.stream_rd_ptr = new_rd_ptr;
	inst->codec_info->dec_info.stream_wr_ptr = new_rd_ptr;

	if (v4l2_m2m_has_stopped(m2m_ctx))
		send_eos_event(inst);

	/* streamoff on output cancels any draining operation */
	inst->eos = false;

	return 0;
}

/*
 * CAPTURE streamoff: reclaim all destination buffers for the firmware,
 * return them to userspace as errors and reset framebuffer info if a
 * reallocation (e.g. after DRC) is pending.
 */
static int streamoff_capture(struct vb2_queue *q)
{
	struct vpu_instance *inst = vb2_get_drv_priv(q);
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct vb2_v4l2_buffer *buf;
	unsigned
 int i;
	int ret = 0;

	for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) {
		ret = wave5_vpu_dec_set_disp_flag(inst, i);
		if (ret)
			dev_dbg(inst->dev->dev,
				"%s: Setting display flag of buf index: %u, fail: %d\n",
				__func__, i, ret);
	}

	while ((buf = v4l2_m2m_dst_buf_remove(m2m_ctx))) {
		u32 plane;

		dev_dbg(inst->dev->dev, "%s: buf type %4u | index %4u\n",
			__func__, buf->vb2_buf.type, buf->vb2_buf.index);

		for (plane = 0; plane < inst->dst_fmt.num_planes; plane++)
			vb2_set_plane_payload(&buf->vb2_buf, plane, 0);

		v4l2_m2m_buf_done(buf, VB2_BUF_STATE_ERROR);
	}

	/* A pending resolution change invalidates the registered framebuffers. */
	if (inst->needs_reallocation) {
		wave5_vpu_dec_give_command(inst, DEC_RESET_FRAMEBUF_INFO, NULL);
		inst->needs_reallocation = false;
	}

	if (v4l2_m2m_has_stopped(m2m_ctx)) {
		ret = switch_state(inst, VPU_INST_STATE_INIT_SEQ);
		if (ret)
			return ret;
	}

	return 0;
}

/*
 * vb2 stop_streaming callback. First drain the firmware command/report
 * queues so no results are lost, then perform the queue-specific teardown.
 */
static void wave5_vpu_dec_stop_streaming(struct vb2_queue *q)
{
	struct vpu_instance *inst = vb2_get_drv_priv(q);
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;

	bool check_cmd = TRUE;

	dev_dbg(inst->dev->dev, "%s: type: %u\n", __func__, q->type);
	pm_runtime_resume_and_get(inst->dev->dev);
	inst->empty_queue = true;
	while (check_cmd) {
		struct queue_status_info q_status;
		struct dec_output_info dec_output_info;

		/* Wait until the firmware has no queued commands or reports left. */
		wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);
		if ((inst->state == VPU_INST_STATE_STOP ||
		     inst->state == VPU_INST_STATE_INIT_SEQ ||
		     q_status.instance_queue_count == 0) &&
		    q_status.report_queue_count == 0)
			break;

		if (wave5_vpu_dec_get_output_info(inst, &dec_output_info))
			dev_dbg(inst->dev->dev, "there is no output info\n");
	}

	v4l2_m2m_update_stop_streaming_state(m2m_ctx, q);

	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
		streamoff_output(q);
	else
		streamoff_capture(q);

	inst->empty_queue = false;
	inst->sent_eos = false;
	pm_runtime_put_autosuspend(inst->dev->dev);
}

static const struct vb2_ops wave5_vpu_dec_vb2_ops = {
	.queue_setup = wave5_vpu_dec_queue_setup,
	.buf_queue = wave5_vpu_dec_buf_queue,
	.start_streaming = wave5_vpu_dec_start_streaming,
	.stop_streaming = wave5_vpu_dec_stop_streaming,
};

/*
 * Initialize the default formats of a new instance: first codec format
 * (HEVC) on OUTPUT, first raw format (YUV420) on CAPTURE.
 */
static void wave5_set_default_format(struct v4l2_pix_format_mplane *src_fmt,
				     struct v4l2_pix_format_mplane *dst_fmt)
{
	src_fmt->pixelformat = dec_fmt_list[VPU_FMT_TYPE_CODEC][0].v4l2_pix_fmt;
	wave5_update_pix_fmt(src_fmt, VPU_FMT_TYPE_CODEC,
			     W5_DEF_DEC_PIC_WIDTH, W5_DEF_DEC_PIC_HEIGHT,
			     &dec_hevc_frmsize);

	dst_fmt->pixelformat = dec_fmt_list[VPU_FMT_TYPE_RAW][0].v4l2_pix_fmt;
	wave5_update_pix_fmt(dst_fmt, VPU_FMT_TYPE_RAW,
			     W5_DEF_DEC_PIC_WIDTH, W5_DEF_DEC_PIC_HEIGHT,
			     &dec_raw_frmsize);
}

/* m2m queue-init hook: shared helper wired up with the decoder vb2 ops. */
static int wave5_vpu_dec_queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)
{
	return wave5_vpu_queue_init(priv, src_vq, dst_vq, &wave5_vpu_dec_vb2_ops);
}

static const struct vpu_instance_ops wave5_vpu_dec_inst_ops = {
	.finish_process = wave5_vpu_dec_finish_decode,
};

/*
 * Issue SEQ_INIT to the firmware, wait for its completion interrupt and
 * collect the stream's initial info (resolution, bit depth, ...).
 */
static int initialize_sequence(struct vpu_instance *inst)
{
	struct dec_initial_info initial_info;
	int ret = 0;

	memset(&initial_info, 0, sizeof(struct dec_initial_info));

	ret = wave5_vpu_dec_issue_seq_init(inst);
	if (ret) {
		dev_dbg(inst->dev->dev, "%s: wave5_vpu_dec_issue_seq_init, fail: %d\n",
			__func__, ret);
		return ret;
	}

	if (wave5_vpu_wait_interrupt(inst, VPU_DEC_TIMEOUT) < 0)
		dev_dbg(inst->dev->dev, "%s: failed to call vpu_wait_interrupt()\n", __func__);

	ret = wave5_vpu_dec_complete_seq_init(inst, &initial_info);
	if (ret) {
		dev_dbg(inst->dev->dev, "%s: vpu_dec_complete_seq_init, fail: %d, reason: %u\n",
			__func__, ret, initial_info.seq_init_err_reason);
		/* Return the bitstream consumed up to the failure point. */
		wave5_handle_src_buffer(inst, initial_info.rd_ptr);
		return ret;
	}

	handle_dynamic_resolution_change(inst);

	return 0;
}

/* True when a drain was requested (m2m) or EOS was seen on the stream. */
static bool wave5_is_draining_or_eos(struct vpu_instance *inst)
{
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;

	lockdep_assert_held(&inst->state_spinlock);
	return m2m_ctx->is_draining || inst->eos;
}

/*
 * m2m device_run: feed bitstream into the ring buffer, then advance the
 * instance state machine (OPEN -> INIT_SEQ -> PIC_RUN) and kick off a
 * decode. Called with the job scheduled by the m2m framework.
 */
static void wave5_vpu_dec_device_run(void *priv)
{
	struct vpu_instance *inst = priv;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct queue_status_info q_status;
	u32 fail_res = 0;
	int ret = 0;

	dev_dbg(inst->dev->dev, "%s: Fill the ring buffer with new bitstream data", __func__);
	pm_runtime_resume_and_get(inst->dev->dev);
	/* After a queueing failure the previous data is still pending; don't refeed. */
	if (!inst->retry) {
		ret = fill_ringbuffer(inst);
		if (ret < 0) {
			dev_warn(inst->dev->dev, "Filling ring buffer failed\n");
			goto finish_job_and_return;
		} else if (!inst->eos &&
			   inst->queuing_num == 0 &&
			   inst->state == VPU_INST_STATE_PIC_RUN) {
			dev_dbg(inst->dev->dev, "%s: no bitstream for feeding, so skip ", __func__);
			inst->empty_queue = true;
			goto finish_job_and_return;
		}
	}

	switch (inst->state) {
	case VPU_INST_STATE_OPEN:
		ret = initialize_sequence(inst);
		if (ret) {
			unsigned long flags;

			/*
			 * Sequence init failed while draining and all input is
			 * consumed: terminate the stream instead of retrying.
			 */
			spin_lock_irqsave(&inst->state_spinlock, flags);
			if (wave5_is_draining_or_eos(inst) &&
			    wave5_last_src_buffer_consumed(m2m_ctx)) {
				struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);

				switch_state(inst, VPU_INST_STATE_STOP);

				if (vb2_is_streaming(dst_vq))
					send_eos_event(inst);
				else
					handle_dynamic_resolution_change(inst);

				flag_last_buffer_done(inst);
			}
			spin_unlock_irqrestore(&inst->state_spinlock, flags);
		} else {
			set_instance_state(inst, VPU_INST_STATE_INIT_SEQ);
		}

		break;

	case VPU_INST_STATE_INIT_SEQ:
		/*
		 * Do this early, preparing the fb can trigger an IRQ before
		 * we had a chance to switch, which leads to an invalid state
		 * change.
		 */
		set_instance_state(inst, VPU_INST_STATE_PIC_RUN);
		/*
		 * During DRC, the picture decoding remains pending, so just leave the job
		 * active until this decode operation completes.
		 */
		wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);

		/*
		 * The sequence must be analyzed first to calculate the proper
		 * size of the auxiliary buffers.
		 */
		ret = wave5_prepare_fb(inst);
		if (ret) {
			dev_warn(inst->dev->dev, "Framebuffer preparation, fail: %d\n", ret);
			set_instance_state(inst, VPU_INST_STATE_STOP);
			break;
		}

		if (q_status.instance_queue_count)
			goto finish_job_and_return;

		fallthrough;
	case VPU_INST_STATE_PIC_RUN:
		ret = start_decode(inst, &fail_res);
		if (ret) {
			dev_err(inst->dev->dev,
				"Frame decoding on m2m context (%p), fail: %d (result: %d)\n",
				m2m_ctx, ret, fail_res);
			goto finish_job_and_return;
		}

		/* Firmware queue full: keep the data and retry this job later. */
		if (fail_res == WAVE5_SYSERR_QUEUEING_FAIL) {
			inst->retry = true;
			inst->queuing_fail = true;
		} else {
			inst->retry = false;
			if (!inst->eos)
				inst->queuing_num--;
		}
		break;
	default:
		dev_dbg(inst->dev->dev, "Execution of a job in state %s illegal.\n",
			state_to_str(inst->state));
	}

finish_job_and_return:
	dev_dbg(inst->dev->dev, "%s: leave and finish job", __func__);
	pm_runtime_put_autosuspend(inst->dev->dev);
	/*
	 * After receiving CMD_STOP, there is no input, but we have to run device_run
	 * to send DEC_PIC command until display index == -1, so job_finish was always
	 * called in the device_run to archive it, the logic was very wasteful
	 * in power and CPU time.
	 * If EOS is passed, device_run will not call job_finish no more, it is called
	 * only if HW is idle status in order to reduce overhead.
	 */
	if (!inst->sent_eos)
		v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
}

/*
 * m2m job_abort: stop the instance and tell the firmware the bitstream has
 * ended, so pending decodes wind down.
 */
static void wave5_vpu_dec_job_abort(void *priv)
{
	struct vpu_instance *inst = priv;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	int ret;

	ret = set_instance_state(inst, VPU_INST_STATE_STOP);
	if (ret)
		return;

	ret = wave5_vpu_dec_set_eos_on_firmware(inst);
	if (ret)
		dev_warn(inst->dev->dev,
			 "Setting EOS for the bitstream, fail: %d\n", ret);

	v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
}

/*
 * m2m job_ready: decide, per state and under state_spinlock, whether a
 * device_run would be able to make progress.
 */
static int wave5_vpu_dec_job_ready(void *priv)
{
	struct vpu_instance *inst = priv;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&inst->state_spinlock, flags);

	switch (inst->state) {
	case VPU_INST_STATE_NONE:
		dev_dbg(inst->dev->dev, "Decoder must be open to start queueing M2M jobs!\n");
		break;
	case VPU_INST_STATE_OPEN:
		if (wave5_is_draining_or_eos(inst) || !v4l2_m2m_has_stopped(m2m_ctx) ||
		    v4l2_m2m_num_src_bufs_ready(m2m_ctx) > 0) {
			ret = 1;
			break;
		}

		dev_dbg(inst->dev->dev,
			"Decoder must be draining or >= 1 OUTPUT queue buffer must be queued!\n");
		break;
	case VPU_INST_STATE_INIT_SEQ:
	case VPU_INST_STATE_PIC_RUN:
		if (!m2m_ctx->cap_q_ctx.q.streaming) {
			dev_dbg(inst->dev->dev, "CAPTURE queue must be streaming to queue jobs!\n");
			break;
		} else if (v4l2_m2m_num_dst_bufs_ready(m2m_ctx) < (inst->fbc_buf_count - 1)) {
			dev_dbg(inst->dev->dev,
				"No capture buffer ready to decode!\n");
			break;
		} else if (!wave5_is_draining_or_eos(inst) &&
			   (!v4l2_m2m_num_src_bufs_ready(m2m_ctx) ||
			    inst->empty_queue)) {
			dev_dbg(inst->dev->dev,
				"No bitstream data to decode!\n");
			break;
		} else if (inst->state == VPU_INST_STATE_PIC_RUN &&
			   !wave5_is_draining_or_eos(inst) &&
			   inst->queuing_fail) {
			/* Firmware queue was full last time; wait for new input. */
			break;
		}
		ret = 1;
		break;
	case VPU_INST_STATE_STOP:
		dev_dbg(inst->dev->dev, "Decoder is stopped, not running.\n");
		break;
	}

	spin_unlock_irqrestore(&inst->state_spinlock, flags);

	return ret;
}

static const struct v4l2_m2m_ops wave5_vpu_dec_m2m_ops = {
	.device_run = wave5_vpu_dec_device_run,
	.job_abort = wave5_vpu_dec_job_abort,
	.job_ready = wave5_vpu_dec_job_ready,
};

/*
 * open() on the decoder video node: allocate and initialize a decoder
 * instance (m2m context, controls, default formats, IRQ fifo, instance ID)
 * and link it into the device's instance list. All failures unwind through
 * wave5_cleanup_instance().
 */
static int wave5_vpu_open_dec(struct file *filp)
{
	struct video_device *vdev = video_devdata(filp);
	struct vpu_device *dev = video_drvdata(filp);
	struct vpu_instance *inst = NULL;
	struct v4l2_m2m_ctx *m2m_ctx;
	int ret = 0;

	inst = kzalloc_obj(*inst);
	if (!inst)
		return -ENOMEM;

	inst->dev = dev;
	inst->type = VPU_INST_TYPE_DEC;
	inst->ops = &wave5_vpu_dec_inst_ops;

	spin_lock_init(&inst->state_spinlock);
	mutex_init(&inst->feed_lock);
	INIT_LIST_HEAD(&inst->avail_src_bufs);

	inst->codec_info = kzalloc_obj(*inst->codec_info);
	if (!inst->codec_info) {
		kfree(inst);
		return -ENOMEM;
	}

	v4l2_fh_init(&inst->v4l2_fh, vdev);
	v4l2_fh_add(&inst->v4l2_fh, filp);

	INIT_LIST_HEAD(&inst->list);

	inst->v4l2_m2m_dev = inst->dev->v4l2_m2m_dec_dev;
	inst->v4l2_fh.m2m_ctx =
		v4l2_m2m_ctx_init(inst->v4l2_m2m_dev, inst, wave5_vpu_dec_queue_init);
	if (IS_ERR(inst->v4l2_fh.m2m_ctx)) {
		ret = PTR_ERR(inst->v4l2_fh.m2m_ctx);
		goto cleanup_inst;
	}
	m2m_ctx = inst->v4l2_fh.m2m_ctx;

	v4l2_m2m_set_src_buffered(m2m_ctx, true);
	v4l2_m2m_set_dst_buffered(m2m_ctx, true);
	/*
	 * We use the M2M job queue to ensure synchronization of steps where
	 * needed, as IOCTLs can occur at anytime and we need to run commands on
	 * the firmware in a specified order.
	 * In order to initialize the sequence on the firmware within an M2M
	 * job, the M2M framework needs to be able to queue jobs before
	 * the CAPTURE queue has been started, because we need the results of the
	 * initialization to properly prepare the CAPTURE queue with the correct
	 * amount of buffers.
	 * By setting ignore_cap_streaming to true the m2m framework will call
	 * job_ready as soon as the OUTPUT queue is streaming, instead of
	 * waiting until both the CAPTURE and OUTPUT queues are streaming.
	 */
	m2m_ctx->ignore_cap_streaming = true;

	v4l2_ctrl_handler_init(&inst->v4l2_ctrl_hdl, 10);
	v4l2_ctrl_new_std(&inst->v4l2_ctrl_hdl, NULL,
			  V4L2_CID_MIN_BUFFERS_FOR_CAPTURE, 1, 32, 1, 1);

	if (inst->v4l2_ctrl_hdl.error) {
		ret = -ENODEV;
		goto cleanup_inst;
	}

	inst->v4l2_fh.ctrl_handler = &inst->v4l2_ctrl_hdl;
	v4l2_ctrl_handler_setup(&inst->v4l2_ctrl_hdl);

	wave5_set_default_format(&inst->src_fmt, &inst->dst_fmt);
	inst->colorspace = V4L2_COLORSPACE_REC709;
	inst->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
	inst->quantization = V4L2_QUANTIZATION_DEFAULT;
	inst->xfer_func = V4L2_XFER_FUNC_DEFAULT;

	init_completion(&inst->irq_done);
	ret = wave5_kfifo_alloc(inst);
	if (ret) {
		dev_err(inst->dev->dev, "failed to allocate fifo\n");
		goto cleanup_inst;
	}

	inst->id = ida_alloc(&inst->dev->inst_ida, GFP_KERNEL);
	if (inst->id < 0) {
		dev_warn(inst->dev->dev, "Allocating instance ID, fail: %d\n", inst->id);
		ret = inst->id;
		goto cleanup_inst;
	}

	/*
	 * For Wave515 SRAM memory was already allocated
	 * at wave5_vpu_dec_register_device()
	 */
	if (inst->dev->product_code != WAVE515_CODE)
		wave5_vdi_allocate_sram(inst->dev);

ret = mutex_lock_interruptible(&dev->dev_lock); 1914 if (ret) 1915 goto cleanup_inst; 1916 1917 list_add_tail(&inst->list, &dev->instances); 1918 1919 mutex_unlock(&dev->dev_lock); 1920 1921 return 0; 1922 1923 cleanup_inst: 1924 wave5_cleanup_instance(inst, filp); 1925 return ret; 1926 } 1927 1928 static int wave5_vpu_dec_release(struct file *filp) 1929 { 1930 return wave5_vpu_release_device(filp, wave5_vpu_dec_close, "decoder"); 1931 } 1932 1933 static const struct v4l2_file_operations wave5_vpu_dec_fops = { 1934 .owner = THIS_MODULE, 1935 .open = wave5_vpu_open_dec, 1936 .release = wave5_vpu_dec_release, 1937 .unlocked_ioctl = video_ioctl2, 1938 .poll = v4l2_m2m_fop_poll, 1939 .mmap = v4l2_m2m_fop_mmap, 1940 }; 1941 1942 int wave5_vpu_dec_register_device(struct vpu_device *dev) 1943 { 1944 struct video_device *vdev_dec; 1945 int ret; 1946 1947 /* 1948 * Secondary AXI setup for Wave515 is done by INIT_VPU command, 1949 * i.e. wave5_vpu_init(), that's why we allocate SRAM memory early. 
1950 */ 1951 if (dev->product_code == WAVE515_CODE) 1952 wave5_vdi_allocate_sram(dev); 1953 1954 vdev_dec = devm_kzalloc(dev->v4l2_dev.dev, sizeof(*vdev_dec), GFP_KERNEL); 1955 if (!vdev_dec) 1956 return -ENOMEM; 1957 1958 dev->v4l2_m2m_dec_dev = v4l2_m2m_init(&wave5_vpu_dec_m2m_ops); 1959 if (IS_ERR(dev->v4l2_m2m_dec_dev)) { 1960 ret = PTR_ERR(dev->v4l2_m2m_dec_dev); 1961 dev_err(dev->dev, "v4l2_m2m_init, fail: %d\n", ret); 1962 return -EINVAL; 1963 } 1964 1965 dev->video_dev_dec = vdev_dec; 1966 1967 strscpy(vdev_dec->name, VPU_DEC_DEV_NAME, sizeof(vdev_dec->name)); 1968 vdev_dec->fops = &wave5_vpu_dec_fops; 1969 vdev_dec->ioctl_ops = &wave5_vpu_dec_ioctl_ops; 1970 vdev_dec->release = video_device_release_empty; 1971 vdev_dec->v4l2_dev = &dev->v4l2_dev; 1972 vdev_dec->vfl_dir = VFL_DIR_M2M; 1973 vdev_dec->device_caps = V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_STREAMING; 1974 vdev_dec->lock = &dev->dev_lock; 1975 1976 ret = video_register_device(vdev_dec, VFL_TYPE_VIDEO, -1); 1977 if (ret) 1978 return ret; 1979 1980 video_set_drvdata(vdev_dec, dev); 1981 1982 return 0; 1983 } 1984 1985 void wave5_vpu_dec_unregister_device(struct vpu_device *dev) 1986 { 1987 /* 1988 * Here is a freeing pair for Wave515 SRAM memory allocation 1989 * happened at wave5_vpu_dec_register_device(). 1990 */ 1991 if (dev->product_code == WAVE515_CODE) 1992 wave5_vdi_free_sram(dev); 1993 1994 video_unregister_device(dev->video_dev_dec); 1995 if (dev->v4l2_m2m_dec_dev) 1996 v4l2_m2m_release(dev->v4l2_m2m_dec_dev); 1997 } 1998