// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
 * Wave5 series multi-standard codec IP - decoder interface
 *
 * Copyright (C) 2021-2023 CHIPS&MEDIA INC
 */

#include <linux/pm_runtime.h>
#include "wave5-helper.h"

#define VPU_DEC_DEV_NAME "C&M Wave5 VPU decoder"
#define VPU_DEC_DRV_NAME "wave5-dec"

/* Frame size constraints for HEVC bitstream input (8-pixel minimum). */
static const struct v4l2_frmsize_stepwise dec_hevc_frmsize = {
	.min_width = W5_MIN_DEC_PIC_8_WIDTH,
	.max_width = W5_MAX_DEC_PIC_WIDTH,
	.step_width = W5_DEC_CODEC_STEP_WIDTH,
	.min_height = W5_MIN_DEC_PIC_8_HEIGHT,
	.max_height = W5_MAX_DEC_PIC_HEIGHT,
	.step_height = W5_DEC_CODEC_STEP_HEIGHT,
};

/* Frame size constraints for H.264 bitstream input (32-pixel minimum). */
static const struct v4l2_frmsize_stepwise dec_h264_frmsize = {
	.min_width = W5_MIN_DEC_PIC_32_WIDTH,
	.max_width = W5_MAX_DEC_PIC_WIDTH,
	.step_width = W5_DEC_CODEC_STEP_WIDTH,
	.min_height = W5_MIN_DEC_PIC_32_HEIGHT,
	.max_height = W5_MAX_DEC_PIC_HEIGHT,
	.step_height = W5_DEC_CODEC_STEP_HEIGHT,
};

/*
 * Frame size constraints shared by all raw (decoded) output formats.
 * Note the raw step sizes (W5_DEC_RAW_STEP_*) differ from the codec ones.
 */
static const struct v4l2_frmsize_stepwise dec_raw_frmsize = {
	.min_width = W5_MIN_DEC_PIC_8_WIDTH,
	.max_width = W5_MAX_DEC_PIC_WIDTH,
	.step_width = W5_DEC_RAW_STEP_WIDTH,
	.min_height = W5_MIN_DEC_PIC_8_HEIGHT,
	.max_height = W5_MAX_DEC_PIC_HEIGHT,
	.step_height = W5_DEC_RAW_STEP_HEIGHT,
};

/*
 * Formats supported by the decoder: compressed bitstream formats on the
 * OUTPUT queue (VPU_FMT_TYPE_CODEC) and raw pixel formats on the CAPTURE
 * queue (VPU_FMT_TYPE_RAW), each paired with its frame size constraints.
 */
static const struct vpu_format dec_fmt_list[FMT_TYPES][MAX_FMTS] = {
	[VPU_FMT_TYPE_CODEC] = {
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_HEVC,
			.v4l2_frmsize = &dec_hevc_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_H264,
			.v4l2_frmsize = &dec_h264_frmsize,
		},
	},
	[VPU_FMT_TYPE_RAW] = {
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV420,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV12,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV21,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV422P,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV16,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV61,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV420M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV12M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV21M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV422M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV16M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV61M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
	}
};

/*
 * Make sure that the state switch is allowed and add logging for debugging
 * purposes.
 *
 * Caller must hold inst->state_spinlock (see set_instance_state() and the
 * lock-holding callers in the finish-decode path).
 *
 * Returns 0 when the transition is valid (and records it in inst->state),
 * -EINVAL (with a WARN) otherwise.
 */
static int switch_state(struct vpu_instance *inst, enum vpu_instance_state state)
{
	switch (state) {
	case VPU_INST_STATE_NONE:
		/*
		 * NONE is never a valid target state: fall through the switch
		 * to the invalid_state_switch WARN below.
		 */
		break;
	case VPU_INST_STATE_OPEN:
		if (inst->state != VPU_INST_STATE_NONE)
			goto invalid_state_switch;
		goto valid_state_switch;
	case VPU_INST_STATE_INIT_SEQ:
		/* INIT_SEQ may be (re-)entered from OPEN or after a STOP. */
		if (inst->state != VPU_INST_STATE_OPEN && inst->state != VPU_INST_STATE_STOP)
			goto invalid_state_switch;
		goto valid_state_switch;
	case VPU_INST_STATE_PIC_RUN:
		if (inst->state != VPU_INST_STATE_INIT_SEQ)
			goto invalid_state_switch;
		goto valid_state_switch;
	case VPU_INST_STATE_STOP:
		/* STOP is reachable from any state. */
		goto valid_state_switch;
	}
invalid_state_switch:
	WARN(1, "Invalid state switch from %s to %s.\n",
	     state_to_str(inst->state), state_to_str(state));
	return -EINVAL;
valid_state_switch:
	dev_dbg(inst->dev->dev, "Switch state from %s to %s.\n",
		state_to_str(inst->state), state_to_str(state));
	inst->state = state;
	return 0;
}

/* Locked wrapper around switch_state() for callers not holding the lock. */
static int set_instance_state(struct vpu_instance *inst, enum vpu_instance_state state)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&inst->state_spinlock, flags);
	ret = switch_state(inst, state);
	spin_unlock_irqrestore(&inst->state_spinlock, flags);

	return ret;
}

/*
 * Tell the firmware that no more bitstream data will follow, by updating
 * the bitstream buffer with a size of 0.
 */
static int wave5_vpu_dec_set_eos_on_firmware(struct vpu_instance *inst)
{
	int ret;

	ret = wave5_vpu_dec_update_bitstream_buffer(inst, 0);
	if (ret) {
		/*
		 * To set the EOS flag, a command is sent to the firmware.
		 * That command may never return (timeout) or may report an error.
		 */
		dev_err(inst->dev->dev,
			"Setting EOS for the bitstream, fail: %d\n", ret);
		return ret;
	}
	return 0;
}

/*
 * Return true when a last source buffer was flagged and the firmware has
 * already consumed its bitstream payload.
 */
static bool wave5_last_src_buffer_consumed(struct v4l2_m2m_ctx *m2m_ctx)
{
	struct vpu_src_buffer *vpu_buf;

	if (!m2m_ctx->last_src_buf)
		return false;

	vpu_buf = wave5_to_vpu_src_buf(m2m_ctx->last_src_buf);
	return vpu_buf->consumed;
}

/*
 * Account for the bitstream bytes the firmware consumed (derived from the
 * advance of the read pointer in the bitstream ring buffer) and return every
 * fully-consumed source buffer to userspace. Bytes left over that do not
 * complete the next buffer are carried in inst->remaining_consumed_bytes.
 */
static void wave5_handle_src_buffer(struct vpu_instance *inst, dma_addr_t rd_ptr)
{
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct v4l2_m2m_buffer *buf, *n;
	size_t consumed_bytes = 0;

	if (rd_ptr >= inst->last_rd_ptr) {
		consumed_bytes = rd_ptr - inst->last_rd_ptr;
	} else {
		/* The read pointer wrapped around the end of the ring buffer. */
		size_t rd_offs = rd_ptr - inst->bitstream_vbuf.daddr;
		size_t last_rd_offs = inst->last_rd_ptr - inst->bitstream_vbuf.daddr;

		consumed_bytes = rd_offs + (inst->bitstream_vbuf.size - last_rd_offs);
	}

	inst->last_rd_ptr = rd_ptr;
	consumed_bytes += inst->remaining_consumed_bytes;

	dev_dbg(inst->dev->dev, "%s: %zu bytes of bitstream was consumed", __func__,
		consumed_bytes);

	v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buf, n) {
		struct vb2_v4l2_buffer *src_buf = &buf->vb;
		size_t src_size = vb2_get_plane_payload(&src_buf->vb2_buf, 0);

		/* This buffer is only partially consumed; keep it queued. */
		if (src_size > consumed_bytes)
			break;

		dev_dbg(inst->dev->dev, "%s: removing src buffer %i",
			__func__, src_buf->vb2_buf.index);
		src_buf = v4l2_m2m_src_buf_remove(m2m_ctx);
		/* Remember the timestamp for the matching decoded frame. */
		inst->timestamp = src_buf->vb2_buf.timestamp;
		v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
		consumed_bytes -= src_size;

		/* Handle the case the last bitstream buffer has been picked */
		if (src_buf == m2m_ctx->last_src_buf) {
			int ret;

			m2m_ctx->last_src_buf = NULL;
			ret = wave5_vpu_dec_set_eos_on_firmware(inst);
			if (ret)
				dev_warn(inst->dev->dev,
					 "Setting EOS for the bitstream, fail: %d\n", ret);
			break;
		}
	}

	inst->remaining_consumed_bytes = consumed_bytes;
}

/*
 * Kick the firmware to decode one frame. On failure the pending source
 * buffer is returned in the ERROR state, the instance is moved to STOP and
 * the m2m job is finished.
 */
static int start_decode(struct vpu_instance *inst, u32 *fail_res)
{
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	int ret = 0;

	ret = wave5_vpu_dec_start_one_frame(inst, fail_res);
	if (ret) {
		struct vb2_v4l2_buffer *src_buf;

		src_buf = v4l2_m2m_src_buf_remove(m2m_ctx);
		if (src_buf)
			v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR);
		set_instance_state(inst, VPU_INST_STATE_STOP);

		dev_dbg(inst->dev->dev, "%s: pic run failed / finish job", __func__);
		v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
	}

	return ret;
}

/*
 * Return an empty capture buffer flagged V4L2_BUF_FLAG_LAST to signal the
 * end of the drain sequence. If no capture buffer is available, arm the m2m
 * draining machinery so the next queued buffer is marked last instead.
 */
static void flag_last_buffer_done(struct vpu_instance *inst)
{
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct vb2_v4l2_buffer *vb;
	int i;

	lockdep_assert_held(&inst->state_spinlock);

	vb = v4l2_m2m_dst_buf_remove(m2m_ctx);
	if (!vb) {
		m2m_ctx->is_draining = true;
		m2m_ctx->next_buf_last = true;
		return;
	}

	/* The buffer carries no image data; only the LAST flag matters. */
	for (i = 0; i < vb->vb2_buf.num_planes; i++)
		vb2_set_plane_payload(&vb->vb2_buf, i, 0);
	vb->field = V4L2_FIELD_NONE;

	v4l2_m2m_last_buffer_done(m2m_ctx, vb);
}

/* Queue a V4L2_EVENT_EOS to userspace and record that it was sent. */
static void send_eos_event(struct vpu_instance *inst)
{
	static const struct v4l2_event vpu_event_eos = {
		.type = V4L2_EVENT_EOS
	};

	lockdep_assert_held(&inst->state_spinlock);

	v4l2_event_queue_fh(&inst->v4l2_fh, &vpu_event_eos);
	inst->eos = false;
	inst->sent_eos = true;
}

static int
handle_dynamic_resolution_change(struct vpu_instance *inst) 287 { 288 struct v4l2_fh *fh = &inst->v4l2_fh; 289 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx; 290 291 static const struct v4l2_event vpu_event_src_ch = { 292 .type = V4L2_EVENT_SOURCE_CHANGE, 293 .u.src_change.changes = V4L2_EVENT_SRC_CH_RESOLUTION, 294 }; 295 struct dec_info *p_dec_info = &inst->codec_info->dec_info; 296 struct dec_initial_info *initial_info = &inst->codec_info->dec_info.initial_info; 297 298 lockdep_assert_held(&inst->state_spinlock); 299 300 dev_dbg(inst->dev->dev, "%s: rd_ptr %pad", __func__, &initial_info->rd_ptr); 301 302 dev_dbg(inst->dev->dev, "%s: width: %u height: %u profile: %u | minbuffer: %u\n", 303 __func__, initial_info->pic_width, initial_info->pic_height, 304 initial_info->profile, initial_info->min_frame_buffer_count); 305 306 inst->needs_reallocation = true; 307 inst->fbc_buf_count = initial_info->min_frame_buffer_count + 1; 308 if (inst->fbc_buf_count != v4l2_m2m_num_dst_bufs_ready(m2m_ctx)) { 309 struct v4l2_ctrl *ctrl; 310 311 ctrl = v4l2_ctrl_find(&inst->v4l2_ctrl_hdl, 312 V4L2_CID_MIN_BUFFERS_FOR_CAPTURE); 313 if (ctrl) 314 v4l2_ctrl_s_ctrl(ctrl, inst->fbc_buf_count); 315 } 316 317 if (p_dec_info->initial_info_obtained) { 318 const struct vpu_format *vpu_fmt; 319 320 inst->conf_win.left = initial_info->pic_crop_rect.left; 321 inst->conf_win.top = initial_info->pic_crop_rect.top; 322 inst->conf_win.width = initial_info->pic_width - 323 initial_info->pic_crop_rect.left - initial_info->pic_crop_rect.right; 324 inst->conf_win.height = initial_info->pic_height - 325 initial_info->pic_crop_rect.top - initial_info->pic_crop_rect.bottom; 326 327 vpu_fmt = wave5_find_vpu_fmt(inst->src_fmt.pixelformat, 328 dec_fmt_list[VPU_FMT_TYPE_CODEC]); 329 if (!vpu_fmt) 330 return -EINVAL; 331 332 wave5_update_pix_fmt(&inst->src_fmt, 333 VPU_FMT_TYPE_CODEC, 334 initial_info->pic_width, 335 initial_info->pic_height, 336 vpu_fmt->v4l2_frmsize); 337 338 vpu_fmt = 
wave5_find_vpu_fmt(inst->dst_fmt.pixelformat, 339 dec_fmt_list[VPU_FMT_TYPE_RAW]); 340 if (!vpu_fmt) 341 return -EINVAL; 342 343 wave5_update_pix_fmt(&inst->dst_fmt, 344 VPU_FMT_TYPE_RAW, 345 initial_info->pic_width, 346 initial_info->pic_height, 347 vpu_fmt->v4l2_frmsize); 348 } 349 350 v4l2_event_queue_fh(fh, &vpu_event_src_ch); 351 352 return 0; 353 } 354 355 static void wave5_vpu_dec_finish_decode(struct vpu_instance *inst) 356 { 357 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx; 358 struct dec_output_info dec_info; 359 int ret; 360 struct vb2_v4l2_buffer *dec_buf = NULL; 361 struct vb2_v4l2_buffer *disp_buf = NULL; 362 struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx); 363 364 dev_dbg(inst->dev->dev, "%s: Fetch output info from firmware.", __func__); 365 366 ret = wave5_vpu_dec_get_output_info(inst, &dec_info); 367 if (ret) { 368 dev_dbg(inst->dev->dev, "%s: could not get output info.", __func__); 369 v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx); 370 return; 371 } 372 373 dev_dbg(inst->dev->dev, "%s: rd_ptr %pad wr_ptr %pad", __func__, &dec_info.rd_ptr, 374 &dec_info.wr_ptr); 375 wave5_handle_src_buffer(inst, dec_info.rd_ptr); 376 377 dev_dbg(inst->dev->dev, "%s: dec_info dec_idx %i disp_idx %i", __func__, 378 dec_info.index_frame_decoded, dec_info.index_frame_display); 379 380 if (!vb2_is_streaming(dst_vq)) { 381 dev_dbg(inst->dev->dev, "%s: capture is not streaming..", __func__); 382 v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx); 383 return; 384 } 385 386 /* Remove decoded buffer from the ready queue now that it has been 387 * decoded. 
388 */ 389 if (dec_info.index_frame_decoded >= 0) { 390 struct vb2_buffer *vb = vb2_get_buffer(dst_vq, 391 dec_info.index_frame_decoded); 392 if (vb) { 393 dec_buf = to_vb2_v4l2_buffer(vb); 394 dec_buf->vb2_buf.timestamp = inst->timestamp; 395 } else { 396 dev_warn(inst->dev->dev, "%s: invalid decoded frame index %i", 397 __func__, dec_info.index_frame_decoded); 398 } 399 } 400 401 if (dec_info.index_frame_display >= 0) { 402 disp_buf = v4l2_m2m_dst_buf_remove_by_idx(m2m_ctx, dec_info.index_frame_display); 403 if (!disp_buf) 404 dev_warn(inst->dev->dev, "%s: invalid display frame index %i", 405 __func__, dec_info.index_frame_display); 406 } 407 408 /* If there is anything to display, do that now */ 409 if (disp_buf) { 410 struct vpu_dst_buffer *dst_vpu_buf = wave5_to_vpu_dst_buf(disp_buf); 411 412 if (inst->dst_fmt.num_planes == 1) { 413 vb2_set_plane_payload(&disp_buf->vb2_buf, 0, 414 inst->dst_fmt.plane_fmt[0].sizeimage); 415 } else if (inst->dst_fmt.num_planes == 2) { 416 vb2_set_plane_payload(&disp_buf->vb2_buf, 0, 417 inst->dst_fmt.plane_fmt[0].sizeimage); 418 vb2_set_plane_payload(&disp_buf->vb2_buf, 1, 419 inst->dst_fmt.plane_fmt[1].sizeimage); 420 } else if (inst->dst_fmt.num_planes == 3) { 421 vb2_set_plane_payload(&disp_buf->vb2_buf, 0, 422 inst->dst_fmt.plane_fmt[0].sizeimage); 423 vb2_set_plane_payload(&disp_buf->vb2_buf, 1, 424 inst->dst_fmt.plane_fmt[1].sizeimage); 425 vb2_set_plane_payload(&disp_buf->vb2_buf, 2, 426 inst->dst_fmt.plane_fmt[2].sizeimage); 427 } 428 429 /* TODO implement interlace support */ 430 disp_buf->field = V4L2_FIELD_NONE; 431 dst_vpu_buf->display = true; 432 v4l2_m2m_buf_done(disp_buf, VB2_BUF_STATE_DONE); 433 434 dev_dbg(inst->dev->dev, "%s: frame_cycle %8u (payload %lu)\n", 435 __func__, dec_info.frame_cycle, 436 vb2_get_plane_payload(&disp_buf->vb2_buf, 0)); 437 } 438 439 if ((dec_info.index_frame_display == DISPLAY_IDX_FLAG_SEQ_END || 440 dec_info.sequence_changed)) { 441 unsigned long flags; 442 443 
spin_lock_irqsave(&inst->state_spinlock, flags); 444 if (!v4l2_m2m_has_stopped(m2m_ctx)) { 445 switch_state(inst, VPU_INST_STATE_STOP); 446 447 if (dec_info.sequence_changed) 448 handle_dynamic_resolution_change(inst); 449 else 450 send_eos_event(inst); 451 452 flag_last_buffer_done(inst); 453 } 454 spin_unlock_irqrestore(&inst->state_spinlock, flags); 455 } 456 457 if (inst->sent_eos && 458 v4l2_m2m_get_curr_priv(inst->v4l2_m2m_dev)) { 459 struct queue_status_info q_status; 460 461 wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status); 462 if (q_status.report_queue_count == 0 && 463 q_status.instance_queue_count == 0) 464 v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx); 465 } 466 467 inst->queuing_fail = false; 468 } 469 470 static int wave5_vpu_dec_querycap(struct file *file, void *fh, struct v4l2_capability *cap) 471 { 472 strscpy(cap->driver, VPU_DEC_DRV_NAME, sizeof(cap->driver)); 473 strscpy(cap->card, VPU_DEC_DRV_NAME, sizeof(cap->card)); 474 475 return 0; 476 } 477 478 static int wave5_vpu_dec_enum_framesizes(struct file *f, void *fh, struct v4l2_frmsizeenum *fsize) 479 { 480 const struct vpu_format *vpu_fmt; 481 482 if (fsize->index) 483 return -EINVAL; 484 485 vpu_fmt = wave5_find_vpu_fmt(fsize->pixel_format, dec_fmt_list[VPU_FMT_TYPE_CODEC]); 486 if (!vpu_fmt) { 487 vpu_fmt = wave5_find_vpu_fmt(fsize->pixel_format, dec_fmt_list[VPU_FMT_TYPE_RAW]); 488 if (!vpu_fmt) 489 return -EINVAL; 490 } 491 492 fsize->type = V4L2_FRMSIZE_TYPE_CONTINUOUS; 493 fsize->stepwise.min_width = vpu_fmt->v4l2_frmsize->min_width; 494 fsize->stepwise.max_width = vpu_fmt->v4l2_frmsize->max_width; 495 fsize->stepwise.step_width = W5_DEC_CODEC_STEP_WIDTH; 496 fsize->stepwise.min_height = vpu_fmt->v4l2_frmsize->min_height; 497 fsize->stepwise.max_height = vpu_fmt->v4l2_frmsize->max_height; 498 fsize->stepwise.step_height = W5_DEC_CODEC_STEP_HEIGHT; 499 500 return 0; 501 } 502 503 static int wave5_vpu_dec_enum_fmt_cap(struct file *file, void *fh, struct v4l2_fmtdesc *f) 
{
	const struct vpu_format *vpu_fmt;

	vpu_fmt = wave5_find_vpu_fmt_by_idx(f->index, dec_fmt_list[VPU_FMT_TYPE_RAW]);
	if (!vpu_fmt)
		return -EINVAL;

	f->pixelformat = vpu_fmt->v4l2_pix_fmt;
	f->flags = 0;

	return 0;
}

/*
 * VIDIOC_TRY_FMT on the CAPTURE queue: clamp the requested raw format to
 * what the hardware supports. Unknown pixel formats fall back to the
 * currently-set capture format; once the sequence header has been parsed,
 * the coded width/height override the requested size.
 */
static int wave5_vpu_dec_try_fmt_cap(struct file *file, void *fh, struct v4l2_format *f)
{
	struct vpu_instance *inst = file_to_vpu_inst(file);
	struct dec_info *p_dec_info = &inst->codec_info->dec_info;
	const struct v4l2_frmsize_stepwise *frmsize;
	const struct vpu_format *vpu_fmt;
	int width, height;

	dev_dbg(inst->dev->dev,
		"%s: fourcc: %u width: %u height: %u nm planes: %u colorspace: %u field: %u\n",
		__func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
		f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);

	vpu_fmt = wave5_find_vpu_fmt(f->fmt.pix_mp.pixelformat, dec_fmt_list[VPU_FMT_TYPE_RAW]);
	if (!vpu_fmt) {
		/* Unsupported format: fall back to the current capture format. */
		width = inst->dst_fmt.width;
		height = inst->dst_fmt.height;
		f->fmt.pix_mp.pixelformat = inst->dst_fmt.pixelformat;
		frmsize = &dec_raw_frmsize;
	} else {
		width = f->fmt.pix_mp.width;
		height = f->fmt.pix_mp.height;
		f->fmt.pix_mp.pixelformat = vpu_fmt->v4l2_pix_fmt;
		frmsize = vpu_fmt->v4l2_frmsize;
	}

	/* Once the sequence is parsed, the coded size is authoritative. */
	if (p_dec_info->initial_info_obtained) {
		width = inst->dst_fmt.width;
		height = inst->dst_fmt.height;
	}

	wave5_update_pix_fmt(&f->fmt.pix_mp, VPU_FMT_TYPE_RAW,
			     width, height, frmsize);
	/* Colorimetry is inherited from the OUTPUT (bitstream) side. */
	f->fmt.pix_mp.colorspace = inst->colorspace;
	f->fmt.pix_mp.ycbcr_enc = inst->ycbcr_enc;
	f->fmt.pix_mp.quantization = inst->quantization;
	f->fmt.pix_mp.xfer_func = inst->xfer_func;

	return 0;
}

/*
 * VIDIOC_S_FMT on the CAPTURE queue: store the (tried) format and derive
 * the hardware output configuration (chroma interleave/order and 4:2:0 vs
 * 4:2:2 subsampling) from the chosen pixel format.
 */
static int wave5_vpu_dec_s_fmt_cap(struct file *file, void *fh, struct v4l2_format *f)
{
	struct vpu_instance *inst = file_to_vpu_inst(file);
	int i, ret;

	dev_dbg(inst->dev->dev,
		"%s: fourcc: %u width: %u height: %u num_planes: %u colorspace: %u field: %u\n",
		__func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
		f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);

	ret = wave5_vpu_dec_try_fmt_cap(file, fh, f);
	if (ret)
		return ret;

	inst->dst_fmt.width = f->fmt.pix_mp.width;
	inst->dst_fmt.height = f->fmt.pix_mp.height;
	inst->dst_fmt.pixelformat = f->fmt.pix_mp.pixelformat;
	inst->dst_fmt.field = f->fmt.pix_mp.field;
	inst->dst_fmt.flags = f->fmt.pix_mp.flags;
	inst->dst_fmt.num_planes = f->fmt.pix_mp.num_planes;
	for (i = 0; i < inst->dst_fmt.num_planes; i++) {
		inst->dst_fmt.plane_fmt[i].bytesperline = f->fmt.pix_mp.plane_fmt[i].bytesperline;
		inst->dst_fmt.plane_fmt[i].sizeimage = f->fmt.pix_mp.plane_fmt[i].sizeimage;
	}

	/* Map the V4L2 pixel format onto the hardware output settings. */
	if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV12 ||
	    inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV12M) {
		inst->cbcr_interleave = true;
		inst->nv21 = false;
		inst->output_format = FORMAT_420;
	} else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV21 ||
		   inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV21M) {
		inst->cbcr_interleave = true;
		inst->nv21 = true;
		inst->output_format = FORMAT_420;
	} else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV16 ||
		   inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV16M) {
		inst->cbcr_interleave = true;
		inst->nv21 = false;
		inst->output_format = FORMAT_422;
	} else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV61 ||
		   inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV61M) {
		inst->cbcr_interleave = true;
		inst->nv21 = true;
		inst->output_format = FORMAT_422;
	} else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_YUV422P ||
		   inst->dst_fmt.pixelformat == V4L2_PIX_FMT_YUV422M) {
		inst->cbcr_interleave = false;
		inst->nv21 = false;
		inst->output_format = FORMAT_422;
	} else {
		/* Planar 4:2:0 (YUV420/YUV420M) and any remaining formats. */
		inst->cbcr_interleave = false;
		inst->nv21 = false;
		inst->output_format = FORMAT_420;
	}

	return 0;
}

/* VIDIOC_G_FMT on the CAPTURE queue: return the stored capture format. */
static int wave5_vpu_dec_g_fmt_cap(struct file *file, void *fh, struct v4l2_format *f)
{
	struct vpu_instance *inst = file_to_vpu_inst(file);
	int i;

	f->fmt.pix_mp.width = inst->dst_fmt.width;
	f->fmt.pix_mp.height = inst->dst_fmt.height;
	f->fmt.pix_mp.pixelformat = inst->dst_fmt.pixelformat;
	f->fmt.pix_mp.field = inst->dst_fmt.field;
	f->fmt.pix_mp.flags = inst->dst_fmt.flags;
	f->fmt.pix_mp.num_planes = inst->dst_fmt.num_planes;
	for (i = 0; i < f->fmt.pix_mp.num_planes; i++) {
		f->fmt.pix_mp.plane_fmt[i].bytesperline = inst->dst_fmt.plane_fmt[i].bytesperline;
		f->fmt.pix_mp.plane_fmt[i].sizeimage = inst->dst_fmt.plane_fmt[i].sizeimage;
	}

	f->fmt.pix_mp.colorspace = inst->colorspace;
	f->fmt.pix_mp.ycbcr_enc = inst->ycbcr_enc;
	f->fmt.pix_mp.quantization = inst->quantization;
	f->fmt.pix_mp.xfer_func = inst->xfer_func;

	return 0;
}

/* VIDIOC_ENUM_FMT on the OUTPUT queue: enumerate compressed formats. */
static int wave5_vpu_dec_enum_fmt_out(struct file *file, void *fh, struct v4l2_fmtdesc *f)
{
	struct vpu_instance *inst = file_to_vpu_inst(file);
	const struct vpu_format *vpu_fmt;

	dev_dbg(inst->dev->dev, "%s: index: %u\n", __func__, f->index);

	vpu_fmt = wave5_find_vpu_fmt_by_idx(f->index, dec_fmt_list[VPU_FMT_TYPE_CODEC]);
	if (!vpu_fmt)
		return -EINVAL;

	f->pixelformat = vpu_fmt->v4l2_pix_fmt;
	f->flags = V4L2_FMT_FLAG_DYN_RESOLUTION | V4L2_FMT_FLAG_COMPRESSED;

	return 0;
}

/*
 * VIDIOC_TRY_FMT on the OUTPUT queue: clamp the requested bitstream format.
 * Unknown pixel formats fall back to the current output format.
 */
static int wave5_vpu_dec_try_fmt_out(struct file *file, void *fh, struct v4l2_format *f)
{
	struct vpu_instance *inst = file_to_vpu_inst(file);
	const struct v4l2_frmsize_stepwise *frmsize;
	const struct vpu_format *vpu_fmt;
	int width, height;

	dev_dbg(inst->dev->dev,
		"%s: fourcc: %u width: %u height: %u num_planes: %u colorspace: %u field: %u\n",
		__func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
		f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);

	vpu_fmt = wave5_find_vpu_fmt(f->fmt.pix_mp.pixelformat, dec_fmt_list[VPU_FMT_TYPE_CODEC]);
	if (!vpu_fmt) {
		width = inst->src_fmt.width;
		height = inst->src_fmt.height;
		f->fmt.pix_mp.pixelformat = inst->src_fmt.pixelformat;
		frmsize = &dec_hevc_frmsize;
	} else {
		width = f->fmt.pix_mp.width;
		height = f->fmt.pix_mp.height;
		f->fmt.pix_mp.pixelformat = vpu_fmt->v4l2_pix_fmt;
		frmsize = vpu_fmt->v4l2_frmsize;
	}

	wave5_update_pix_fmt(&f->fmt.pix_mp, VPU_FMT_TYPE_CODEC,
			     width, height, frmsize);

	return 0;
}

/*
 * VIDIOC_S_FMT on the OUTPUT queue: select the codec standard, store the
 * bitstream format and colorimetry, and propagate the size to the default
 * capture format.
 */
static int wave5_vpu_dec_s_fmt_out(struct file *file, void *fh, struct v4l2_format *f)
{
	struct vpu_instance *inst = file_to_vpu_inst(file);
	const struct vpu_format *vpu_fmt;
	int i, ret;

	dev_dbg(inst->dev->dev,
		"%s: fourcc: %u width: %u height: %u num_planes: %u field: %u\n",
		__func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
		f->fmt.pix_mp.num_planes, f->fmt.pix_mp.field);

	ret = wave5_vpu_dec_try_fmt_out(file, fh, f);
	if (ret)
		return ret;

	inst->std = wave5_to_vpu_std(f->fmt.pix_mp.pixelformat, inst->type);
	if (inst->std == STD_UNKNOWN) {
		dev_warn(inst->dev->dev, "unsupported pixelformat: %.4s\n",
			 (char *)&f->fmt.pix_mp.pixelformat);
		return -EINVAL;
	}

	inst->src_fmt.width = f->fmt.pix_mp.width;
	inst->src_fmt.height = f->fmt.pix_mp.height;
	inst->src_fmt.pixelformat = f->fmt.pix_mp.pixelformat;
	inst->src_fmt.field = f->fmt.pix_mp.field;
	inst->src_fmt.flags = f->fmt.pix_mp.flags;
	inst->src_fmt.num_planes = f->fmt.pix_mp.num_planes;
	for (i = 0; i < inst->src_fmt.num_planes; i++) {
		inst->src_fmt.plane_fmt[i].bytesperline = f->fmt.pix_mp.plane_fmt[i].bytesperline;
		inst->src_fmt.plane_fmt[i].sizeimage = f->fmt.pix_mp.plane_fmt[i].sizeimage;
	}

	/* Colorimetry set on the OUTPUT side is mirrored on CAPTURE. */
	inst->colorspace = f->fmt.pix_mp.colorspace;
	inst->ycbcr_enc = f->fmt.pix_mp.ycbcr_enc;
	inst->quantization = f->fmt.pix_mp.quantization;
	inst->xfer_func = f->fmt.pix_mp.xfer_func;

	vpu_fmt = wave5_find_vpu_fmt(inst->dst_fmt.pixelformat, dec_fmt_list[VPU_FMT_TYPE_RAW]);
	if (!vpu_fmt)
		return -EINVAL;

	/* Keep the capture format in sync with the new coded size. */
	wave5_update_pix_fmt(&inst->dst_fmt, VPU_FMT_TYPE_RAW,
			     f->fmt.pix_mp.width, f->fmt.pix_mp.height,
			     vpu_fmt->v4l2_frmsize);

	return 0;
}

/*
 * VIDIOC_G_SELECTION: report composition rectangles on the capture queue.
 * COMPOSE reflects the conformance window once a sequence header has been
 * parsed (state > OPEN), otherwise the full source size.
 */
static int wave5_vpu_dec_g_selection(struct file *file, void *fh, struct v4l2_selection *s)
{
	struct vpu_instance *inst = file_to_vpu_inst(file);

	dev_dbg(inst->dev->dev, "%s: type: %u | target: %u\n", __func__, s->type, s->target);

	if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
		return -EINVAL;
	switch (s->target) {
	case V4L2_SEL_TGT_COMPOSE_BOUNDS:
	case V4L2_SEL_TGT_COMPOSE_PADDED:
		s->r.left = 0;
		s->r.top = 0;
		s->r.width = inst->dst_fmt.width;
		s->r.height = inst->dst_fmt.height;
		break;
	case V4L2_SEL_TGT_COMPOSE:
	case V4L2_SEL_TGT_COMPOSE_DEFAULT:
		s->r.left = 0;
		s->r.top = 0;
		if (inst->state > VPU_INST_STATE_OPEN) {
			s->r = inst->conf_win;
		} else {
			s->r.width = inst->src_fmt.width;
			s->r.height = inst->src_fmt.height;
		}
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

/*
 * VIDIOC_S_SELECTION: only COMPOSE on the capture queue is accepted, and
 * the rectangle is always forced to the full destination size.
 */
static int wave5_vpu_dec_s_selection(struct file *file, void *fh, struct v4l2_selection *s)
{
	struct vpu_instance *inst = file_to_vpu_inst(file);

	if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
		return -EINVAL;

	if (s->target != V4L2_SEL_TGT_COMPOSE)
		return -EINVAL;

	dev_dbg(inst->dev->dev, "V4L2_SEL_TGT_COMPOSE w: %u h: %u\n",
		s->r.width, s->r.height);

	s->r.left = 0;
	s->r.top = 0;
	s->r.width = inst->dst_fmt.width;
	s->r.height = inst->dst_fmt.height;

	return 0;
}

static int wave5_vpu_dec_stop(struct vpu_instance
*inst)
{
	int ret = 0;
	unsigned long flags;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;

	spin_lock_irqsave(&inst->state_spinlock, flags);

	/* A drain is already in progress; V4L2_DEC_CMD_STOP cannot nest. */
	if (m2m_ctx->is_draining) {
		ret = -EBUSY;
		goto unlock_and_return;
	}

	if (inst->state != VPU_INST_STATE_NONE) {
		/*
		 * Temporarily release the state_spinlock so that subsequent
		 * calls do not block on a mutex while inside this spinlock.
		 */
		spin_unlock_irqrestore(&inst->state_spinlock, flags);
		ret = wave5_vpu_dec_set_eos_on_firmware(inst);
		if (ret)
			return ret;

		spin_lock_irqsave(&inst->state_spinlock, flags);
		/*
		 * TODO eliminate this check by using a separate check for
		 * draining triggered by a resolution change.
		 */
		if (m2m_ctx->is_draining) {
			ret = -EBUSY;
			goto unlock_and_return;
		}
	}

	/*
	 * Used to remember the EOS state after the streamoff/on transition on
	 * the capture queue.
	 */
	inst->eos = true;

	if (m2m_ctx->has_stopped)
		goto unlock_and_return;

	m2m_ctx->last_src_buf = v4l2_m2m_last_src_buf(m2m_ctx);
	m2m_ctx->is_draining = true;

	/*
	 * Deferred to device run in case it wasn't in the ring buffer
	 * yet. In other case, we have to send the EOS signal to the
	 * firmware so that any pending PIC_RUN ends without new
	 * bitstream buffer.
	 */
	if (m2m_ctx->last_src_buf)
		goto unlock_and_return;

	/* Nothing was ever decoded: complete the drain immediately. */
	if (inst->state == VPU_INST_STATE_NONE) {
		send_eos_event(inst);
		flag_last_buffer_done(inst);
	}

unlock_and_return:
	spin_unlock_irqrestore(&inst->state_spinlock, flags);
	return ret;
}

/*
 * V4L2_DEC_CMD_START: leave the stopped state after a drain so decoding can
 * resume, clearing the last-buffer-dequeued flag on the capture queue.
 */
static int wave5_vpu_dec_start(struct vpu_instance *inst)
{
	int ret = 0;
	unsigned long flags;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);

	spin_lock_irqsave(&inst->state_spinlock, flags);

	if (m2m_ctx->is_draining) {
		ret = -EBUSY;
		goto unlock_and_return;
	}

	if (m2m_ctx->has_stopped)
		m2m_ctx->has_stopped = false;

	vb2_clear_last_buffer_dequeued(dst_vq);
	inst->eos = false;

unlock_and_return:
	spin_unlock_irqrestore(&inst->state_spinlock, flags);
	return ret;
}

/* VIDIOC_DECODER_CMD: dispatch STOP (drain) and START (resume) commands. */
static int wave5_vpu_dec_decoder_cmd(struct file *file, void *fh, struct v4l2_decoder_cmd *dc)
{
	struct vpu_instance *inst = file_to_vpu_inst(file);
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	int ret;

	dev_dbg(inst->dev->dev, "decoder command: %u\n", dc->cmd);

	ret = v4l2_m2m_ioctl_try_decoder_cmd(file, fh, dc);
	if (ret)
		return ret;

	switch (dc->cmd) {
	case V4L2_DEC_CMD_STOP:
		ret = wave5_vpu_dec_stop(inst);
		/* Just in case we don't have anything to decode anymore */
		v4l2_m2m_try_schedule(m2m_ctx);
		break;
	case V4L2_DEC_CMD_START:
		ret = wave5_vpu_dec_start(inst);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

static const struct v4l2_ioctl_ops wave5_vpu_dec_ioctl_ops = {
	.vidioc_querycap = wave5_vpu_dec_querycap,
	.vidioc_enum_framesizes = wave5_vpu_dec_enum_framesizes,

	.vidioc_enum_fmt_vid_cap = wave5_vpu_dec_enum_fmt_cap,
	.vidioc_s_fmt_vid_cap_mplane = wave5_vpu_dec_s_fmt_cap,
	.vidioc_g_fmt_vid_cap_mplane =
wave5_vpu_dec_g_fmt_cap,
	.vidioc_try_fmt_vid_cap_mplane = wave5_vpu_dec_try_fmt_cap,

	.vidioc_enum_fmt_vid_out = wave5_vpu_dec_enum_fmt_out,
	.vidioc_s_fmt_vid_out_mplane = wave5_vpu_dec_s_fmt_out,
	.vidioc_g_fmt_vid_out_mplane = wave5_vpu_g_fmt_out,
	.vidioc_try_fmt_vid_out_mplane = wave5_vpu_dec_try_fmt_out,

	.vidioc_g_selection = wave5_vpu_dec_g_selection,
	.vidioc_s_selection = wave5_vpu_dec_s_selection,

	.vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs,
	/*
	 * Firmware does not support CREATE_BUFS for CAPTURE queue. Since
	 * there is no immediate use-case for supporting CREATE_BUFS on
	 * just the OUTPUT queue, disable CREATE_BUFS altogether.
	 */
	.vidioc_querybuf = v4l2_m2m_ioctl_querybuf,
	.vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf,
	.vidioc_qbuf = v4l2_m2m_ioctl_qbuf,
	.vidioc_expbuf = v4l2_m2m_ioctl_expbuf,
	.vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf,
	.vidioc_streamon = v4l2_m2m_ioctl_streamon,
	.vidioc_streamoff = v4l2_m2m_ioctl_streamoff,

	.vidioc_try_decoder_cmd = v4l2_m2m_ioctl_try_decoder_cmd,
	.vidioc_decoder_cmd = wave5_vpu_dec_decoder_cmd,

	.vidioc_subscribe_event = wave5_vpu_subscribe_event,
	.vidioc_unsubscribe_event = v4l2_event_unsubscribe,
};

/*
 * vb2 queue_setup: report plane count and sizes from the current format.
 * On the capture queue, also enforce the minimum number of framebuffers
 * (inst->fbc_buf_count) required by the firmware.
 */
static int wave5_vpu_dec_queue_setup(struct vb2_queue *q, unsigned int *num_buffers,
				     unsigned int *num_planes, unsigned int sizes[],
				     struct device *alloc_devs[])
{
	struct vpu_instance *inst = vb2_get_drv_priv(q);
	struct v4l2_pix_format_mplane inst_format =
		(q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) ? inst->src_fmt : inst->dst_fmt;
	unsigned int i;

	dev_dbg(inst->dev->dev, "%s: num_buffers: %u | num_planes: %u | type: %u\n", __func__,
		*num_buffers, *num_planes, q->type);

	*num_planes = inst_format.num_planes;

	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
		/* Bitstream input is always a single plane. */
		sizes[0] = inst_format.plane_fmt[0].sizeimage;
		dev_dbg(inst->dev->dev, "%s: size[0]: %u\n", __func__, sizes[0]);
	} else if (q->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
		if (*num_buffers < inst->fbc_buf_count)
			*num_buffers = inst->fbc_buf_count;

		for (i = 0; i < *num_planes; i++) {
			sizes[i] = inst_format.plane_fmt[i].sizeimage;
			dev_dbg(inst->dev->dev, "%s: size[%u]: %u\n", __func__, i, sizes[i]);
		}
	}

	return 0;
}

/*
 * Allocate/register the firmware's compressed (FBC) reference buffers and
 * map the queued capture buffers as linear display buffers.
 * NOTE(review): this function continues beyond the visible part of the
 * file; only the portion shown here is documented.
 */
static int wave5_prepare_fb(struct vpu_instance *inst)
{
	int linear_num;
	int non_linear_num;
	int fb_stride = 0, fb_height = 0;
	int luma_size, chroma_size;
	int ret, i;
	struct v4l2_m2m_buffer *buf, *n;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	u32 bitdepth = inst->codec_info->dec_info.initial_info.luma_bitdepth;

	switch (bitdepth) {
	case 8:
		break;
	case 10:
		/* 10-bit is only supported for HEVC on capable hardware. */
		if (inst->std == W_HEVC_DEC &&
		    inst->dev->attr.support_hevc10bit_dec)
			break;

		fallthrough;
	default:
		dev_err(inst->dev->dev, "no support for %d bit depth\n", bitdepth);

		return -EINVAL;
	}

	linear_num = v4l2_m2m_num_dst_bufs_ready(m2m_ctx);
	non_linear_num = inst->fbc_buf_count;

	for (i = 0; i < non_linear_num; i++) {
		struct frame_buffer *frame = &inst->frame_buf[i];
		struct vpu_buf *vframe = &inst->frame_vbuf[i];

		fb_stride = ALIGN(inst->dst_fmt.width * bitdepth / 8, 32);
		fb_height = ALIGN(inst->dst_fmt.height, 32);
		luma_size = fb_stride * fb_height;

		chroma_size = ALIGN(fb_stride / 2, 16) * fb_height;

		/* Already allocated with the right size: keep it. */
		if (vframe->size == (luma_size + chroma_size))
			continue;

		if
(vframe->size) 1022 wave5_vpu_dec_reset_framebuffer(inst, i); 1023 1024 vframe->size = luma_size + chroma_size; 1025 ret = wave5_vdi_allocate_dma_memory(inst->dev, vframe); 1026 if (ret) { 1027 dev_dbg(inst->dev->dev, 1028 "%s: Allocating FBC buf of size %zu, fail: %d\n", 1029 __func__, vframe->size, ret); 1030 return ret; 1031 } 1032 1033 frame->buf_y = vframe->daddr; 1034 frame->buf_cb = vframe->daddr + luma_size; 1035 frame->buf_cr = (dma_addr_t)-1; 1036 frame->size = vframe->size; 1037 frame->width = inst->src_fmt.width; 1038 frame->stride = fb_stride; 1039 frame->map_type = COMPRESSED_FRAME_MAP; 1040 frame->update_fb_info = true; 1041 } 1042 /* In case the count has reduced, clean up leftover framebuffer memory */ 1043 for (i = non_linear_num; i < MAX_REG_FRAME; i++) { 1044 ret = wave5_vpu_dec_reset_framebuffer(inst, i); 1045 if (ret) 1046 break; 1047 } 1048 1049 for (i = 0; i < linear_num; i++) { 1050 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx; 1051 struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx); 1052 struct vb2_buffer *vb = vb2_get_buffer(dst_vq, i); 1053 struct frame_buffer *frame = &inst->frame_buf[non_linear_num + i]; 1054 dma_addr_t buf_addr_y = 0, buf_addr_cb = 0, buf_addr_cr = 0; 1055 u32 buf_size = 0; 1056 u32 fb_stride = inst->dst_fmt.width; 1057 u32 luma_size = fb_stride * inst->dst_fmt.height; 1058 u32 chroma_size; 1059 1060 if (inst->output_format == FORMAT_422) 1061 chroma_size = fb_stride * inst->dst_fmt.height / 2; 1062 else 1063 chroma_size = fb_stride * inst->dst_fmt.height / 4; 1064 1065 if (inst->dst_fmt.num_planes == 1) { 1066 buf_size = vb2_plane_size(vb, 0); 1067 buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0); 1068 buf_addr_cb = buf_addr_y + luma_size; 1069 buf_addr_cr = buf_addr_cb + chroma_size; 1070 } else if (inst->dst_fmt.num_planes == 2) { 1071 buf_size = vb2_plane_size(vb, 0) + 1072 vb2_plane_size(vb, 1); 1073 buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0); 1074 buf_addr_cb = 
vb2_dma_contig_plane_dma_addr(vb, 1); 1075 buf_addr_cr = buf_addr_cb + chroma_size; 1076 } else if (inst->dst_fmt.num_planes == 3) { 1077 buf_size = vb2_plane_size(vb, 0) + 1078 vb2_plane_size(vb, 1) + 1079 vb2_plane_size(vb, 2); 1080 buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0); 1081 buf_addr_cb = vb2_dma_contig_plane_dma_addr(vb, 1); 1082 buf_addr_cr = vb2_dma_contig_plane_dma_addr(vb, 2); 1083 } 1084 1085 frame->buf_y = buf_addr_y; 1086 frame->buf_cb = buf_addr_cb; 1087 frame->buf_cr = buf_addr_cr; 1088 frame->size = buf_size; 1089 frame->width = inst->src_fmt.width; 1090 frame->stride = fb_stride; 1091 frame->map_type = LINEAR_FRAME_MAP; 1092 frame->update_fb_info = true; 1093 } 1094 1095 ret = wave5_vpu_dec_register_frame_buffer_ex(inst, non_linear_num, linear_num, 1096 fb_stride, inst->dst_fmt.height); 1097 if (ret) { 1098 dev_dbg(inst->dev->dev, "%s: vpu_dec_register_frame_buffer_ex fail: %d", 1099 __func__, ret); 1100 return ret; 1101 } 1102 1103 /* 1104 * Mark all frame buffers as out of display, to avoid using them before 1105 * the application have them queued. 
1106 */ 1107 for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) { 1108 ret = wave5_vpu_dec_set_disp_flag(inst, i); 1109 if (ret) { 1110 dev_dbg(inst->dev->dev, 1111 "%s: Setting display flag of buf index: %u, fail: %d\n", 1112 __func__, i, ret); 1113 } 1114 } 1115 1116 v4l2_m2m_for_each_dst_buf_safe(m2m_ctx, buf, n) { 1117 struct vb2_v4l2_buffer *vbuf = &buf->vb; 1118 1119 ret = wave5_vpu_dec_clr_disp_flag(inst, vbuf->vb2_buf.index); 1120 if (ret) 1121 dev_dbg(inst->dev->dev, 1122 "%s: Clearing display flag of buf index: %u, fail: %d\n", 1123 __func__, i, ret); 1124 } 1125 1126 return 0; 1127 } 1128 1129 static int write_to_ringbuffer(struct vpu_instance *inst, void *buffer, size_t buffer_size, 1130 struct vpu_buf *ring_buffer, dma_addr_t wr_ptr) 1131 { 1132 size_t size; 1133 size_t offset = wr_ptr - ring_buffer->daddr; 1134 int ret; 1135 1136 if (wr_ptr + buffer_size > ring_buffer->daddr + ring_buffer->size) { 1137 size = ring_buffer->daddr + ring_buffer->size - wr_ptr; 1138 ret = wave5_vdi_write_memory(inst->dev, ring_buffer, offset, (u8 *)buffer, size); 1139 if (ret < 0) 1140 return ret; 1141 1142 ret = wave5_vdi_write_memory(inst->dev, ring_buffer, 0, (u8 *)buffer + size, 1143 buffer_size - size); 1144 if (ret < 0) 1145 return ret; 1146 } else { 1147 ret = wave5_vdi_write_memory(inst->dev, ring_buffer, offset, (u8 *)buffer, 1148 buffer_size); 1149 if (ret < 0) 1150 return ret; 1151 } 1152 1153 return 0; 1154 } 1155 1156 static struct vpu_src_buffer *inst_src_buf_remove(struct vpu_instance *inst) 1157 { 1158 struct vpu_src_buffer *b; 1159 int ret; 1160 1161 ret = mutex_lock_interruptible(&inst->feed_lock); 1162 if (ret) 1163 return NULL; 1164 1165 if (list_empty(&inst->avail_src_bufs)) { 1166 mutex_unlock(&inst->feed_lock); 1167 return NULL; 1168 } 1169 b = list_first_entry(&inst->avail_src_bufs, struct vpu_src_buffer, list); 1170 list_del_init(&b->list); 1171 mutex_unlock(&inst->feed_lock); 1172 return b; 1173 } 1174 1175 static int 
fill_ringbuffer(struct vpu_instance *inst)
{
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct vpu_src_buffer *vpu_buf;
	int ret = 0;

	/* Once the final draining buffer was written there is nothing to feed. */
	if (m2m_ctx->last_src_buf) {
		struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(m2m_ctx->last_src_buf);

		if (vpu_buf->consumed) {
			dev_dbg(inst->dev->dev, "last src buffer already written\n");
			return 0;
		}
	}

	while ((vpu_buf = inst_src_buf_remove(inst)) != NULL) {
		struct vb2_v4l2_buffer *vbuf = &vpu_buf->v4l2_m2m_buf.vb;
		struct vpu_buf *ring_buffer = &inst->bitstream_vbuf;
		size_t src_size = vb2_get_plane_payload(&vbuf->vb2_buf, 0);
		void *src_buf = vb2_plane_vaddr(&vbuf->vb2_buf, 0);
		dma_addr_t rd_ptr = 0;
		dma_addr_t wr_ptr = 0;
		size_t remain_size = 0;

		if (vpu_buf->consumed) {
			dev_dbg(inst->dev->dev, "already copied src buf (%u) to the ring buffer\n",
				vbuf->vb2_buf.index);
			continue;
		}

		if (!src_buf) {
			dev_dbg(inst->dev->dev,
				"%s: Acquiring kernel pointer to src buf (%u), fail\n",
				__func__, vbuf->vb2_buf.index);
			break;
		}

		/* Query the firmware's read/write pointers and remaining free space. */
		ret = wave5_vpu_dec_get_bitstream_buffer(inst, &rd_ptr, &wr_ptr, &remain_size);
		if (ret) {
			/* Unable to acquire the mutex */
			dev_err(inst->dev->dev, "Getting the bitstream buffer, fail: %d\n",
				ret);
			return ret;
		}

		dev_dbg(inst->dev->dev, "%s: rd_ptr %pad wr_ptr %pad", __func__, &rd_ptr, &wr_ptr);

		/* Not enough room yet: stop and retry after the firmware consumed data. */
		if (remain_size < src_size) {
			dev_dbg(inst->dev->dev,
				"%s: remaining size: %zu < source size: %zu for src buf (%u)\n",
				__func__, remain_size, src_size, vbuf->vb2_buf.index);
			break;
		}

		ret = write_to_ringbuffer(inst, src_buf, src_size, ring_buffer, wr_ptr);
		if (ret) {
			dev_err(inst->dev->dev, "Write src buf (%u) to ring buffer, fail: %d\n",
				vbuf->vb2_buf.index, ret);
			return ret;
		}

		/* Tell the firmware how many new bytes were made available. */
		ret = 
wave5_vpu_dec_update_bitstream_buffer(inst, src_size);
		if (ret) {
			dev_dbg(inst->dev->dev,
				"update_bitstream_buffer fail: %d for src buf (%u)\n",
				ret, vbuf->vb2_buf.index);
			break;
		}

		vpu_buf->consumed = true;

		/* Don't write buffers passed the last one while draining. */
		if (v4l2_m2m_is_last_draining_src_buf(m2m_ctx, vbuf)) {
			dev_dbg(inst->dev->dev, "last src buffer written to the ring buffer\n");
			break;
		}

		/* Only one buffer is fed per call; the loop exits here. */
		inst->queuing_num++;
		break;
	}

	return ret;
}

/*
 * Queue an OUTPUT (bitstream) buffer: add it to the available-source list
 * guarded by feed_lock and hand it to the m2m framework.
 *
 * NOTE(review): if mutex_lock_interruptible() fails, the function returns
 * without calling v4l2_m2m_buf_queue(), silently dropping the buffer — a
 * .buf_queue callback cannot report errors. Verify whether this can happen
 * in practice and whether an uninterruptible lock would be safer here.
 */
static void wave5_vpu_dec_buf_queue_src(struct vb2_buffer *vb)
{
	struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
	struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(vbuf);
	int ret;

	vpu_buf->consumed = false;
	vbuf->sequence = inst->queued_src_buf_num++;
	ret = mutex_lock_interruptible(&inst->feed_lock);
	if (ret)
		return;
	INIT_LIST_HEAD(&vpu_buf->list);
	list_add_tail(&vpu_buf->list, &inst->avail_src_bufs);
	mutex_unlock(&inst->feed_lock);
	v4l2_m2m_buf_queue(m2m_ctx, vbuf);
}

/*
 * Queue a CAPTURE (decoded frame) buffer. If it is the "last" buffer while
 * draining, complete it empty and signal EOS instead of queueing it.
 */
static void wave5_vpu_dec_buf_queue_dst(struct vb2_buffer *vb)
{
	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
	struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;

	pm_runtime_resume_and_get(inst->dev->dev);
	vbuf->sequence = inst->queued_dst_buf_num++;

	if (inst->state == VPU_INST_STATE_PIC_RUN) {
		struct vpu_dst_buffer *vpu_buf = wave5_to_vpu_dst_buf(vbuf);
		int ret;

		/*
		 * The buffer is already registered just clear the display flag
		 * to let the firmware know it can be used.
		 */
		vpu_buf->display = false;
		ret = wave5_vpu_dec_clr_disp_flag(inst, vb->index);
		if (ret) {
			dev_dbg(inst->dev->dev,
				"%s: Clearing the display flag of buffer index: %u, fail: %d\n",
				__func__, vb->index, ret);
		}
	}

	if (vb2_is_streaming(vb->vb2_queue) && v4l2_m2m_dst_buf_is_last(m2m_ctx)) {
		unsigned int i;
		unsigned long flags;

		/* Return the buffer empty: it only carries the LAST flag. */
		for (i = 0; i < vb->num_planes; i++)
			vb2_set_plane_payload(vb, i, 0);

		vbuf->field = V4L2_FIELD_NONE;

		spin_lock_irqsave(&inst->state_spinlock, flags);
		send_eos_event(inst);
		spin_unlock_irqrestore(&inst->state_spinlock, flags);

		v4l2_m2m_last_buffer_done(m2m_ctx, vbuf);
	} else {
		v4l2_m2m_buf_queue(m2m_ctx, vbuf);
	}
	pm_runtime_put_autosuspend(inst->dev->dev);
}

/* vb2 .buf_queue entry point: dispatch to the OUTPUT or CAPTURE handler. */
static void wave5_vpu_dec_buf_queue(struct vb2_buffer *vb)
{
	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
	struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);

	dev_dbg(inst->dev->dev, "%s: type: %4u index: %4u size: ([0]=%4lu, [1]=%4lu, [2]=%4lu)\n",
		__func__, vb->type, vb->index, vb2_plane_size(&vbuf->vb2_buf, 0),
		vb2_plane_size(&vbuf->vb2_buf, 1), vb2_plane_size(&vbuf->vb2_buf, 2));

	if (vb->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
		wave5_vpu_dec_buf_queue_src(vb);
		/* New bitstream data arrived, so the queue is no longer empty. */
		if (inst->empty_queue)
			inst->empty_queue = false;
	} else if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
		wave5_vpu_dec_buf_queue_dst(vb);
	}
}

/*
 * Allocate the DMA ring buffer the bitstream is fed through, sized at four
 * times the (1 KiB-aligned) OUTPUT format image size.
 */
static int wave5_vpu_dec_allocate_ring_buffer(struct vpu_instance *inst)
{
	int ret;
	struct vpu_buf *ring_buffer = &inst->bitstream_vbuf;

	ring_buffer->size = ALIGN(inst->src_fmt.plane_fmt[0].sizeimage, 1024) * 4;
	ret = wave5_vdi_allocate_dma_memory(inst->dev, ring_buffer);
	if (ret) {
		dev_dbg(inst->dev->dev, "%s: allocate ring buffer of size %zu fail: %d\n",
			__func__, 
ring_buffer->size, ret); 1352 return ret; 1353 } 1354 1355 inst->last_rd_ptr = ring_buffer->daddr; 1356 1357 return 0; 1358 } 1359 1360 static int wave5_vpu_dec_start_streaming(struct vb2_queue *q, unsigned int count) 1361 { 1362 struct vpu_instance *inst = vb2_get_drv_priv(q); 1363 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx; 1364 int ret = 0; 1365 1366 dev_dbg(inst->dev->dev, "%s: type: %u\n", __func__, q->type); 1367 pm_runtime_resume_and_get(inst->dev->dev); 1368 1369 v4l2_m2m_update_start_streaming_state(m2m_ctx, q); 1370 1371 if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE && inst->state == VPU_INST_STATE_NONE) { 1372 struct dec_open_param open_param; 1373 1374 memset(&open_param, 0, sizeof(struct dec_open_param)); 1375 1376 ret = wave5_vpu_dec_allocate_ring_buffer(inst); 1377 if (ret) 1378 goto return_buffers; 1379 1380 open_param.bitstream_buffer = inst->bitstream_vbuf.daddr; 1381 open_param.bitstream_buffer_size = inst->bitstream_vbuf.size; 1382 1383 ret = wave5_vpu_dec_open(inst, &open_param); 1384 if (ret) { 1385 dev_dbg(inst->dev->dev, "%s: decoder opening, fail: %d\n", 1386 __func__, ret); 1387 goto free_bitstream_vbuf; 1388 } 1389 1390 ret = switch_state(inst, VPU_INST_STATE_OPEN); 1391 if (ret) 1392 goto free_bitstream_vbuf; 1393 } else if (q->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) { 1394 struct dec_initial_info *initial_info = 1395 &inst->codec_info->dec_info.initial_info; 1396 1397 if (inst->state == VPU_INST_STATE_STOP) 1398 ret = switch_state(inst, VPU_INST_STATE_INIT_SEQ); 1399 if (ret) 1400 goto return_buffers; 1401 1402 if (inst->state == VPU_INST_STATE_INIT_SEQ && 1403 inst->dev->product_code == WAVE521C_CODE) { 1404 if (initial_info->luma_bitdepth != 8) { 1405 dev_info(inst->dev->dev, "%s: no support for %d bit depth", 1406 __func__, initial_info->luma_bitdepth); 1407 ret = -EINVAL; 1408 goto return_buffers; 1409 } 1410 } 1411 1412 } 1413 pm_runtime_put_autosuspend(inst->dev->dev); 1414 return ret; 1415 1416 
free_bitstream_vbuf: 1417 wave5_vdi_free_dma_memory(inst->dev, &inst->bitstream_vbuf); 1418 return_buffers: 1419 wave5_return_bufs(q, VB2_BUF_STATE_QUEUED); 1420 pm_runtime_put_autosuspend(inst->dev->dev); 1421 return ret; 1422 } 1423 1424 static int streamoff_output(struct vb2_queue *q) 1425 { 1426 struct vpu_instance *inst = vb2_get_drv_priv(q); 1427 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx; 1428 struct vb2_v4l2_buffer *buf; 1429 int ret; 1430 dma_addr_t new_rd_ptr; 1431 struct dec_output_info dec_info; 1432 unsigned int i; 1433 struct vpu_src_buffer *vpu_buf; 1434 1435 inst->retry = false; 1436 inst->queuing_num = 0; 1437 while ((vpu_buf = inst_src_buf_remove(inst)) != NULL) 1438 ; 1439 1440 for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) { 1441 ret = wave5_vpu_dec_set_disp_flag(inst, i); 1442 if (ret) 1443 dev_dbg(inst->dev->dev, 1444 "%s: Setting display flag of buf index: %u, fail: %d\n", 1445 __func__, i, ret); 1446 } 1447 1448 while ((buf = v4l2_m2m_src_buf_remove(m2m_ctx))) { 1449 dev_dbg(inst->dev->dev, "%s: (Multiplanar) buf type %4u | index %4u\n", 1450 __func__, buf->vb2_buf.type, buf->vb2_buf.index); 1451 v4l2_m2m_buf_done(buf, VB2_BUF_STATE_ERROR); 1452 } 1453 1454 while (wave5_vpu_dec_get_output_info(inst, &dec_info) == 0) { 1455 if (dec_info.index_frame_display >= 0) 1456 wave5_vpu_dec_set_disp_flag(inst, dec_info.index_frame_display); 1457 } 1458 1459 ret = wave5_vpu_flush_instance(inst); 1460 if (ret) 1461 return ret; 1462 1463 /* Reset the ring buffer information */ 1464 new_rd_ptr = wave5_vpu_dec_get_rd_ptr(inst); 1465 inst->last_rd_ptr = new_rd_ptr; 1466 inst->codec_info->dec_info.stream_rd_ptr = new_rd_ptr; 1467 inst->codec_info->dec_info.stream_wr_ptr = new_rd_ptr; 1468 1469 if (v4l2_m2m_has_stopped(m2m_ctx)) { 1470 unsigned long flags; 1471 1472 spin_lock_irqsave(&inst->state_spinlock, flags); 1473 send_eos_event(inst); 1474 spin_unlock_irqrestore(&inst->state_spinlock, flags); 1475 } 1476 1477 /* streamoff on output 
cancels any draining operation */ 1478 inst->eos = false; 1479 1480 return 0; 1481 } 1482 1483 static int streamoff_capture(struct vb2_queue *q) 1484 { 1485 struct vpu_instance *inst = vb2_get_drv_priv(q); 1486 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx; 1487 struct vb2_v4l2_buffer *buf; 1488 unsigned int i; 1489 int ret = 0; 1490 1491 for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) { 1492 ret = wave5_vpu_dec_set_disp_flag(inst, i); 1493 if (ret) 1494 dev_dbg(inst->dev->dev, 1495 "%s: Setting display flag of buf index: %u, fail: %d\n", 1496 __func__, i, ret); 1497 } 1498 1499 while ((buf = v4l2_m2m_dst_buf_remove(m2m_ctx))) { 1500 u32 plane; 1501 1502 dev_dbg(inst->dev->dev, "%s: buf type %4u | index %4u\n", 1503 __func__, buf->vb2_buf.type, buf->vb2_buf.index); 1504 1505 for (plane = 0; plane < inst->dst_fmt.num_planes; plane++) 1506 vb2_set_plane_payload(&buf->vb2_buf, plane, 0); 1507 1508 v4l2_m2m_buf_done(buf, VB2_BUF_STATE_ERROR); 1509 } 1510 1511 if (inst->needs_reallocation) { 1512 wave5_vpu_dec_give_command(inst, DEC_RESET_FRAMEBUF_INFO, NULL); 1513 inst->needs_reallocation = false; 1514 } 1515 1516 if (v4l2_m2m_has_stopped(m2m_ctx)) { 1517 ret = switch_state(inst, VPU_INST_STATE_INIT_SEQ); 1518 if (ret) 1519 return ret; 1520 } 1521 1522 return 0; 1523 } 1524 1525 static void wave5_vpu_dec_stop_streaming(struct vb2_queue *q) 1526 { 1527 struct vpu_instance *inst = vb2_get_drv_priv(q); 1528 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx; 1529 1530 bool check_cmd = TRUE; 1531 1532 dev_dbg(inst->dev->dev, "%s: type: %u\n", __func__, q->type); 1533 pm_runtime_resume_and_get(inst->dev->dev); 1534 inst->empty_queue = true; 1535 while (check_cmd) { 1536 struct queue_status_info q_status; 1537 struct dec_output_info dec_output_info; 1538 1539 wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status); 1540 if ((inst->state == VPU_INST_STATE_STOP || 1541 inst->state == VPU_INST_STATE_INIT_SEQ || 1542 q_status.instance_queue_count == 
0) && 1543 q_status.report_queue_count == 0) 1544 break; 1545 1546 if (wave5_vpu_dec_get_output_info(inst, &dec_output_info)) 1547 dev_dbg(inst->dev->dev, "there is no output info\n"); 1548 } 1549 1550 v4l2_m2m_update_stop_streaming_state(m2m_ctx, q); 1551 1552 if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) 1553 streamoff_output(q); 1554 else 1555 streamoff_capture(q); 1556 1557 inst->empty_queue = false; 1558 inst->sent_eos = false; 1559 pm_runtime_put_autosuspend(inst->dev->dev); 1560 } 1561 1562 static const struct vb2_ops wave5_vpu_dec_vb2_ops = { 1563 .queue_setup = wave5_vpu_dec_queue_setup, 1564 .buf_queue = wave5_vpu_dec_buf_queue, 1565 .start_streaming = wave5_vpu_dec_start_streaming, 1566 .stop_streaming = wave5_vpu_dec_stop_streaming, 1567 }; 1568 1569 static void wave5_set_default_format(struct v4l2_pix_format_mplane *src_fmt, 1570 struct v4l2_pix_format_mplane *dst_fmt) 1571 { 1572 src_fmt->pixelformat = dec_fmt_list[VPU_FMT_TYPE_CODEC][0].v4l2_pix_fmt; 1573 wave5_update_pix_fmt(src_fmt, VPU_FMT_TYPE_CODEC, 1574 W5_DEF_DEC_PIC_WIDTH, W5_DEF_DEC_PIC_HEIGHT, 1575 &dec_hevc_frmsize); 1576 1577 dst_fmt->pixelformat = dec_fmt_list[VPU_FMT_TYPE_RAW][0].v4l2_pix_fmt; 1578 wave5_update_pix_fmt(dst_fmt, VPU_FMT_TYPE_RAW, 1579 W5_DEF_DEC_PIC_WIDTH, W5_DEF_DEC_PIC_HEIGHT, 1580 &dec_raw_frmsize); 1581 } 1582 1583 static int wave5_vpu_dec_queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq) 1584 { 1585 return wave5_vpu_queue_init(priv, src_vq, dst_vq, &wave5_vpu_dec_vb2_ops); 1586 } 1587 1588 static const struct vpu_instance_ops wave5_vpu_dec_inst_ops = { 1589 .finish_process = wave5_vpu_dec_finish_decode, 1590 }; 1591 1592 static int initialize_sequence(struct vpu_instance *inst) 1593 { 1594 struct dec_initial_info initial_info; 1595 int ret = 0; 1596 unsigned long flags; 1597 1598 memset(&initial_info, 0, sizeof(struct dec_initial_info)); 1599 1600 ret = wave5_vpu_dec_issue_seq_init(inst); 1601 if (ret) { 1602 dev_dbg(inst->dev->dev, "%s: 
wave5_vpu_dec_issue_seq_init, fail: %d\n", 1603 __func__, ret); 1604 return ret; 1605 } 1606 1607 if (wave5_vpu_wait_interrupt(inst, VPU_DEC_TIMEOUT) < 0) 1608 dev_dbg(inst->dev->dev, "%s: failed to call vpu_wait_interrupt()\n", __func__); 1609 1610 ret = wave5_vpu_dec_complete_seq_init(inst, &initial_info); 1611 if (ret) { 1612 dev_dbg(inst->dev->dev, "%s: vpu_dec_complete_seq_init, fail: %d, reason: %u\n", 1613 __func__, ret, initial_info.seq_init_err_reason); 1614 wave5_handle_src_buffer(inst, initial_info.rd_ptr); 1615 return ret; 1616 } 1617 1618 spin_lock_irqsave(&inst->state_spinlock, flags); 1619 handle_dynamic_resolution_change(inst); 1620 spin_unlock_irqrestore(&inst->state_spinlock, flags); 1621 1622 return 0; 1623 } 1624 1625 static bool wave5_is_draining_or_eos(struct vpu_instance *inst) 1626 { 1627 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx; 1628 1629 lockdep_assert_held(&inst->state_spinlock); 1630 return m2m_ctx->is_draining || inst->eos; 1631 } 1632 1633 static void wave5_vpu_dec_device_run(void *priv) 1634 { 1635 struct vpu_instance *inst = priv; 1636 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx; 1637 struct queue_status_info q_status; 1638 u32 fail_res = 0; 1639 int ret = 0; 1640 1641 dev_dbg(inst->dev->dev, "%s: Fill the ring buffer with new bitstream data", __func__); 1642 pm_runtime_resume_and_get(inst->dev->dev); 1643 if (!inst->retry) { 1644 ret = fill_ringbuffer(inst); 1645 if (ret < 0) { 1646 dev_warn(inst->dev->dev, "Filling ring buffer failed\n"); 1647 goto finish_job_and_return; 1648 } else if (!inst->eos && 1649 inst->queuing_num == 0 && 1650 inst->state == VPU_INST_STATE_PIC_RUN) { 1651 dev_dbg(inst->dev->dev, "%s: no bitstream for feeding, so skip ", __func__); 1652 inst->empty_queue = true; 1653 goto finish_job_and_return; 1654 } 1655 } 1656 1657 switch (inst->state) { 1658 case VPU_INST_STATE_OPEN: 1659 ret = initialize_sequence(inst); 1660 if (ret) { 1661 unsigned long flags; 1662 1663 
spin_lock_irqsave(&inst->state_spinlock, flags); 1664 if (wave5_is_draining_or_eos(inst) && 1665 wave5_last_src_buffer_consumed(m2m_ctx)) { 1666 struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx); 1667 1668 switch_state(inst, VPU_INST_STATE_STOP); 1669 1670 if (vb2_is_streaming(dst_vq)) 1671 send_eos_event(inst); 1672 else 1673 handle_dynamic_resolution_change(inst); 1674 1675 flag_last_buffer_done(inst); 1676 } 1677 spin_unlock_irqrestore(&inst->state_spinlock, flags); 1678 } else { 1679 set_instance_state(inst, VPU_INST_STATE_INIT_SEQ); 1680 } 1681 1682 break; 1683 1684 case VPU_INST_STATE_INIT_SEQ: 1685 /* 1686 * Do this early, preparing the fb can trigger an IRQ before 1687 * we had a chance to switch, which leads to an invalid state 1688 * change. 1689 */ 1690 set_instance_state(inst, VPU_INST_STATE_PIC_RUN); 1691 /* 1692 * During DRC, the picture decoding remains pending, so just leave the job 1693 * active until this decode operation completes. 1694 */ 1695 wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status); 1696 1697 /* 1698 * The sequence must be analyzed first to calculate the proper 1699 * size of the auxiliary buffers. 
1700 */ 1701 ret = wave5_prepare_fb(inst); 1702 if (ret) { 1703 dev_warn(inst->dev->dev, "Framebuffer preparation, fail: %d\n", ret); 1704 set_instance_state(inst, VPU_INST_STATE_STOP); 1705 break; 1706 } 1707 1708 if (q_status.instance_queue_count) 1709 goto finish_job_and_return; 1710 1711 fallthrough; 1712 case VPU_INST_STATE_PIC_RUN: 1713 ret = start_decode(inst, &fail_res); 1714 if (ret) { 1715 dev_err(inst->dev->dev, 1716 "Frame decoding on m2m context (%p), fail: %d (result: %d)\n", 1717 m2m_ctx, ret, fail_res); 1718 goto finish_job_and_return; 1719 } 1720 1721 if (fail_res == WAVE5_SYSERR_QUEUEING_FAIL) { 1722 inst->retry = true; 1723 inst->queuing_fail = true; 1724 } else { 1725 inst->retry = false; 1726 if (!inst->eos) 1727 inst->queuing_num--; 1728 } 1729 break; 1730 default: 1731 dev_dbg(inst->dev->dev, "Execution of a job in state %s illegal.\n", 1732 state_to_str(inst->state)); 1733 } 1734 1735 finish_job_and_return: 1736 dev_dbg(inst->dev->dev, "%s: leave and finish job", __func__); 1737 pm_runtime_put_autosuspend(inst->dev->dev); 1738 /* 1739 * After receiving CMD_STOP, there is no input, but we have to run device_run 1740 * to send DEC_PIC command until display index == -1, so job_finish was always 1741 * called in the device_run to archive it, the logic was very wasteful 1742 * in power and CPU time. 1743 * If EOS is passed, device_run will not call job_finish no more, it is called 1744 * only if HW is idle status in order to reduce overhead. 
1745 */ 1746 if (!inst->sent_eos) 1747 v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx); 1748 } 1749 1750 static void wave5_vpu_dec_job_abort(void *priv) 1751 { 1752 struct vpu_instance *inst = priv; 1753 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx; 1754 int ret; 1755 1756 ret = set_instance_state(inst, VPU_INST_STATE_STOP); 1757 if (ret) 1758 return; 1759 1760 ret = wave5_vpu_dec_set_eos_on_firmware(inst); 1761 if (ret) 1762 dev_warn(inst->dev->dev, 1763 "Setting EOS for the bitstream, fail: %d\n", ret); 1764 1765 v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx); 1766 } 1767 1768 static int wave5_vpu_dec_job_ready(void *priv) 1769 { 1770 struct vpu_instance *inst = priv; 1771 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx; 1772 unsigned long flags; 1773 int ret = 0; 1774 1775 spin_lock_irqsave(&inst->state_spinlock, flags); 1776 1777 switch (inst->state) { 1778 case VPU_INST_STATE_NONE: 1779 dev_dbg(inst->dev->dev, "Decoder must be open to start queueing M2M jobs!\n"); 1780 break; 1781 case VPU_INST_STATE_OPEN: 1782 if (wave5_is_draining_or_eos(inst) || !v4l2_m2m_has_stopped(m2m_ctx) || 1783 v4l2_m2m_num_src_bufs_ready(m2m_ctx) > 0) { 1784 ret = 1; 1785 break; 1786 } 1787 1788 dev_dbg(inst->dev->dev, 1789 "Decoder must be draining or >= 1 OUTPUT queue buffer must be queued!\n"); 1790 break; 1791 case VPU_INST_STATE_INIT_SEQ: 1792 case VPU_INST_STATE_PIC_RUN: 1793 if (!m2m_ctx->cap_q_ctx.q.streaming) { 1794 dev_dbg(inst->dev->dev, "CAPTURE queue must be streaming to queue jobs!\n"); 1795 break; 1796 } else if (v4l2_m2m_num_dst_bufs_ready(m2m_ctx) < (inst->fbc_buf_count - 1)) { 1797 dev_dbg(inst->dev->dev, 1798 "No capture buffer ready to decode!\n"); 1799 break; 1800 } else if (!wave5_is_draining_or_eos(inst) && 1801 (!v4l2_m2m_num_src_bufs_ready(m2m_ctx) || 1802 inst->empty_queue)) { 1803 dev_dbg(inst->dev->dev, 1804 "No bitstream data to decode!\n"); 1805 break; 1806 } else if (inst->state == VPU_INST_STATE_PIC_RUN && 1807 
!wave5_is_draining_or_eos(inst) && 1808 inst->queuing_fail) { 1809 break; 1810 } 1811 ret = 1; 1812 break; 1813 case VPU_INST_STATE_STOP: 1814 dev_dbg(inst->dev->dev, "Decoder is stopped, not running.\n"); 1815 break; 1816 } 1817 1818 spin_unlock_irqrestore(&inst->state_spinlock, flags); 1819 1820 return ret; 1821 } 1822 1823 static const struct v4l2_m2m_ops wave5_vpu_dec_m2m_ops = { 1824 .device_run = wave5_vpu_dec_device_run, 1825 .job_abort = wave5_vpu_dec_job_abort, 1826 .job_ready = wave5_vpu_dec_job_ready, 1827 }; 1828 1829 static int wave5_vpu_open_dec(struct file *filp) 1830 { 1831 struct video_device *vdev = video_devdata(filp); 1832 struct vpu_device *dev = video_drvdata(filp); 1833 struct vpu_instance *inst = NULL; 1834 struct v4l2_m2m_ctx *m2m_ctx; 1835 int ret = 0; 1836 1837 inst = kzalloc_obj(*inst); 1838 if (!inst) 1839 return -ENOMEM; 1840 1841 inst->dev = dev; 1842 inst->type = VPU_INST_TYPE_DEC; 1843 inst->ops = &wave5_vpu_dec_inst_ops; 1844 1845 spin_lock_init(&inst->state_spinlock); 1846 mutex_init(&inst->feed_lock); 1847 INIT_LIST_HEAD(&inst->avail_src_bufs); 1848 1849 inst->codec_info = kzalloc_obj(*inst->codec_info); 1850 if (!inst->codec_info) { 1851 kfree(inst); 1852 return -ENOMEM; 1853 } 1854 1855 v4l2_fh_init(&inst->v4l2_fh, vdev); 1856 v4l2_fh_add(&inst->v4l2_fh, filp); 1857 1858 INIT_LIST_HEAD(&inst->list); 1859 1860 inst->v4l2_m2m_dev = inst->dev->v4l2_m2m_dec_dev; 1861 inst->v4l2_fh.m2m_ctx = 1862 v4l2_m2m_ctx_init(inst->v4l2_m2m_dev, inst, wave5_vpu_dec_queue_init); 1863 if (IS_ERR(inst->v4l2_fh.m2m_ctx)) { 1864 ret = PTR_ERR(inst->v4l2_fh.m2m_ctx); 1865 goto cleanup_inst; 1866 } 1867 m2m_ctx = inst->v4l2_fh.m2m_ctx; 1868 1869 v4l2_m2m_set_src_buffered(m2m_ctx, true); 1870 v4l2_m2m_set_dst_buffered(m2m_ctx, true); 1871 /* 1872 * We use the M2M job queue to ensure synchronization of steps where 1873 * needed, as IOCTLs can occur at anytime and we need to run commands on 1874 * the firmware in a specified order. 
1875 * In order to initialize the sequence on the firmware within an M2M 1876 * job, the M2M framework needs to be able to queue jobs before 1877 * the CAPTURE queue has been started, because we need the results of the 1878 * initialization to properly prepare the CAPTURE queue with the correct 1879 * amount of buffers. 1880 * By setting ignore_cap_streaming to true the m2m framework will call 1881 * job_ready as soon as the OUTPUT queue is streaming, instead of 1882 * waiting until both the CAPTURE and OUTPUT queues are streaming. 1883 */ 1884 m2m_ctx->ignore_cap_streaming = true; 1885 1886 v4l2_ctrl_handler_init(&inst->v4l2_ctrl_hdl, 10); 1887 v4l2_ctrl_new_std(&inst->v4l2_ctrl_hdl, NULL, 1888 V4L2_CID_MIN_BUFFERS_FOR_CAPTURE, 1, 32, 1, 1); 1889 1890 if (inst->v4l2_ctrl_hdl.error) { 1891 ret = -ENODEV; 1892 goto cleanup_inst; 1893 } 1894 1895 inst->v4l2_fh.ctrl_handler = &inst->v4l2_ctrl_hdl; 1896 v4l2_ctrl_handler_setup(&inst->v4l2_ctrl_hdl); 1897 1898 wave5_set_default_format(&inst->src_fmt, &inst->dst_fmt); 1899 inst->colorspace = V4L2_COLORSPACE_REC709; 1900 inst->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT; 1901 inst->quantization = V4L2_QUANTIZATION_DEFAULT; 1902 inst->xfer_func = V4L2_XFER_FUNC_DEFAULT; 1903 1904 init_completion(&inst->irq_done); 1905 ret = wave5_kfifo_alloc(inst); 1906 if (ret) { 1907 dev_err(inst->dev->dev, "failed to allocate fifo\n"); 1908 goto cleanup_inst; 1909 } 1910 1911 inst->id = ida_alloc(&inst->dev->inst_ida, GFP_KERNEL); 1912 if (inst->id < 0) { 1913 dev_warn(inst->dev->dev, "Allocating instance ID, fail: %d\n", inst->id); 1914 ret = inst->id; 1915 goto cleanup_inst; 1916 } 1917 1918 /* 1919 * For Wave515 SRAM memory was already allocated 1920 * at wave5_vpu_dec_register_device() 1921 */ 1922 if (inst->dev->product_code != WAVE515_CODE) 1923 wave5_vdi_allocate_sram(inst->dev); 1924 1925 ret = mutex_lock_interruptible(&dev->dev_lock); 1926 if (ret) 1927 goto cleanup_inst; 1928 1929 list_add_tail(&inst->list, &dev->instances); 1930 1931 
mutex_unlock(&dev->dev_lock); 1932 1933 return 0; 1934 1935 cleanup_inst: 1936 wave5_cleanup_instance(inst, filp); 1937 return ret; 1938 } 1939 1940 static int wave5_vpu_dec_release(struct file *filp) 1941 { 1942 return wave5_vpu_release_device(filp, wave5_vpu_dec_close, "decoder"); 1943 } 1944 1945 static const struct v4l2_file_operations wave5_vpu_dec_fops = { 1946 .owner = THIS_MODULE, 1947 .open = wave5_vpu_open_dec, 1948 .release = wave5_vpu_dec_release, 1949 .unlocked_ioctl = video_ioctl2, 1950 .poll = v4l2_m2m_fop_poll, 1951 .mmap = v4l2_m2m_fop_mmap, 1952 }; 1953 1954 int wave5_vpu_dec_register_device(struct vpu_device *dev) 1955 { 1956 struct video_device *vdev_dec; 1957 int ret; 1958 1959 /* 1960 * Secondary AXI setup for Wave515 is done by INIT_VPU command, 1961 * i.e. wave5_vpu_init(), that's why we allocate SRAM memory early. 1962 */ 1963 if (dev->product_code == WAVE515_CODE) 1964 wave5_vdi_allocate_sram(dev); 1965 1966 vdev_dec = devm_kzalloc(dev->v4l2_dev.dev, sizeof(*vdev_dec), GFP_KERNEL); 1967 if (!vdev_dec) 1968 return -ENOMEM; 1969 1970 dev->v4l2_m2m_dec_dev = v4l2_m2m_init(&wave5_vpu_dec_m2m_ops); 1971 if (IS_ERR(dev->v4l2_m2m_dec_dev)) { 1972 ret = PTR_ERR(dev->v4l2_m2m_dec_dev); 1973 dev_err(dev->dev, "v4l2_m2m_init, fail: %d\n", ret); 1974 return -EINVAL; 1975 } 1976 1977 dev->video_dev_dec = vdev_dec; 1978 1979 strscpy(vdev_dec->name, VPU_DEC_DEV_NAME, sizeof(vdev_dec->name)); 1980 vdev_dec->fops = &wave5_vpu_dec_fops; 1981 vdev_dec->ioctl_ops = &wave5_vpu_dec_ioctl_ops; 1982 vdev_dec->release = video_device_release_empty; 1983 vdev_dec->v4l2_dev = &dev->v4l2_dev; 1984 vdev_dec->vfl_dir = VFL_DIR_M2M; 1985 vdev_dec->device_caps = V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_STREAMING; 1986 vdev_dec->lock = &dev->dev_lock; 1987 1988 ret = video_register_device(vdev_dec, VFL_TYPE_VIDEO, -1); 1989 if (ret) 1990 return ret; 1991 1992 video_set_drvdata(vdev_dec, dev); 1993 1994 return 0; 1995 } 1996 1997 void wave5_vpu_dec_unregister_device(struct 
vpu_device *dev) 1998 { 1999 /* 2000 * Here is a freeing pair for Wave515 SRAM memory allocation 2001 * happened at wave5_vpu_dec_register_device(). 2002 */ 2003 if (dev->product_code == WAVE515_CODE) 2004 wave5_vdi_free_sram(dev); 2005 2006 video_unregister_device(dev->video_dev_dec); 2007 if (dev->v4l2_m2m_dec_dev) 2008 v4l2_m2m_release(dev->v4l2_m2m_dec_dev); 2009 } 2010