1 // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
2 /*
3 * Wave5 series multi-standard codec IP - decoder interface
4 *
5 * Copyright (C) 2021-2023 CHIPS&MEDIA INC
6 */
7
8 #include <linux/pm_runtime.h>
9 #include "wave5-helper.h"
10
11 #define VPU_DEC_DEV_NAME "C&M Wave5 VPU decoder"
12 #define VPU_DEC_DRV_NAME "wave5-dec"
13
/* Frame-size constraints for HEVC bitstreams (8-pixel minimum dimensions). */
static const struct v4l2_frmsize_stepwise dec_hevc_frmsize = {
	.min_width = W5_MIN_DEC_PIC_8_WIDTH,
	.max_width = W5_MAX_DEC_PIC_WIDTH,
	.step_width = W5_DEC_CODEC_STEP_WIDTH,
	.min_height = W5_MIN_DEC_PIC_8_HEIGHT,
	.max_height = W5_MAX_DEC_PIC_HEIGHT,
	.step_height = W5_DEC_CODEC_STEP_HEIGHT,
};
22
/* Frame-size constraints for H.264 bitstreams (32-pixel minimum dimensions). */
static const struct v4l2_frmsize_stepwise dec_h264_frmsize = {
	.min_width = W5_MIN_DEC_PIC_32_WIDTH,
	.max_width = W5_MAX_DEC_PIC_WIDTH,
	.step_width = W5_DEC_CODEC_STEP_WIDTH,
	.min_height = W5_MIN_DEC_PIC_32_HEIGHT,
	.max_height = W5_MAX_DEC_PIC_HEIGHT,
	.step_height = W5_DEC_CODEC_STEP_HEIGHT,
};
31
/*
 * Frame-size constraints shared by all raw (decoded) output formats; note
 * the step sizes (W5_DEC_RAW_STEP_*) differ from the codec steps above.
 */
static const struct v4l2_frmsize_stepwise dec_raw_frmsize = {
	.min_width = W5_MIN_DEC_PIC_8_WIDTH,
	.max_width = W5_MAX_DEC_PIC_WIDTH,
	.step_width = W5_DEC_RAW_STEP_WIDTH,
	.min_height = W5_MIN_DEC_PIC_8_HEIGHT,
	.max_height = W5_MAX_DEC_PIC_HEIGHT,
	.step_height = W5_DEC_RAW_STEP_HEIGHT,
};
40
/*
 * Formats supported by the decoder, indexed by queue type:
 * - VPU_FMT_TYPE_CODEC: compressed input formats (OUTPUT queue),
 * - VPU_FMT_TYPE_RAW: decoded output formats (CAPTURE queue),
 * each entry paired with its frame-size constraints.
 */
static const struct vpu_format dec_fmt_list[FMT_TYPES][MAX_FMTS] = {
	[VPU_FMT_TYPE_CODEC] = {
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_HEVC,
			.v4l2_frmsize = &dec_hevc_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_H264,
			.v4l2_frmsize = &dec_h264_frmsize,
		},
	},
	[VPU_FMT_TYPE_RAW] = {
		/* 4:2:0 and 4:2:2 variants, single- and multi-planar ("M"). */
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV420,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV12,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV21,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV422P,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV16,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV61,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV420M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV12M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV21M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV422M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV16M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV61M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
	}
};
103
104 /*
105 * Make sure that the state switch is allowed and add logging for debugging
106 * purposes
107 */
switch_state(struct vpu_instance * inst,enum vpu_instance_state state)108 static int switch_state(struct vpu_instance *inst, enum vpu_instance_state state)
109 {
110 switch (state) {
111 case VPU_INST_STATE_NONE:
112 break;
113 case VPU_INST_STATE_OPEN:
114 if (inst->state != VPU_INST_STATE_NONE)
115 goto invalid_state_switch;
116 goto valid_state_switch;
117 case VPU_INST_STATE_INIT_SEQ:
118 if (inst->state != VPU_INST_STATE_OPEN && inst->state != VPU_INST_STATE_STOP)
119 goto invalid_state_switch;
120 goto valid_state_switch;
121 case VPU_INST_STATE_PIC_RUN:
122 if (inst->state != VPU_INST_STATE_INIT_SEQ)
123 goto invalid_state_switch;
124 goto valid_state_switch;
125 case VPU_INST_STATE_STOP:
126 goto valid_state_switch;
127 }
128 invalid_state_switch:
129 WARN(1, "Invalid state switch from %s to %s.\n",
130 state_to_str(inst->state), state_to_str(state));
131 return -EINVAL;
132 valid_state_switch:
133 dev_dbg(inst->dev->dev, "Switch state from %s to %s.\n",
134 state_to_str(inst->state), state_to_str(state));
135 inst->state = state;
136 return 0;
137 }
138
set_instance_state(struct vpu_instance * inst,enum vpu_instance_state state)139 static int set_instance_state(struct vpu_instance *inst, enum vpu_instance_state state)
140 {
141 unsigned long flags;
142 int ret;
143
144 spin_lock_irqsave(&inst->state_spinlock, flags);
145 ret = switch_state(inst, state);
146 spin_unlock_irqrestore(&inst->state_spinlock, flags);
147
148 return ret;
149 }
150
wave5_vpu_dec_set_eos_on_firmware(struct vpu_instance * inst)151 static int wave5_vpu_dec_set_eos_on_firmware(struct vpu_instance *inst)
152 {
153 int ret;
154
155 ret = wave5_vpu_dec_update_bitstream_buffer(inst, 0);
156 if (ret) {
157 /*
158 * To set the EOS flag, a command is sent to the firmware.
159 * That command may never return (timeout) or may report an error.
160 */
161 dev_err(inst->dev->dev,
162 "Setting EOS for the bitstream, fail: %d\n", ret);
163 return ret;
164 }
165 return 0;
166 }
167
wave5_last_src_buffer_consumed(struct v4l2_m2m_ctx * m2m_ctx)168 static bool wave5_last_src_buffer_consumed(struct v4l2_m2m_ctx *m2m_ctx)
169 {
170 struct vpu_src_buffer *vpu_buf;
171
172 if (!m2m_ctx->last_src_buf)
173 return false;
174
175 vpu_buf = wave5_to_vpu_src_buf(m2m_ctx->last_src_buf);
176 return vpu_buf->consumed;
177 }
178
/*
 * Account for the bitstream bytes the firmware consumed (based on how far
 * the ring-buffer read pointer advanced since the last call) and complete
 * every queued source buffer that is now fully consumed. Bytes belonging
 * to a partially consumed buffer are carried over in
 * inst->remaining_consumed_bytes.
 */
static void wave5_handle_src_buffer(struct vpu_instance *inst, dma_addr_t rd_ptr)
{
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct v4l2_m2m_buffer *buf, *n;
	size_t consumed_bytes = 0;

	if (rd_ptr >= inst->last_rd_ptr) {
		consumed_bytes = rd_ptr - inst->last_rd_ptr;
	} else {
		/* The read pointer wrapped around the bitstream ring buffer. */
		size_t rd_offs = rd_ptr - inst->bitstream_vbuf.daddr;
		size_t last_rd_offs = inst->last_rd_ptr - inst->bitstream_vbuf.daddr;

		consumed_bytes = rd_offs + (inst->bitstream_vbuf.size - last_rd_offs);
	}

	inst->last_rd_ptr = rd_ptr;
	/* Include the leftover from the previously half-consumed buffer. */
	consumed_bytes += inst->remaining_consumed_bytes;

	dev_dbg(inst->dev->dev, "%s: %zu bytes of bitstream was consumed", __func__,
		consumed_bytes);

	v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buf, n) {
		struct vb2_v4l2_buffer *src_buf = &buf->vb;
		size_t src_size = vb2_get_plane_payload(&src_buf->vb2_buf, 0);

		/* This buffer is only partially consumed; keep it queued. */
		if (src_size > consumed_bytes)
			break;

		dev_dbg(inst->dev->dev, "%s: removing src buffer %i",
			__func__, src_buf->vb2_buf.index);
		src_buf = v4l2_m2m_src_buf_remove(m2m_ctx);
		/* Remember the timestamp to copy onto the decoded frame. */
		inst->timestamp = src_buf->vb2_buf.timestamp;
		v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
		consumed_bytes -= src_size;

		/* Handle the case the last bitstream buffer has been picked */
		if (src_buf == m2m_ctx->last_src_buf) {
			int ret;

			m2m_ctx->last_src_buf = NULL;
			ret = wave5_vpu_dec_set_eos_on_firmware(inst);
			if (ret)
				dev_warn(inst->dev->dev,
					 "Setting EOS for the bitstream, fail: %d\n", ret);
			break;
		}
	}

	inst->remaining_consumed_bytes = consumed_bytes;
}
229
/*
 * Kick off decoding of one frame. On failure, drop the pending source
 * buffer with an error, force the instance into STOP and finish the m2m
 * job so the queue does not stall.
 */
static int start_decode(struct vpu_instance *inst, u32 *fail_res)
{
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct vb2_v4l2_buffer *src_buf;
	int ret;

	ret = wave5_vpu_dec_start_one_frame(inst, fail_res);
	if (!ret)
		return 0;

	src_buf = v4l2_m2m_src_buf_remove(m2m_ctx);
	if (src_buf)
		v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR);
	set_instance_state(inst, VPU_INST_STATE_STOP);

	dev_dbg(inst->dev->dev, "%s: pic run failed / finish job", __func__);
	v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);

	return ret;
}
250
flag_last_buffer_done(struct vpu_instance * inst)251 static void flag_last_buffer_done(struct vpu_instance *inst)
252 {
253 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
254 struct vb2_v4l2_buffer *vb;
255 int i;
256
257 lockdep_assert_held(&inst->state_spinlock);
258
259 vb = v4l2_m2m_dst_buf_remove(m2m_ctx);
260 if (!vb) {
261 m2m_ctx->is_draining = true;
262 m2m_ctx->next_buf_last = true;
263 return;
264 }
265
266 for (i = 0; i < vb->vb2_buf.num_planes; i++)
267 vb2_set_plane_payload(&vb->vb2_buf, i, 0);
268 vb->field = V4L2_FIELD_NONE;
269
270 v4l2_m2m_last_buffer_done(m2m_ctx, vb);
271 }
272
/*
 * Queue a V4L2_EVENT_EOS to userspace and flip the instance EOS
 * bookkeeping (eos request served, sent_eos latched for job-finish
 * handling). Caller must hold inst->state_spinlock.
 */
static void send_eos_event(struct vpu_instance *inst)
{
	static const struct v4l2_event vpu_event_eos = {
		.type = V4L2_EVENT_EOS
	};

	lockdep_assert_held(&inst->state_spinlock);

	v4l2_event_queue_fh(&inst->v4l2_fh, &vpu_event_eos);
	inst->eos = false;
	inst->sent_eos = true;
}
285
/*
 * React to a mid-stream resolution change reported by the firmware:
 * update the required framebuffer count (and the MIN_BUFFERS_FOR_CAPTURE
 * control), refresh the source/destination formats and the conformance
 * window from the new sequence header, then notify userspace with a
 * V4L2_EVENT_SOURCE_CHANGE. Caller must hold inst->state_spinlock.
 */
static int handle_dynamic_resolution_change(struct vpu_instance *inst)
{
	struct v4l2_fh *fh = &inst->v4l2_fh;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;

	static const struct v4l2_event vpu_event_src_ch = {
		.type = V4L2_EVENT_SOURCE_CHANGE,
		.u.src_change.changes = V4L2_EVENT_SRC_CH_RESOLUTION,
	};
	struct dec_info *p_dec_info = &inst->codec_info->dec_info;
	struct dec_initial_info *initial_info = &inst->codec_info->dec_info.initial_info;

	lockdep_assert_held(&inst->state_spinlock);

	dev_dbg(inst->dev->dev, "%s: rd_ptr %pad", __func__, &initial_info->rd_ptr);

	dev_dbg(inst->dev->dev, "%s: width: %u height: %u profile: %u | minbuffer: %u\n",
		__func__, initial_info->pic_width, initial_info->pic_height,
		initial_info->profile, initial_info->min_frame_buffer_count);

	inst->needs_reallocation = true;
	/* One spare buffer on top of the firmware-reported minimum. */
	inst->fbc_buf_count = initial_info->min_frame_buffer_count + 1;
	if (inst->fbc_buf_count != v4l2_m2m_num_dst_bufs_ready(m2m_ctx)) {
		struct v4l2_ctrl *ctrl;

		ctrl = v4l2_ctrl_find(&inst->v4l2_ctrl_hdl,
				      V4L2_CID_MIN_BUFFERS_FOR_CAPTURE);
		if (ctrl)
			v4l2_ctrl_s_ctrl(ctrl, inst->fbc_buf_count);
	}

	if (p_dec_info->initial_info_obtained) {
		const struct vpu_format *vpu_fmt;

		/* Derive the visible rectangle from the new crop info. */
		inst->conf_win.left = initial_info->pic_crop_rect.left;
		inst->conf_win.top = initial_info->pic_crop_rect.top;
		inst->conf_win.width = initial_info->pic_width -
			initial_info->pic_crop_rect.left - initial_info->pic_crop_rect.right;
		inst->conf_win.height = initial_info->pic_height -
			initial_info->pic_crop_rect.top - initial_info->pic_crop_rect.bottom;

		vpu_fmt = wave5_find_vpu_fmt(inst->src_fmt.pixelformat,
					     dec_fmt_list[VPU_FMT_TYPE_CODEC]);
		if (!vpu_fmt)
			return -EINVAL;

		wave5_update_pix_fmt(&inst->src_fmt,
				     VPU_FMT_TYPE_CODEC,
				     initial_info->pic_width,
				     initial_info->pic_height,
				     vpu_fmt->v4l2_frmsize);

		vpu_fmt = wave5_find_vpu_fmt(inst->dst_fmt.pixelformat,
					     dec_fmt_list[VPU_FMT_TYPE_RAW]);
		if (!vpu_fmt)
			return -EINVAL;

		wave5_update_pix_fmt(&inst->dst_fmt,
				     VPU_FMT_TYPE_RAW,
				     initial_info->pic_width,
				     initial_info->pic_height,
				     vpu_fmt->v4l2_frmsize);
	}

	v4l2_event_queue_fh(fh, &vpu_event_src_ch);

	return 0;
}
354
wave5_vpu_dec_finish_decode(struct vpu_instance * inst)355 static void wave5_vpu_dec_finish_decode(struct vpu_instance *inst)
356 {
357 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
358 struct dec_output_info dec_info;
359 int ret;
360 struct vb2_v4l2_buffer *dec_buf = NULL;
361 struct vb2_v4l2_buffer *disp_buf = NULL;
362 struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
363
364 dev_dbg(inst->dev->dev, "%s: Fetch output info from firmware.", __func__);
365
366 ret = wave5_vpu_dec_get_output_info(inst, &dec_info);
367 if (ret) {
368 dev_dbg(inst->dev->dev, "%s: could not get output info.", __func__);
369 v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
370 return;
371 }
372
373 dev_dbg(inst->dev->dev, "%s: rd_ptr %pad wr_ptr %pad", __func__, &dec_info.rd_ptr,
374 &dec_info.wr_ptr);
375 wave5_handle_src_buffer(inst, dec_info.rd_ptr);
376
377 dev_dbg(inst->dev->dev, "%s: dec_info dec_idx %i disp_idx %i", __func__,
378 dec_info.index_frame_decoded, dec_info.index_frame_display);
379
380 if (!vb2_is_streaming(dst_vq)) {
381 dev_dbg(inst->dev->dev, "%s: capture is not streaming..", __func__);
382 v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
383 return;
384 }
385
386 /* Remove decoded buffer from the ready queue now that it has been
387 * decoded.
388 */
389 if (dec_info.index_frame_decoded >= 0) {
390 struct vb2_buffer *vb = vb2_get_buffer(dst_vq,
391 dec_info.index_frame_decoded);
392 if (vb) {
393 dec_buf = to_vb2_v4l2_buffer(vb);
394 dec_buf->vb2_buf.timestamp = inst->timestamp;
395 } else {
396 dev_warn(inst->dev->dev, "%s: invalid decoded frame index %i",
397 __func__, dec_info.index_frame_decoded);
398 }
399 }
400
401 if (dec_info.index_frame_display >= 0) {
402 disp_buf = v4l2_m2m_dst_buf_remove_by_idx(m2m_ctx, dec_info.index_frame_display);
403 if (!disp_buf)
404 dev_warn(inst->dev->dev, "%s: invalid display frame index %i",
405 __func__, dec_info.index_frame_display);
406 }
407
408 /* If there is anything to display, do that now */
409 if (disp_buf) {
410 struct vpu_dst_buffer *dst_vpu_buf = wave5_to_vpu_dst_buf(disp_buf);
411
412 if (inst->dst_fmt.num_planes == 1) {
413 vb2_set_plane_payload(&disp_buf->vb2_buf, 0,
414 inst->dst_fmt.plane_fmt[0].sizeimage);
415 } else if (inst->dst_fmt.num_planes == 2) {
416 vb2_set_plane_payload(&disp_buf->vb2_buf, 0,
417 inst->dst_fmt.plane_fmt[0].sizeimage);
418 vb2_set_plane_payload(&disp_buf->vb2_buf, 1,
419 inst->dst_fmt.plane_fmt[1].sizeimage);
420 } else if (inst->dst_fmt.num_planes == 3) {
421 vb2_set_plane_payload(&disp_buf->vb2_buf, 0,
422 inst->dst_fmt.plane_fmt[0].sizeimage);
423 vb2_set_plane_payload(&disp_buf->vb2_buf, 1,
424 inst->dst_fmt.plane_fmt[1].sizeimage);
425 vb2_set_plane_payload(&disp_buf->vb2_buf, 2,
426 inst->dst_fmt.plane_fmt[2].sizeimage);
427 }
428
429 /* TODO implement interlace support */
430 disp_buf->field = V4L2_FIELD_NONE;
431 dst_vpu_buf->display = true;
432 v4l2_m2m_buf_done(disp_buf, VB2_BUF_STATE_DONE);
433
434 dev_dbg(inst->dev->dev, "%s: frame_cycle %8u (payload %lu)\n",
435 __func__, dec_info.frame_cycle,
436 vb2_get_plane_payload(&disp_buf->vb2_buf, 0));
437 }
438
439 if ((dec_info.index_frame_display == DISPLAY_IDX_FLAG_SEQ_END ||
440 dec_info.sequence_changed)) {
441 unsigned long flags;
442
443 spin_lock_irqsave(&inst->state_spinlock, flags);
444 if (!v4l2_m2m_has_stopped(m2m_ctx)) {
445 switch_state(inst, VPU_INST_STATE_STOP);
446
447 if (dec_info.sequence_changed)
448 handle_dynamic_resolution_change(inst);
449 else
450 send_eos_event(inst);
451
452 flag_last_buffer_done(inst);
453 }
454 spin_unlock_irqrestore(&inst->state_spinlock, flags);
455 }
456
457 if (inst->sent_eos &&
458 v4l2_m2m_get_curr_priv(inst->v4l2_m2m_dev)) {
459 struct queue_status_info q_status;
460
461 wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);
462 if (q_status.report_queue_count == 0 &&
463 q_status.instance_queue_count == 0)
464 v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
465 }
466
467 inst->queuing_fail = false;
468 }
469
/* Report driver identity for VIDIOC_QUERYCAP. */
static int wave5_vpu_dec_querycap(struct file *file, void *fh, struct v4l2_capability *cap)
{
	/*
	 * NOTE(review): both driver and card use VPU_DEC_DRV_NAME;
	 * VPU_DEC_DEV_NAME is defined above but unused here — confirm
	 * whether card was meant to carry the device name (userspace may
	 * match on the current string, so changing it is not free).
	 */
	strscpy(cap->driver, VPU_DEC_DRV_NAME, sizeof(cap->driver));
	strscpy(cap->card, VPU_DEC_DRV_NAME, sizeof(cap->card));

	return 0;
}
477
wave5_vpu_dec_enum_framesizes(struct file * f,void * fh,struct v4l2_frmsizeenum * fsize)478 static int wave5_vpu_dec_enum_framesizes(struct file *f, void *fh, struct v4l2_frmsizeenum *fsize)
479 {
480 const struct vpu_format *vpu_fmt;
481
482 if (fsize->index)
483 return -EINVAL;
484
485 vpu_fmt = wave5_find_vpu_fmt(fsize->pixel_format, dec_fmt_list[VPU_FMT_TYPE_CODEC]);
486 if (!vpu_fmt) {
487 vpu_fmt = wave5_find_vpu_fmt(fsize->pixel_format, dec_fmt_list[VPU_FMT_TYPE_RAW]);
488 if (!vpu_fmt)
489 return -EINVAL;
490 }
491
492 fsize->type = V4L2_FRMSIZE_TYPE_CONTINUOUS;
493 fsize->stepwise.min_width = vpu_fmt->v4l2_frmsize->min_width;
494 fsize->stepwise.max_width = vpu_fmt->v4l2_frmsize->max_width;
495 fsize->stepwise.step_width = W5_DEC_CODEC_STEP_WIDTH;
496 fsize->stepwise.min_height = vpu_fmt->v4l2_frmsize->min_height;
497 fsize->stepwise.max_height = vpu_fmt->v4l2_frmsize->max_height;
498 fsize->stepwise.step_height = W5_DEC_CODEC_STEP_HEIGHT;
499
500 return 0;
501 }
502
wave5_vpu_dec_enum_fmt_cap(struct file * file,void * fh,struct v4l2_fmtdesc * f)503 static int wave5_vpu_dec_enum_fmt_cap(struct file *file, void *fh, struct v4l2_fmtdesc *f)
504 {
505 const struct vpu_format *vpu_fmt;
506
507 vpu_fmt = wave5_find_vpu_fmt_by_idx(f->index, dec_fmt_list[VPU_FMT_TYPE_RAW]);
508 if (!vpu_fmt)
509 return -EINVAL;
510
511 f->pixelformat = vpu_fmt->v4l2_pix_fmt;
512 f->flags = 0;
513
514 return 0;
515 }
516
wave5_vpu_dec_try_fmt_cap(struct file * file,void * fh,struct v4l2_format * f)517 static int wave5_vpu_dec_try_fmt_cap(struct file *file, void *fh, struct v4l2_format *f)
518 {
519 struct vpu_instance *inst = file_to_vpu_inst(file);
520 struct dec_info *p_dec_info = &inst->codec_info->dec_info;
521 const struct v4l2_frmsize_stepwise *frmsize;
522 const struct vpu_format *vpu_fmt;
523 int width, height;
524
525 dev_dbg(inst->dev->dev,
526 "%s: fourcc: %u width: %u height: %u nm planes: %u colorspace: %u field: %u\n",
527 __func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
528 f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);
529
530 vpu_fmt = wave5_find_vpu_fmt(f->fmt.pix_mp.pixelformat, dec_fmt_list[VPU_FMT_TYPE_RAW]);
531 if (!vpu_fmt) {
532 width = inst->dst_fmt.width;
533 height = inst->dst_fmt.height;
534 f->fmt.pix_mp.pixelformat = inst->dst_fmt.pixelformat;
535 frmsize = &dec_raw_frmsize;
536 } else {
537 width = f->fmt.pix_mp.width;
538 height = f->fmt.pix_mp.height;
539 f->fmt.pix_mp.pixelformat = vpu_fmt->v4l2_pix_fmt;
540 frmsize = vpu_fmt->v4l2_frmsize;
541 }
542
543 if (p_dec_info->initial_info_obtained) {
544 width = inst->dst_fmt.width;
545 height = inst->dst_fmt.height;
546 }
547
548 wave5_update_pix_fmt(&f->fmt.pix_mp, VPU_FMT_TYPE_RAW,
549 width, height, frmsize);
550 f->fmt.pix_mp.colorspace = inst->colorspace;
551 f->fmt.pix_mp.ycbcr_enc = inst->ycbcr_enc;
552 f->fmt.pix_mp.quantization = inst->quantization;
553 f->fmt.pix_mp.xfer_func = inst->xfer_func;
554
555 return 0;
556 }
557
wave5_vpu_dec_s_fmt_cap(struct file * file,void * fh,struct v4l2_format * f)558 static int wave5_vpu_dec_s_fmt_cap(struct file *file, void *fh, struct v4l2_format *f)
559 {
560 struct vpu_instance *inst = file_to_vpu_inst(file);
561 int i, ret;
562
563 dev_dbg(inst->dev->dev,
564 "%s: fourcc: %u width: %u height: %u num_planes: %u colorspace: %u field: %u\n",
565 __func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
566 f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);
567
568 ret = wave5_vpu_dec_try_fmt_cap(file, fh, f);
569 if (ret)
570 return ret;
571
572 inst->dst_fmt.width = f->fmt.pix_mp.width;
573 inst->dst_fmt.height = f->fmt.pix_mp.height;
574 inst->dst_fmt.pixelformat = f->fmt.pix_mp.pixelformat;
575 inst->dst_fmt.field = f->fmt.pix_mp.field;
576 inst->dst_fmt.flags = f->fmt.pix_mp.flags;
577 inst->dst_fmt.num_planes = f->fmt.pix_mp.num_planes;
578 for (i = 0; i < inst->dst_fmt.num_planes; i++) {
579 inst->dst_fmt.plane_fmt[i].bytesperline = f->fmt.pix_mp.plane_fmt[i].bytesperline;
580 inst->dst_fmt.plane_fmt[i].sizeimage = f->fmt.pix_mp.plane_fmt[i].sizeimage;
581 }
582
583 if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV12 ||
584 inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV12M) {
585 inst->cbcr_interleave = true;
586 inst->nv21 = false;
587 inst->output_format = FORMAT_420;
588 } else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV21 ||
589 inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV21M) {
590 inst->cbcr_interleave = true;
591 inst->nv21 = true;
592 inst->output_format = FORMAT_420;
593 } else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV16 ||
594 inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV16M) {
595 inst->cbcr_interleave = true;
596 inst->nv21 = false;
597 inst->output_format = FORMAT_422;
598 } else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV61 ||
599 inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV61M) {
600 inst->cbcr_interleave = true;
601 inst->nv21 = true;
602 inst->output_format = FORMAT_422;
603 } else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_YUV422P ||
604 inst->dst_fmt.pixelformat == V4L2_PIX_FMT_YUV422M) {
605 inst->cbcr_interleave = false;
606 inst->nv21 = false;
607 inst->output_format = FORMAT_422;
608 } else {
609 inst->cbcr_interleave = false;
610 inst->nv21 = false;
611 inst->output_format = FORMAT_420;
612 }
613
614 return 0;
615 }
616
wave5_vpu_dec_g_fmt_cap(struct file * file,void * fh,struct v4l2_format * f)617 static int wave5_vpu_dec_g_fmt_cap(struct file *file, void *fh, struct v4l2_format *f)
618 {
619 struct vpu_instance *inst = file_to_vpu_inst(file);
620 int i;
621
622 f->fmt.pix_mp.width = inst->dst_fmt.width;
623 f->fmt.pix_mp.height = inst->dst_fmt.height;
624 f->fmt.pix_mp.pixelformat = inst->dst_fmt.pixelformat;
625 f->fmt.pix_mp.field = inst->dst_fmt.field;
626 f->fmt.pix_mp.flags = inst->dst_fmt.flags;
627 f->fmt.pix_mp.num_planes = inst->dst_fmt.num_planes;
628 for (i = 0; i < f->fmt.pix_mp.num_planes; i++) {
629 f->fmt.pix_mp.plane_fmt[i].bytesperline = inst->dst_fmt.plane_fmt[i].bytesperline;
630 f->fmt.pix_mp.plane_fmt[i].sizeimage = inst->dst_fmt.plane_fmt[i].sizeimage;
631 }
632
633 f->fmt.pix_mp.colorspace = inst->colorspace;
634 f->fmt.pix_mp.ycbcr_enc = inst->ycbcr_enc;
635 f->fmt.pix_mp.quantization = inst->quantization;
636 f->fmt.pix_mp.xfer_func = inst->xfer_func;
637
638 return 0;
639 }
640
wave5_vpu_dec_enum_fmt_out(struct file * file,void * fh,struct v4l2_fmtdesc * f)641 static int wave5_vpu_dec_enum_fmt_out(struct file *file, void *fh, struct v4l2_fmtdesc *f)
642 {
643 struct vpu_instance *inst = file_to_vpu_inst(file);
644 const struct vpu_format *vpu_fmt;
645
646 dev_dbg(inst->dev->dev, "%s: index: %u\n", __func__, f->index);
647
648 vpu_fmt = wave5_find_vpu_fmt_by_idx(f->index, dec_fmt_list[VPU_FMT_TYPE_CODEC]);
649 if (!vpu_fmt)
650 return -EINVAL;
651
652 f->pixelformat = vpu_fmt->v4l2_pix_fmt;
653 f->flags = V4L2_FMT_FLAG_DYN_RESOLUTION | V4L2_FMT_FLAG_COMPRESSED;
654
655 return 0;
656 }
657
wave5_vpu_dec_try_fmt_out(struct file * file,void * fh,struct v4l2_format * f)658 static int wave5_vpu_dec_try_fmt_out(struct file *file, void *fh, struct v4l2_format *f)
659 {
660 struct vpu_instance *inst = file_to_vpu_inst(file);
661 const struct v4l2_frmsize_stepwise *frmsize;
662 const struct vpu_format *vpu_fmt;
663 int width, height;
664
665 dev_dbg(inst->dev->dev,
666 "%s: fourcc: %u width: %u height: %u num_planes: %u colorspace: %u field: %u\n",
667 __func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
668 f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);
669
670 vpu_fmt = wave5_find_vpu_fmt(f->fmt.pix_mp.pixelformat, dec_fmt_list[VPU_FMT_TYPE_CODEC]);
671 if (!vpu_fmt) {
672 width = inst->src_fmt.width;
673 height = inst->src_fmt.height;
674 f->fmt.pix_mp.pixelformat = inst->src_fmt.pixelformat;
675 frmsize = &dec_hevc_frmsize;
676 } else {
677 width = f->fmt.pix_mp.width;
678 height = f->fmt.pix_mp.height;
679 f->fmt.pix_mp.pixelformat = vpu_fmt->v4l2_pix_fmt;
680 frmsize = vpu_fmt->v4l2_frmsize;
681 }
682
683 wave5_update_pix_fmt(&f->fmt.pix_mp, VPU_FMT_TYPE_CODEC,
684 width, height, frmsize);
685
686 return 0;
687 }
688
wave5_vpu_dec_s_fmt_out(struct file * file,void * fh,struct v4l2_format * f)689 static int wave5_vpu_dec_s_fmt_out(struct file *file, void *fh, struct v4l2_format *f)
690 {
691 struct vpu_instance *inst = file_to_vpu_inst(file);
692 const struct vpu_format *vpu_fmt;
693 int i, ret;
694
695 dev_dbg(inst->dev->dev,
696 "%s: fourcc: %u width: %u height: %u num_planes: %u field: %u\n",
697 __func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
698 f->fmt.pix_mp.num_planes, f->fmt.pix_mp.field);
699
700 ret = wave5_vpu_dec_try_fmt_out(file, fh, f);
701 if (ret)
702 return ret;
703
704 inst->std = wave5_to_vpu_std(f->fmt.pix_mp.pixelformat, inst->type);
705 if (inst->std == STD_UNKNOWN) {
706 dev_warn(inst->dev->dev, "unsupported pixelformat: %.4s\n",
707 (char *)&f->fmt.pix_mp.pixelformat);
708 return -EINVAL;
709 }
710
711 inst->src_fmt.width = f->fmt.pix_mp.width;
712 inst->src_fmt.height = f->fmt.pix_mp.height;
713 inst->src_fmt.pixelformat = f->fmt.pix_mp.pixelformat;
714 inst->src_fmt.field = f->fmt.pix_mp.field;
715 inst->src_fmt.flags = f->fmt.pix_mp.flags;
716 inst->src_fmt.num_planes = f->fmt.pix_mp.num_planes;
717 for (i = 0; i < inst->src_fmt.num_planes; i++) {
718 inst->src_fmt.plane_fmt[i].bytesperline = f->fmt.pix_mp.plane_fmt[i].bytesperline;
719 inst->src_fmt.plane_fmt[i].sizeimage = f->fmt.pix_mp.plane_fmt[i].sizeimage;
720 }
721
722 inst->colorspace = f->fmt.pix_mp.colorspace;
723 inst->ycbcr_enc = f->fmt.pix_mp.ycbcr_enc;
724 inst->quantization = f->fmt.pix_mp.quantization;
725 inst->xfer_func = f->fmt.pix_mp.xfer_func;
726
727 vpu_fmt = wave5_find_vpu_fmt(inst->dst_fmt.pixelformat, dec_fmt_list[VPU_FMT_TYPE_RAW]);
728 if (!vpu_fmt)
729 return -EINVAL;
730
731 wave5_update_pix_fmt(&inst->dst_fmt, VPU_FMT_TYPE_RAW,
732 f->fmt.pix_mp.width, f->fmt.pix_mp.height,
733 vpu_fmt->v4l2_frmsize);
734
735 return 0;
736 }
737
wave5_vpu_dec_g_selection(struct file * file,void * fh,struct v4l2_selection * s)738 static int wave5_vpu_dec_g_selection(struct file *file, void *fh, struct v4l2_selection *s)
739 {
740 struct vpu_instance *inst = file_to_vpu_inst(file);
741
742 dev_dbg(inst->dev->dev, "%s: type: %u | target: %u\n", __func__, s->type, s->target);
743
744 if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
745 return -EINVAL;
746 switch (s->target) {
747 case V4L2_SEL_TGT_COMPOSE_BOUNDS:
748 case V4L2_SEL_TGT_COMPOSE_PADDED:
749 s->r.left = 0;
750 s->r.top = 0;
751 s->r.width = inst->dst_fmt.width;
752 s->r.height = inst->dst_fmt.height;
753 break;
754 case V4L2_SEL_TGT_COMPOSE:
755 case V4L2_SEL_TGT_COMPOSE_DEFAULT:
756 s->r.left = 0;
757 s->r.top = 0;
758 if (inst->state > VPU_INST_STATE_OPEN) {
759 s->r = inst->conf_win;
760 } else {
761 s->r.width = inst->src_fmt.width;
762 s->r.height = inst->src_fmt.height;
763 }
764 break;
765 default:
766 return -EINVAL;
767 }
768
769 return 0;
770 }
771
wave5_vpu_dec_s_selection(struct file * file,void * fh,struct v4l2_selection * s)772 static int wave5_vpu_dec_s_selection(struct file *file, void *fh, struct v4l2_selection *s)
773 {
774 struct vpu_instance *inst = file_to_vpu_inst(file);
775
776 if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
777 return -EINVAL;
778
779 if (s->target != V4L2_SEL_TGT_COMPOSE)
780 return -EINVAL;
781
782 dev_dbg(inst->dev->dev, "V4L2_SEL_TGT_COMPOSE w: %u h: %u\n",
783 s->r.width, s->r.height);
784
785 s->r.left = 0;
786 s->r.top = 0;
787 s->r.width = inst->dst_fmt.width;
788 s->r.height = inst->dst_fmt.height;
789
790 return 0;
791 }
792
/*
 * Implement V4L2_DEC_CMD_STOP: signal EOS to the firmware (if the instance
 * is already running) and start draining. Returns -EBUSY while a drain is
 * already in progress.
 */
static int wave5_vpu_dec_stop(struct vpu_instance *inst)
{
	int ret = 0;
	unsigned long flags;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;

	spin_lock_irqsave(&inst->state_spinlock, flags);

	if (m2m_ctx->is_draining) {
		ret = -EBUSY;
		goto unlock_and_return;
	}

	if (inst->state != VPU_INST_STATE_NONE) {
		/*
		 * Temporarily release the state_spinlock so that subsequent
		 * calls do not block on a mutex while inside this spinlock.
		 */
		spin_unlock_irqrestore(&inst->state_spinlock, flags);
		ret = wave5_vpu_dec_set_eos_on_firmware(inst);
		if (ret)
			return ret;

		spin_lock_irqsave(&inst->state_spinlock, flags);
		/*
		 * TODO eliminate this check by using a separate check for
		 * draining triggered by a resolution change.
		 */
		/* Re-check: a drain may have started while the lock was dropped. */
		if (m2m_ctx->is_draining) {
			ret = -EBUSY;
			goto unlock_and_return;
		}
	}

	/*
	 * Used to remember the EOS state after the streamoff/on transition on
	 * the capture queue.
	 */
	inst->eos = true;

	if (m2m_ctx->has_stopped)
		goto unlock_and_return;

	m2m_ctx->last_src_buf = v4l2_m2m_last_src_buf(m2m_ctx);
	m2m_ctx->is_draining = true;

	/*
	 * Deferred to device run in case it wasn't in the ring buffer
	 * yet. In other case, we have to send the EOS signal to the
	 * firmware so that any pending PIC_RUN ends without new
	 * bitstream buffer.
	 */
	if (m2m_ctx->last_src_buf)
		goto unlock_and_return;

	if (inst->state == VPU_INST_STATE_NONE) {
		/* Never started: complete the drain immediately. */
		send_eos_event(inst);
		flag_last_buffer_done(inst);
	}

unlock_and_return:
	spin_unlock_irqrestore(&inst->state_spinlock, flags);
	return ret;
}
857
wave5_vpu_dec_start(struct vpu_instance * inst)858 static int wave5_vpu_dec_start(struct vpu_instance *inst)
859 {
860 int ret = 0;
861 unsigned long flags;
862 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
863 struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
864
865 spin_lock_irqsave(&inst->state_spinlock, flags);
866
867 if (m2m_ctx->is_draining) {
868 ret = -EBUSY;
869 goto unlock_and_return;
870 }
871
872 if (m2m_ctx->has_stopped)
873 m2m_ctx->has_stopped = false;
874
875 vb2_clear_last_buffer_dequeued(dst_vq);
876 inst->eos = false;
877
878 unlock_and_return:
879 spin_unlock_irqrestore(&inst->state_spinlock, flags);
880 return ret;
881 }
882
wave5_vpu_dec_decoder_cmd(struct file * file,void * fh,struct v4l2_decoder_cmd * dc)883 static int wave5_vpu_dec_decoder_cmd(struct file *file, void *fh, struct v4l2_decoder_cmd *dc)
884 {
885 struct vpu_instance *inst = file_to_vpu_inst(file);
886 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
887 int ret;
888
889 dev_dbg(inst->dev->dev, "decoder command: %u\n", dc->cmd);
890
891 ret = v4l2_m2m_ioctl_try_decoder_cmd(file, fh, dc);
892 if (ret)
893 return ret;
894
895 switch (dc->cmd) {
896 case V4L2_DEC_CMD_STOP:
897 ret = wave5_vpu_dec_stop(inst);
898 /* Just in case we don't have anything to decode anymore */
899 v4l2_m2m_try_schedule(m2m_ctx);
900 break;
901 case V4L2_DEC_CMD_START:
902 ret = wave5_vpu_dec_start(inst);
903 break;
904 default:
905 ret = -EINVAL;
906 }
907
908 return ret;
909 }
910
/* V4L2 ioctl dispatch table for the decoder video device. */
static const struct v4l2_ioctl_ops wave5_vpu_dec_ioctl_ops = {
	.vidioc_querycap = wave5_vpu_dec_querycap,
	.vidioc_enum_framesizes = wave5_vpu_dec_enum_framesizes,

	/* CAPTURE queue: decoded raw frames. */
	.vidioc_enum_fmt_vid_cap = wave5_vpu_dec_enum_fmt_cap,
	.vidioc_s_fmt_vid_cap_mplane = wave5_vpu_dec_s_fmt_cap,
	.vidioc_g_fmt_vid_cap_mplane = wave5_vpu_dec_g_fmt_cap,
	.vidioc_try_fmt_vid_cap_mplane = wave5_vpu_dec_try_fmt_cap,

	/*
	 * OUTPUT queue: compressed bitstream. g_fmt uses wave5_vpu_g_fmt_out,
	 * presumably a helper shared with the encoder (wave5-helper.h) —
	 * hence the different naming from its siblings; verify against the
	 * helper's definition.
	 */
	.vidioc_enum_fmt_vid_out = wave5_vpu_dec_enum_fmt_out,
	.vidioc_s_fmt_vid_out_mplane = wave5_vpu_dec_s_fmt_out,
	.vidioc_g_fmt_vid_out_mplane = wave5_vpu_g_fmt_out,
	.vidioc_try_fmt_vid_out_mplane = wave5_vpu_dec_try_fmt_out,

	.vidioc_g_selection = wave5_vpu_dec_g_selection,
	.vidioc_s_selection = wave5_vpu_dec_s_selection,

	.vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs,
	/*
	 * Firmware does not support CREATE_BUFS for CAPTURE queue. Since
	 * there is no immediate use-case for supporting CREATE_BUFS on
	 * just the OUTPUT queue, disable CREATE_BUFS altogether.
	 */
	.vidioc_querybuf = v4l2_m2m_ioctl_querybuf,
	.vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf,
	.vidioc_qbuf = v4l2_m2m_ioctl_qbuf,
	.vidioc_expbuf = v4l2_m2m_ioctl_expbuf,
	.vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf,
	.vidioc_streamon = v4l2_m2m_ioctl_streamon,
	.vidioc_streamoff = v4l2_m2m_ioctl_streamoff,

	.vidioc_try_decoder_cmd = v4l2_m2m_ioctl_try_decoder_cmd,
	.vidioc_decoder_cmd = wave5_vpu_dec_decoder_cmd,

	.vidioc_subscribe_event = wave5_vpu_subscribe_event,
	.vidioc_unsubscribe_event = v4l2_event_unsubscribe,
};
948
wave5_vpu_dec_queue_setup(struct vb2_queue * q,unsigned int * num_buffers,unsigned int * num_planes,unsigned int sizes[],struct device * alloc_devs[])949 static int wave5_vpu_dec_queue_setup(struct vb2_queue *q, unsigned int *num_buffers,
950 unsigned int *num_planes, unsigned int sizes[],
951 struct device *alloc_devs[])
952 {
953 struct vpu_instance *inst = vb2_get_drv_priv(q);
954 struct v4l2_pix_format_mplane inst_format =
955 (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) ? inst->src_fmt : inst->dst_fmt;
956 unsigned int i;
957
958 dev_dbg(inst->dev->dev, "%s: num_buffers: %u | num_planes: %u | type: %u\n", __func__,
959 *num_buffers, *num_planes, q->type);
960
961 *num_planes = inst_format.num_planes;
962
963 if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
964 sizes[0] = inst_format.plane_fmt[0].sizeimage;
965 dev_dbg(inst->dev->dev, "%s: size[0]: %u\n", __func__, sizes[0]);
966 } else if (q->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
967 if (*num_buffers < inst->fbc_buf_count)
968 *num_buffers = inst->fbc_buf_count;
969
970 for (i = 0; i < *num_planes; i++) {
971 sizes[i] = inst_format.plane_fmt[i].sizeimage;
972 dev_dbg(inst->dev->dev, "%s: size[%u]: %u\n", __func__, i, sizes[i]);
973 }
974 }
975
976 return 0;
977 }
978
wave5_prepare_fb(struct vpu_instance * inst)979 static int wave5_prepare_fb(struct vpu_instance *inst)
980 {
981 int linear_num;
982 int non_linear_num;
983 int fb_stride = 0, fb_height = 0;
984 int luma_size, chroma_size;
985 int ret, i;
986 struct v4l2_m2m_buffer *buf, *n;
987 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
988 u32 bitdepth = inst->codec_info->dec_info.initial_info.luma_bitdepth;
989
990 switch (bitdepth) {
991 case 8:
992 break;
993 case 10:
994 if (inst->std == W_HEVC_DEC &&
995 inst->dev->attr.support_hevc10bit_dec)
996 break;
997
998 fallthrough;
999 default:
1000 dev_err(inst->dev->dev, "no support for %d bit depth\n", bitdepth);
1001
1002 return -EINVAL;
1003 }
1004
1005 linear_num = v4l2_m2m_num_dst_bufs_ready(m2m_ctx);
1006 non_linear_num = inst->fbc_buf_count;
1007
1008 for (i = 0; i < non_linear_num; i++) {
1009 struct frame_buffer *frame = &inst->frame_buf[i];
1010 struct vpu_buf *vframe = &inst->frame_vbuf[i];
1011
1012 fb_stride = ALIGN(inst->dst_fmt.width * bitdepth / 8, 32);
1013 fb_height = ALIGN(inst->dst_fmt.height, 32);
1014 luma_size = fb_stride * fb_height;
1015
1016 chroma_size = ALIGN(fb_stride / 2, 16) * fb_height;
1017
1018 if (vframe->size == (luma_size + chroma_size))
1019 continue;
1020
1021 if (vframe->size)
1022 wave5_vpu_dec_reset_framebuffer(inst, i);
1023
1024 vframe->size = luma_size + chroma_size;
1025 ret = wave5_vdi_allocate_dma_memory(inst->dev, vframe);
1026 if (ret) {
1027 dev_dbg(inst->dev->dev,
1028 "%s: Allocating FBC buf of size %zu, fail: %d\n",
1029 __func__, vframe->size, ret);
1030 return ret;
1031 }
1032
1033 frame->buf_y = vframe->daddr;
1034 frame->buf_cb = vframe->daddr + luma_size;
1035 frame->buf_cr = (dma_addr_t)-1;
1036 frame->size = vframe->size;
1037 frame->width = inst->src_fmt.width;
1038 frame->stride = fb_stride;
1039 frame->map_type = COMPRESSED_FRAME_MAP;
1040 frame->update_fb_info = true;
1041 }
1042 /* In case the count has reduced, clean up leftover framebuffer memory */
1043 for (i = non_linear_num; i < MAX_REG_FRAME; i++) {
1044 ret = wave5_vpu_dec_reset_framebuffer(inst, i);
1045 if (ret)
1046 break;
1047 }
1048
1049 for (i = 0; i < linear_num; i++) {
1050 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1051 struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
1052 struct vb2_buffer *vb = vb2_get_buffer(dst_vq, i);
1053 struct frame_buffer *frame = &inst->frame_buf[non_linear_num + i];
1054 dma_addr_t buf_addr_y = 0, buf_addr_cb = 0, buf_addr_cr = 0;
1055 u32 buf_size = 0;
1056 u32 fb_stride = inst->dst_fmt.width;
1057 u32 luma_size = fb_stride * inst->dst_fmt.height;
1058 u32 chroma_size;
1059
1060 if (inst->output_format == FORMAT_422)
1061 chroma_size = fb_stride * inst->dst_fmt.height / 2;
1062 else
1063 chroma_size = fb_stride * inst->dst_fmt.height / 4;
1064
1065 if (inst->dst_fmt.num_planes == 1) {
1066 buf_size = vb2_plane_size(vb, 0);
1067 buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0);
1068 buf_addr_cb = buf_addr_y + luma_size;
1069 buf_addr_cr = buf_addr_cb + chroma_size;
1070 } else if (inst->dst_fmt.num_planes == 2) {
1071 buf_size = vb2_plane_size(vb, 0) +
1072 vb2_plane_size(vb, 1);
1073 buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0);
1074 buf_addr_cb = vb2_dma_contig_plane_dma_addr(vb, 1);
1075 buf_addr_cr = buf_addr_cb + chroma_size;
1076 } else if (inst->dst_fmt.num_planes == 3) {
1077 buf_size = vb2_plane_size(vb, 0) +
1078 vb2_plane_size(vb, 1) +
1079 vb2_plane_size(vb, 2);
1080 buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0);
1081 buf_addr_cb = vb2_dma_contig_plane_dma_addr(vb, 1);
1082 buf_addr_cr = vb2_dma_contig_plane_dma_addr(vb, 2);
1083 }
1084
1085 frame->buf_y = buf_addr_y;
1086 frame->buf_cb = buf_addr_cb;
1087 frame->buf_cr = buf_addr_cr;
1088 frame->size = buf_size;
1089 frame->width = inst->src_fmt.width;
1090 frame->stride = fb_stride;
1091 frame->map_type = LINEAR_FRAME_MAP;
1092 frame->update_fb_info = true;
1093 }
1094
1095 ret = wave5_vpu_dec_register_frame_buffer_ex(inst, non_linear_num, linear_num,
1096 fb_stride, inst->dst_fmt.height);
1097 if (ret) {
1098 dev_dbg(inst->dev->dev, "%s: vpu_dec_register_frame_buffer_ex fail: %d",
1099 __func__, ret);
1100 return ret;
1101 }
1102
1103 /*
1104 * Mark all frame buffers as out of display, to avoid using them before
1105 * the application have them queued.
1106 */
1107 for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) {
1108 ret = wave5_vpu_dec_set_disp_flag(inst, i);
1109 if (ret) {
1110 dev_dbg(inst->dev->dev,
1111 "%s: Setting display flag of buf index: %u, fail: %d\n",
1112 __func__, i, ret);
1113 }
1114 }
1115
1116 v4l2_m2m_for_each_dst_buf_safe(m2m_ctx, buf, n) {
1117 struct vb2_v4l2_buffer *vbuf = &buf->vb;
1118
1119 ret = wave5_vpu_dec_clr_disp_flag(inst, vbuf->vb2_buf.index);
1120 if (ret)
1121 dev_dbg(inst->dev->dev,
1122 "%s: Clearing display flag of buf index: %u, fail: %d\n",
1123 __func__, i, ret);
1124 }
1125
1126 return 0;
1127 }
1128
/*
 * Copy @buffer_size bytes from @buffer into the bitstream ring buffer at
 * @wr_ptr, wrapping around to the start of the ring when the write would
 * run past its end. Returns 0 on success or a negative error code from
 * the VDI layer.
 */
static int write_to_ringbuffer(struct vpu_instance *inst, void *buffer, size_t buffer_size,
			       struct vpu_buf *ring_buffer, dma_addr_t wr_ptr)
{
	size_t offset = wr_ptr - ring_buffer->daddr;
	size_t tail = ring_buffer->daddr + ring_buffer->size - wr_ptr;
	int ret;

	if (buffer_size <= tail) {
		/* Fits without wrapping: a single contiguous write. */
		ret = wave5_vdi_write_memory(inst->dev, ring_buffer, offset, (u8 *)buffer,
					     buffer_size);
		if (ret < 0)
			return ret;
	} else {
		/* Fill up to the end of the ring, then wrap to the start. */
		ret = wave5_vdi_write_memory(inst->dev, ring_buffer, offset, (u8 *)buffer,
					     tail);
		if (ret < 0)
			return ret;

		ret = wave5_vdi_write_memory(inst->dev, ring_buffer, 0, (u8 *)buffer + tail,
					     buffer_size - tail);
		if (ret < 0)
			return ret;
	}

	return 0;
}
1155
inst_src_buf_remove(struct vpu_instance * inst)1156 static struct vpu_src_buffer *inst_src_buf_remove(struct vpu_instance *inst)
1157 {
1158 struct vpu_src_buffer *b;
1159 int ret;
1160
1161 ret = mutex_lock_interruptible(&inst->feed_lock);
1162 if (ret)
1163 return NULL;
1164
1165 if (list_empty(&inst->avail_src_bufs)) {
1166 mutex_unlock(&inst->feed_lock);
1167 return NULL;
1168 }
1169 b = list_first_entry(&inst->avail_src_bufs, struct vpu_src_buffer, list);
1170 list_del_init(&b->list);
1171 mutex_unlock(&inst->feed_lock);
1172 return b;
1173 }
1174
/*
 * Copy pending source (bitstream) buffers into the decoder's ring buffer.
 *
 * Buffers are taken from the instance's avail_src_bufs list. At most one
 * new buffer is written per call (note the unconditional break at the end
 * of the loop), so feeding is paced one buffer per decode job.
 *
 * Returns 0 on success or when nothing could be written, or a negative
 * error code on failure.
 */
static int fill_ringbuffer(struct vpu_instance *inst)
{
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct vpu_src_buffer *vpu_buf;
	int ret = 0;

	/* Nothing to do once the final draining buffer has been written. */
	if (m2m_ctx->last_src_buf) {
		struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(m2m_ctx->last_src_buf);

		if (vpu_buf->consumed) {
			dev_dbg(inst->dev->dev, "last src buffer already written\n");
			return 0;
		}
	}

	/*
	 * NOTE(review): inst_src_buf_remove() takes the buffer off
	 * avail_src_bufs before we know it can be written; on the early
	 * break paths below the buffer is not marked consumed — confirm it
	 * is accounted for elsewhere (e.g. on streamoff).
	 */
	while ((vpu_buf = inst_src_buf_remove(inst)) != NULL) {
		struct vb2_v4l2_buffer *vbuf = &vpu_buf->v4l2_m2m_buf.vb;
		struct vpu_buf *ring_buffer = &inst->bitstream_vbuf;
		size_t src_size = vb2_get_plane_payload(&vbuf->vb2_buf, 0);
		void *src_buf = vb2_plane_vaddr(&vbuf->vb2_buf, 0);
		dma_addr_t rd_ptr = 0;
		dma_addr_t wr_ptr = 0;
		size_t remain_size = 0;

		if (vpu_buf->consumed) {
			dev_dbg(inst->dev->dev, "already copied src buf (%u) to the ring buffer\n",
				vbuf->vb2_buf.index);
			continue;
		}

		if (!src_buf) {
			dev_dbg(inst->dev->dev,
				"%s: Acquiring kernel pointer to src buf (%u), fail\n",
				__func__, vbuf->vb2_buf.index);
			break;
		}

		/* Query the current read/write pointers and the free space. */
		ret = wave5_vpu_dec_get_bitstream_buffer(inst, &rd_ptr, &wr_ptr, &remain_size);
		if (ret) {
			/* Unable to acquire the mutex */
			dev_err(inst->dev->dev, "Getting the bitstream buffer, fail: %d\n",
				ret);
			return ret;
		}

		dev_dbg(inst->dev->dev, "%s: rd_ptr %pad wr_ptr %pad", __func__, &rd_ptr, &wr_ptr);

		/* Not enough room in the ring: retry on a later call. */
		if (remain_size < src_size) {
			dev_dbg(inst->dev->dev,
				"%s: remaining size: %zu < source size: %zu for src buf (%u)\n",
				__func__, remain_size, src_size, vbuf->vb2_buf.index);
			break;
		}

		ret = write_to_ringbuffer(inst, src_buf, src_size, ring_buffer, wr_ptr);
		if (ret) {
			dev_err(inst->dev->dev, "Write src buf (%u) to ring buffer, fail: %d\n",
				vbuf->vb2_buf.index, ret);
			return ret;
		}

		/* Advance the firmware's write pointer past the new data. */
		ret = wave5_vpu_dec_update_bitstream_buffer(inst, src_size);
		if (ret) {
			dev_dbg(inst->dev->dev,
				"update_bitstream_buffer fail: %d for src buf (%u)\n",
				ret, vbuf->vb2_buf.index);
			break;
		}

		vpu_buf->consumed = true;

		/* Don't write buffers passed the last one while draining. */
		if (v4l2_m2m_is_last_draining_src_buf(m2m_ctx, vbuf)) {
			dev_dbg(inst->dev->dev, "last src buffer written to the ring buffer\n");
			break;
		}

		inst->queuing_num++;
		break;
	}

	return ret;
}
1258
wave5_vpu_dec_buf_queue_src(struct vb2_buffer * vb)1259 static void wave5_vpu_dec_buf_queue_src(struct vb2_buffer *vb)
1260 {
1261 struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
1262 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1263 struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
1264 struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(vbuf);
1265 int ret;
1266
1267 vpu_buf->consumed = false;
1268 vbuf->sequence = inst->queued_src_buf_num++;
1269 ret = mutex_lock_interruptible(&inst->feed_lock);
1270 if (ret)
1271 return;
1272 INIT_LIST_HEAD(&vpu_buf->list);
1273 list_add_tail(&vpu_buf->list, &inst->avail_src_bufs);
1274 mutex_unlock(&inst->feed_lock);
1275 v4l2_m2m_buf_queue(m2m_ctx, vbuf);
1276 }
1277
/*
 * Queue a CAPTURE (display destination) buffer.
 *
 * While decoding is running, the buffer is already registered with the
 * firmware, so only its display flag needs to be cleared. While draining,
 * the buffer queued after the last pending frame is returned immediately
 * as an empty "last" buffer together with the EOS event.
 */
static void wave5_vpu_dec_buf_queue_dst(struct vb2_buffer *vb)
{
	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
	struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;

	pm_runtime_resume_and_get(inst->dev->dev);
	vbuf->sequence = inst->queued_dst_buf_num++;

	if (inst->state == VPU_INST_STATE_PIC_RUN) {
		struct vpu_dst_buffer *vpu_buf = wave5_to_vpu_dst_buf(vbuf);
		int ret;

		/*
		 * The buffer is already registered just clear the display flag
		 * to let the firmware know it can be used.
		 */
		vpu_buf->display = false;
		ret = wave5_vpu_dec_clr_disp_flag(inst, vb->index);
		if (ret) {
			dev_dbg(inst->dev->dev,
				"%s: Clearing the display flag of buffer index: %u, fail: %d\n",
				__func__, vb->index, ret);
		}
	}

	if (vb2_is_streaming(vb->vb2_queue) && v4l2_m2m_dst_buf_is_last(m2m_ctx)) {
		/* Draining finished: return this buffer empty, flagged LAST. */
		unsigned int i;

		for (i = 0; i < vb->num_planes; i++)
			vb2_set_plane_payload(vb, i, 0);

		vbuf->field = V4L2_FIELD_NONE;

		send_eos_event(inst);
		v4l2_m2m_last_buffer_done(m2m_ctx, vbuf);
	} else {
		v4l2_m2m_buf_queue(m2m_ctx, vbuf);
	}
	pm_runtime_put_autosuspend(inst->dev->dev);
}
1319
wave5_vpu_dec_buf_queue(struct vb2_buffer * vb)1320 static void wave5_vpu_dec_buf_queue(struct vb2_buffer *vb)
1321 {
1322 struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
1323 struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
1324
1325 dev_dbg(inst->dev->dev, "%s: type: %4u index: %4u size: ([0]=%4lu, [1]=%4lu, [2]=%4lu)\n",
1326 __func__, vb->type, vb->index, vb2_plane_size(&vbuf->vb2_buf, 0),
1327 vb2_plane_size(&vbuf->vb2_buf, 1), vb2_plane_size(&vbuf->vb2_buf, 2));
1328
1329 if (vb->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
1330 wave5_vpu_dec_buf_queue_src(vb);
1331 if (inst->empty_queue)
1332 inst->empty_queue = false;
1333 } else if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
1334 wave5_vpu_dec_buf_queue_dst(vb);
1335 }
1336 }
1337
wave5_vpu_dec_allocate_ring_buffer(struct vpu_instance * inst)1338 static int wave5_vpu_dec_allocate_ring_buffer(struct vpu_instance *inst)
1339 {
1340 int ret;
1341 struct vpu_buf *ring_buffer = &inst->bitstream_vbuf;
1342
1343 ring_buffer->size = ALIGN(inst->src_fmt.plane_fmt[0].sizeimage, 1024) * 4;
1344 ret = wave5_vdi_allocate_dma_memory(inst->dev, ring_buffer);
1345 if (ret) {
1346 dev_dbg(inst->dev->dev, "%s: allocate ring buffer of size %zu fail: %d\n",
1347 __func__, ring_buffer->size, ret);
1348 return ret;
1349 }
1350
1351 inst->last_rd_ptr = ring_buffer->daddr;
1352
1353 return 0;
1354 }
1355
/*
 * vb2 start_streaming hook.
 *
 * On the first OUTPUT streamon (instance still in STATE_NONE) the
 * bitstream ring buffer is allocated and the decoder instance is opened
 * on the firmware. On CAPTURE streamon after a stop, the state rolls back
 * to INIT_SEQ; on Wave521C only 8-bit streams are accepted. On failure
 * all queued buffers are returned in the QUEUED state.
 */
static int wave5_vpu_dec_start_streaming(struct vb2_queue *q, unsigned int count)
{
	struct vpu_instance *inst = vb2_get_drv_priv(q);
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	int ret = 0;

	dev_dbg(inst->dev->dev, "%s: type: %u\n", __func__, q->type);
	pm_runtime_resume_and_get(inst->dev->dev);

	v4l2_m2m_update_start_streaming_state(m2m_ctx, q);

	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE && inst->state == VPU_INST_STATE_NONE) {
		struct dec_open_param open_param;

		memset(&open_param, 0, sizeof(struct dec_open_param));

		ret = wave5_vpu_dec_allocate_ring_buffer(inst);
		if (ret)
			goto return_buffers;

		open_param.bitstream_buffer = inst->bitstream_vbuf.daddr;
		open_param.bitstream_buffer_size = inst->bitstream_vbuf.size;

		ret = wave5_vpu_dec_open(inst, &open_param);
		if (ret) {
			dev_dbg(inst->dev->dev, "%s: decoder opening, fail: %d\n",
				__func__, ret);
			goto free_bitstream_vbuf;
		}

		ret = switch_state(inst, VPU_INST_STATE_OPEN);
		if (ret)
			goto free_bitstream_vbuf;
	} else if (q->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
		struct dec_initial_info *initial_info =
			&inst->codec_info->dec_info.initial_info;

		/* Restarting CAPTURE after a stop: redo sequence init. */
		if (inst->state == VPU_INST_STATE_STOP)
			ret = switch_state(inst, VPU_INST_STATE_INIT_SEQ);
		if (ret)
			goto return_buffers;

		/* Wave521C cannot decode 10-bit streams. */
		if (inst->state == VPU_INST_STATE_INIT_SEQ &&
		    inst->dev->product_code == WAVE521C_CODE) {
			if (initial_info->luma_bitdepth != 8) {
				dev_info(inst->dev->dev, "%s: no support for %d bit depth",
					 __func__, initial_info->luma_bitdepth);
				ret = -EINVAL;
				goto return_buffers;
			}
		}

	}
	pm_runtime_put_autosuspend(inst->dev->dev);
	return ret;

free_bitstream_vbuf:
	wave5_vdi_free_dma_memory(inst->dev, &inst->bitstream_vbuf);
return_buffers:
	wave5_return_bufs(q, VB2_BUF_STATE_QUEUED);
	pm_runtime_put_autosuspend(inst->dev->dev);
	return ret;
}
1419
/*
 * Stop the OUTPUT (bitstream) queue.
 *
 * Discards all source buffers (both the driver's feed list and the m2m
 * ready queue), drains pending decode results, flushes the instance on
 * the firmware, resets the bitstream ring-buffer pointers and cancels any
 * draining operation. Returns 0 or a negative error from the flush.
 */
static int streamoff_output(struct vb2_queue *q)
{
	struct vpu_instance *inst = vb2_get_drv_priv(q);
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct vb2_v4l2_buffer *buf;
	int ret;
	dma_addr_t new_rd_ptr;
	struct dec_output_info dec_info;
	unsigned int i;
	struct vpu_src_buffer *vpu_buf;

	inst->retry = false;
	inst->queuing_num = 0;
	/* Drop every source buffer still waiting to be fed to the ring. */
	while ((vpu_buf = inst_src_buf_remove(inst)) != NULL)
		;

	/* Give all display buffers back to the firmware. */
	for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) {
		ret = wave5_vpu_dec_set_disp_flag(inst, i);
		if (ret)
			dev_dbg(inst->dev->dev,
				"%s: Setting display flag of buf index: %u, fail: %d\n",
				__func__, i, ret);
	}

	/* Return all m2m-queued source buffers to userspace as errored. */
	while ((buf = v4l2_m2m_src_buf_remove(m2m_ctx))) {
		dev_dbg(inst->dev->dev, "%s: (Multiplanar) buf type %4u | index %4u\n",
			__func__, buf->vb2_buf.type, buf->vb2_buf.index);
		v4l2_m2m_buf_done(buf, VB2_BUF_STATE_ERROR);
	}

	/* Drain pending decode results, re-flagging displayed frames. */
	while (wave5_vpu_dec_get_output_info(inst, &dec_info) == 0) {
		if (dec_info.index_frame_display >= 0)
			wave5_vpu_dec_set_disp_flag(inst, dec_info.index_frame_display);
	}

	ret = wave5_vpu_flush_instance(inst);
	if (ret)
		return ret;

	/* Reset the ring buffer information */
	new_rd_ptr = wave5_vpu_dec_get_rd_ptr(inst);
	inst->last_rd_ptr = new_rd_ptr;
	inst->codec_info->dec_info.stream_rd_ptr = new_rd_ptr;
	inst->codec_info->dec_info.stream_wr_ptr = new_rd_ptr;

	if (v4l2_m2m_has_stopped(m2m_ctx))
		send_eos_event(inst);

	/* streamoff on output cancels any draining operation */
	inst->eos = false;

	return 0;
}
1473
streamoff_capture(struct vb2_queue * q)1474 static int streamoff_capture(struct vb2_queue *q)
1475 {
1476 struct vpu_instance *inst = vb2_get_drv_priv(q);
1477 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1478 struct vb2_v4l2_buffer *buf;
1479 unsigned int i;
1480 int ret = 0;
1481
1482 for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) {
1483 ret = wave5_vpu_dec_set_disp_flag(inst, i);
1484 if (ret)
1485 dev_dbg(inst->dev->dev,
1486 "%s: Setting display flag of buf index: %u, fail: %d\n",
1487 __func__, i, ret);
1488 }
1489
1490 while ((buf = v4l2_m2m_dst_buf_remove(m2m_ctx))) {
1491 u32 plane;
1492
1493 dev_dbg(inst->dev->dev, "%s: buf type %4u | index %4u\n",
1494 __func__, buf->vb2_buf.type, buf->vb2_buf.index);
1495
1496 for (plane = 0; plane < inst->dst_fmt.num_planes; plane++)
1497 vb2_set_plane_payload(&buf->vb2_buf, plane, 0);
1498
1499 v4l2_m2m_buf_done(buf, VB2_BUF_STATE_ERROR);
1500 }
1501
1502 if (inst->needs_reallocation) {
1503 wave5_vpu_dec_give_command(inst, DEC_RESET_FRAMEBUF_INFO, NULL);
1504 inst->needs_reallocation = false;
1505 }
1506
1507 if (v4l2_m2m_has_stopped(m2m_ctx)) {
1508 ret = switch_state(inst, VPU_INST_STATE_INIT_SEQ);
1509 if (ret)
1510 return ret;
1511 }
1512
1513 return 0;
1514 }
1515
wave5_vpu_dec_stop_streaming(struct vb2_queue * q)1516 static void wave5_vpu_dec_stop_streaming(struct vb2_queue *q)
1517 {
1518 struct vpu_instance *inst = vb2_get_drv_priv(q);
1519 struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1520
1521 bool check_cmd = TRUE;
1522
1523 dev_dbg(inst->dev->dev, "%s: type: %u\n", __func__, q->type);
1524 pm_runtime_resume_and_get(inst->dev->dev);
1525 inst->empty_queue = true;
1526 while (check_cmd) {
1527 struct queue_status_info q_status;
1528 struct dec_output_info dec_output_info;
1529
1530 wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);
1531 if ((inst->state == VPU_INST_STATE_STOP ||
1532 inst->state == VPU_INST_STATE_INIT_SEQ ||
1533 q_status.instance_queue_count == 0) &&
1534 q_status.report_queue_count == 0)
1535 break;
1536
1537 if (wave5_vpu_dec_get_output_info(inst, &dec_output_info))
1538 dev_dbg(inst->dev->dev, "there is no output info\n");
1539 }
1540
1541 v4l2_m2m_update_stop_streaming_state(m2m_ctx, q);
1542
1543 if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
1544 streamoff_output(q);
1545 else
1546 streamoff_capture(q);
1547
1548 inst->empty_queue = false;
1549 inst->sent_eos = false;
1550 pm_runtime_put_autosuspend(inst->dev->dev);
1551 }
1552
/* vb2 queue operations shared by the OUTPUT and CAPTURE queues. */
static const struct vb2_ops wave5_vpu_dec_vb2_ops = {
	.queue_setup = wave5_vpu_dec_queue_setup,
	.buf_queue = wave5_vpu_dec_buf_queue,
	.start_streaming = wave5_vpu_dec_start_streaming,
	.stop_streaming = wave5_vpu_dec_stop_streaming,
};
1559
wave5_set_default_format(struct v4l2_pix_format_mplane * src_fmt,struct v4l2_pix_format_mplane * dst_fmt)1560 static void wave5_set_default_format(struct v4l2_pix_format_mplane *src_fmt,
1561 struct v4l2_pix_format_mplane *dst_fmt)
1562 {
1563 src_fmt->pixelformat = dec_fmt_list[VPU_FMT_TYPE_CODEC][0].v4l2_pix_fmt;
1564 wave5_update_pix_fmt(src_fmt, VPU_FMT_TYPE_CODEC,
1565 W5_DEF_DEC_PIC_WIDTH, W5_DEF_DEC_PIC_HEIGHT,
1566 &dec_hevc_frmsize);
1567
1568 dst_fmt->pixelformat = dec_fmt_list[VPU_FMT_TYPE_RAW][0].v4l2_pix_fmt;
1569 wave5_update_pix_fmt(dst_fmt, VPU_FMT_TYPE_RAW,
1570 W5_DEF_DEC_PIC_WIDTH, W5_DEF_DEC_PIC_HEIGHT,
1571 &dec_raw_frmsize);
1572 }
1573
/* m2m queue-init callback: set up both vb2 queues with the decoder ops. */
static int wave5_vpu_dec_queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)
{
	return wave5_vpu_queue_init(priv, src_vq, dst_vq, &wave5_vpu_dec_vb2_ops);
}
1578
/* Instance callbacks invoked by the common wave5 interrupt/completion code. */
static const struct vpu_instance_ops wave5_vpu_dec_inst_ops = {
	.finish_process = wave5_vpu_dec_finish_decode,
};
1582
/*
 * Run firmware sequence initialization: issue the SEQ_INIT command, wait
 * for its completion interrupt and read back the stream's initial info.
 * On success, dynamic-resolution-change handling updates the CAPTURE side;
 * on failure, the consumed bitstream is accounted back to the source
 * buffers. Returns 0 or a negative error code.
 */
static int initialize_sequence(struct vpu_instance *inst)
{
	struct dec_initial_info initial_info;
	int ret = 0;

	memset(&initial_info, 0, sizeof(struct dec_initial_info));

	ret = wave5_vpu_dec_issue_seq_init(inst);
	if (ret) {
		dev_dbg(inst->dev->dev, "%s: wave5_vpu_dec_issue_seq_init, fail: %d\n",
			__func__, ret);
		return ret;
	}

	/* A wait timeout is only logged; completion is re-checked below. */
	if (wave5_vpu_wait_interrupt(inst, VPU_DEC_TIMEOUT) < 0)
		dev_dbg(inst->dev->dev, "%s: failed to call vpu_wait_interrupt()\n", __func__);

	ret = wave5_vpu_dec_complete_seq_init(inst, &initial_info);
	if (ret) {
		dev_dbg(inst->dev->dev, "%s: vpu_dec_complete_seq_init, fail: %d, reason: %u\n",
			__func__, ret, initial_info.seq_init_err_reason);
		wave5_handle_src_buffer(inst, initial_info.rd_ptr);
		return ret;
	}

	handle_dynamic_resolution_change(inst);

	return 0;
}
1612
/*
 * True when the m2m context is draining (V4L2_DEC_CMD_STOP in progress)
 * or the instance has reached end-of-stream.
 * Caller must hold inst->state_spinlock.
 */
static bool wave5_is_draining_or_eos(struct vpu_instance *inst)
{
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;

	lockdep_assert_held(&inst->state_spinlock);
	return m2m_ctx->is_draining || inst->eos;
}
1620
/*
 * m2m device_run hook: execute one decode step.
 *
 * Unless retrying a previously failed queueing, new bitstream data is fed
 * into the ring buffer first. Then, depending on the instance state:
 * OPEN -> run sequence initialization; INIT_SEQ -> register the frame
 * buffers and fall through to start the first decode; PIC_RUN -> start
 * the next decode on the firmware.
 */
static void wave5_vpu_dec_device_run(void *priv)
{
	struct vpu_instance *inst = priv;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct queue_status_info q_status;
	u32 fail_res = 0;
	int ret = 0;

	dev_dbg(inst->dev->dev, "%s: Fill the ring buffer with new bitstream data", __func__);
	pm_runtime_resume_and_get(inst->dev->dev);
	if (!inst->retry) {
		ret = fill_ringbuffer(inst);
		if (ret < 0) {
			dev_warn(inst->dev->dev, "Filling ring buffer failed\n");
			goto finish_job_and_return;
		} else if (!inst->eos &&
			   inst->queuing_num == 0 &&
			   inst->state == VPU_INST_STATE_PIC_RUN) {
			/* Nothing was fed: skip this run until data arrives. */
			dev_dbg(inst->dev->dev, "%s: no bitstream for feeding, so skip ", __func__);
			inst->empty_queue = true;
			goto finish_job_and_return;
		}
	}

	switch (inst->state) {
	case VPU_INST_STATE_OPEN:
		ret = initialize_sequence(inst);
		if (ret) {
			unsigned long flags;

			/*
			 * If we are draining and the last source buffer has
			 * been consumed, a failed sequence init just means
			 * the stream ended: signal EOS instead of an error.
			 */
			spin_lock_irqsave(&inst->state_spinlock, flags);
			if (wave5_is_draining_or_eos(inst) &&
			    wave5_last_src_buffer_consumed(m2m_ctx)) {
				struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);

				switch_state(inst, VPU_INST_STATE_STOP);

				if (vb2_is_streaming(dst_vq))
					send_eos_event(inst);
				else
					handle_dynamic_resolution_change(inst);

				flag_last_buffer_done(inst);
			}
			spin_unlock_irqrestore(&inst->state_spinlock, flags);
		} else {
			set_instance_state(inst, VPU_INST_STATE_INIT_SEQ);
		}

		break;

	case VPU_INST_STATE_INIT_SEQ:
		/*
		 * Do this early, preparing the fb can trigger an IRQ before
		 * we had a chance to switch, which leads to an invalid state
		 * change.
		 */
		set_instance_state(inst, VPU_INST_STATE_PIC_RUN);
		/*
		 * During DRC, the picture decoding remains pending, so just leave the job
		 * active until this decode operation completes.
		 */
		wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);

		/*
		 * The sequence must be analyzed first to calculate the proper
		 * size of the auxiliary buffers.
		 */
		ret = wave5_prepare_fb(inst);
		if (ret) {
			dev_warn(inst->dev->dev, "Framebuffer preparation, fail: %d\n", ret);
			set_instance_state(inst, VPU_INST_STATE_STOP);
			break;
		}

		if (q_status.instance_queue_count)
			goto finish_job_and_return;

		fallthrough;
	case VPU_INST_STATE_PIC_RUN:
		ret = start_decode(inst, &fail_res);
		if (ret) {
			dev_err(inst->dev->dev,
				"Frame decoding on m2m context (%p), fail: %d (result: %d)\n",
				m2m_ctx, ret, fail_res);
			goto finish_job_and_return;
		}

		/* Firmware queue full: retry the same buffer on the next run. */
		if (fail_res == WAVE5_SYSERR_QUEUEING_FAIL) {
			inst->retry = true;
			inst->queuing_fail = true;
		} else {
			inst->retry = false;
			if (!inst->eos)
				inst->queuing_num--;
		}
		break;
	default:
		dev_dbg(inst->dev->dev, "Execution of a job in state %s illegal.\n",
			state_to_str(inst->state));
	}

finish_job_and_return:
	dev_dbg(inst->dev->dev, "%s: leave and finish job", __func__);
	pm_runtime_put_autosuspend(inst->dev->dev);
	/*
	 * After receiving CMD_STOP, there is no input, but we have to run device_run
	 * to send DEC_PIC command until display index == -1, so job_finish was always
	 * called in the device_run to archive it, the logic was very wasteful
	 * in power and CPU time.
	 * If EOS is passed, device_run will not call job_finish no more, it is called
	 * only if HW is idle status in order to reduce overhead.
	 */
	if (!inst->sent_eos)
		v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
}
1737
/*
 * m2m job_abort hook: move the instance to the STOP state, tell the
 * firmware the bitstream has ended (EOS) and finish the current job.
 */
static void wave5_vpu_dec_job_abort(void *priv)
{
	struct vpu_instance *inst = priv;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	int ret;

	ret = set_instance_state(inst, VPU_INST_STATE_STOP);
	if (ret)
		return;

	ret = wave5_vpu_dec_set_eos_on_firmware(inst);
	if (ret)
		dev_warn(inst->dev->dev,
			 "Setting EOS for the bitstream, fail: %d\n", ret);

	v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
}
1755
/*
 * m2m job_ready hook: decide, under state_spinlock, whether a decode job
 * may run in the current instance state. Returns 1 when runnable, 0
 * otherwise.
 */
static int wave5_vpu_dec_job_ready(void *priv)
{
	struct vpu_instance *inst = priv;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&inst->state_spinlock, flags);

	switch (inst->state) {
	case VPU_INST_STATE_NONE:
		dev_dbg(inst->dev->dev, "Decoder must be open to start queueing M2M jobs!\n");
		break;
	case VPU_INST_STATE_OPEN:
		/* Sequence init can run as soon as bitstream is available. */
		if (wave5_is_draining_or_eos(inst) || !v4l2_m2m_has_stopped(m2m_ctx) ||
		    v4l2_m2m_num_src_bufs_ready(m2m_ctx) > 0) {
			ret = 1;
			break;
		}

		dev_dbg(inst->dev->dev,
			"Decoder must be draining or >= 1 OUTPUT queue buffer must be queued!\n");
		break;
	case VPU_INST_STATE_INIT_SEQ:
	case VPU_INST_STATE_PIC_RUN:
		if (!m2m_ctx->cap_q_ctx.q.streaming) {
			dev_dbg(inst->dev->dev, "CAPTURE queue must be streaming to queue jobs!\n");
			break;
		} else if (v4l2_m2m_num_dst_bufs_ready(m2m_ctx) < (inst->fbc_buf_count - 1)) {
			dev_dbg(inst->dev->dev,
				"No capture buffer ready to decode!\n");
			break;
		} else if (!wave5_is_draining_or_eos(inst) &&
			   (!v4l2_m2m_num_src_bufs_ready(m2m_ctx) ||
			    inst->empty_queue)) {
			dev_dbg(inst->dev->dev,
				"No bitstream data to decode!\n");
			break;
		} else if (inst->state == VPU_INST_STATE_PIC_RUN &&
			   !wave5_is_draining_or_eos(inst) &&
			   inst->queuing_fail) {
			/* Last queueing attempt failed: wait for firmware room. */
			break;
		}
		ret = 1;
		break;
	case VPU_INST_STATE_STOP:
		dev_dbg(inst->dev->dev, "Decoder is stopped, not running.\n");
		break;
	}

	spin_unlock_irqrestore(&inst->state_spinlock, flags);

	return ret;
}
1810
/* V4L2 mem2mem job callbacks for the decoder. */
static const struct v4l2_m2m_ops wave5_vpu_dec_m2m_ops = {
	.device_run = wave5_vpu_dec_device_run,
	.job_abort = wave5_vpu_dec_job_abort,
	.job_ready = wave5_vpu_dec_job_ready,
};
1816
/*
 * File-operations open hook: allocate and initialize a decoder instance.
 *
 * Sets up the instance's locks and lists, the v4l2 file handle, the m2m
 * context (with buffered queues and ignore_cap_streaming, see below), the
 * control handler, default formats, the IRQ fifo and the instance ID, and
 * finally links the instance into the device's instance list.
 * All error paths funnel through wave5_cleanup_instance().
 */
static int wave5_vpu_open_dec(struct file *filp)
{
	struct video_device *vdev = video_devdata(filp);
	struct vpu_device *dev = video_drvdata(filp);
	struct vpu_instance *inst = NULL;
	struct v4l2_m2m_ctx *m2m_ctx;
	int ret = 0;

	inst = kzalloc_obj(*inst);
	if (!inst)
		return -ENOMEM;

	inst->dev = dev;
	inst->type = VPU_INST_TYPE_DEC;
	inst->ops = &wave5_vpu_dec_inst_ops;

	spin_lock_init(&inst->state_spinlock);
	mutex_init(&inst->feed_lock);
	INIT_LIST_HEAD(&inst->avail_src_bufs);

	inst->codec_info = kzalloc_obj(*inst->codec_info);
	if (!inst->codec_info) {
		kfree(inst);
		return -ENOMEM;
	}

	v4l2_fh_init(&inst->v4l2_fh, vdev);
	v4l2_fh_add(&inst->v4l2_fh, filp);

	INIT_LIST_HEAD(&inst->list);

	inst->v4l2_m2m_dev = inst->dev->v4l2_m2m_dec_dev;
	inst->v4l2_fh.m2m_ctx =
		v4l2_m2m_ctx_init(inst->v4l2_m2m_dev, inst, wave5_vpu_dec_queue_init);
	if (IS_ERR(inst->v4l2_fh.m2m_ctx)) {
		ret = PTR_ERR(inst->v4l2_fh.m2m_ctx);
		goto cleanup_inst;
	}
	m2m_ctx = inst->v4l2_fh.m2m_ctx;

	v4l2_m2m_set_src_buffered(m2m_ctx, true);
	v4l2_m2m_set_dst_buffered(m2m_ctx, true);
	/*
	 * We use the M2M job queue to ensure synchronization of steps where
	 * needed, as IOCTLs can occur at anytime and we need to run commands on
	 * the firmware in a specified order.
	 * In order to initialize the sequence on the firmware within an M2M
	 * job, the M2M framework needs to be able to queue jobs before
	 * the CAPTURE queue has been started, because we need the results of the
	 * initialization to properly prepare the CAPTURE queue with the correct
	 * amount of buffers.
	 * By setting ignore_cap_streaming to true the m2m framework will call
	 * job_ready as soon as the OUTPUT queue is streaming, instead of
	 * waiting until both the CAPTURE and OUTPUT queues are streaming.
	 */
	m2m_ctx->ignore_cap_streaming = true;

	v4l2_ctrl_handler_init(&inst->v4l2_ctrl_hdl, 10);
	v4l2_ctrl_new_std(&inst->v4l2_ctrl_hdl, NULL,
			  V4L2_CID_MIN_BUFFERS_FOR_CAPTURE, 1, 32, 1, 1);

	if (inst->v4l2_ctrl_hdl.error) {
		ret = -ENODEV;
		goto cleanup_inst;
	}

	inst->v4l2_fh.ctrl_handler = &inst->v4l2_ctrl_hdl;
	v4l2_ctrl_handler_setup(&inst->v4l2_ctrl_hdl);

	/* Defaults: HEVC input, YUV420 output, Rec.709 colorimetry. */
	wave5_set_default_format(&inst->src_fmt, &inst->dst_fmt);
	inst->colorspace = V4L2_COLORSPACE_REC709;
	inst->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
	inst->quantization = V4L2_QUANTIZATION_DEFAULT;
	inst->xfer_func = V4L2_XFER_FUNC_DEFAULT;

	init_completion(&inst->irq_done);
	ret = wave5_kfifo_alloc(inst);
	if (ret) {
		dev_err(inst->dev->dev, "failed to allocate fifo\n");
		goto cleanup_inst;
	}

	inst->id = ida_alloc(&inst->dev->inst_ida, GFP_KERNEL);
	if (inst->id < 0) {
		dev_warn(inst->dev->dev, "Allocating instance ID, fail: %d\n", inst->id);
		ret = inst->id;
		goto cleanup_inst;
	}

	/*
	 * For Wave515 SRAM memory was already allocated
	 * at wave5_vpu_dec_register_device()
	 */
	if (inst->dev->product_code != WAVE515_CODE)
		wave5_vdi_allocate_sram(inst->dev);

	ret = mutex_lock_interruptible(&dev->dev_lock);
	if (ret)
		goto cleanup_inst;

	list_add_tail(&inst->list, &dev->instances);

	mutex_unlock(&dev->dev_lock);

	return 0;

cleanup_inst:
	wave5_cleanup_instance(inst, filp);
	return ret;
}
1927
/*
 * wave5_vpu_dec_release() - release (file close) hook of the decoder node.
 *
 * Delegates the full instance teardown to the shared helper, supplying the
 * decoder-specific close callback and the "decoder" label for log messages.
 */
static int wave5_vpu_dec_release(struct file *filp)
{
	return wave5_vpu_release_device(filp, wave5_vpu_dec_close, "decoder");
}
1932
/*
 * File operations of the decoder video device node. ioctl, poll and mmap are
 * handled entirely by the generic V4L2 / mem2mem helpers; only open and
 * release are driver-specific.
 */
static const struct v4l2_file_operations wave5_vpu_dec_fops = {
	.owner = THIS_MODULE,
	.open = wave5_vpu_open_dec,
	.release = wave5_vpu_dec_release,
	.unlocked_ioctl = video_ioctl2,
	.poll = v4l2_m2m_fop_poll,
	.mmap = v4l2_m2m_fop_mmap,
};
1941
wave5_vpu_dec_register_device(struct vpu_device * dev)1942 int wave5_vpu_dec_register_device(struct vpu_device *dev)
1943 {
1944 struct video_device *vdev_dec;
1945 int ret;
1946
1947 /*
1948 * Secondary AXI setup for Wave515 is done by INIT_VPU command,
1949 * i.e. wave5_vpu_init(), that's why we allocate SRAM memory early.
1950 */
1951 if (dev->product_code == WAVE515_CODE)
1952 wave5_vdi_allocate_sram(dev);
1953
1954 vdev_dec = devm_kzalloc(dev->v4l2_dev.dev, sizeof(*vdev_dec), GFP_KERNEL);
1955 if (!vdev_dec)
1956 return -ENOMEM;
1957
1958 dev->v4l2_m2m_dec_dev = v4l2_m2m_init(&wave5_vpu_dec_m2m_ops);
1959 if (IS_ERR(dev->v4l2_m2m_dec_dev)) {
1960 ret = PTR_ERR(dev->v4l2_m2m_dec_dev);
1961 dev_err(dev->dev, "v4l2_m2m_init, fail: %d\n", ret);
1962 return -EINVAL;
1963 }
1964
1965 dev->video_dev_dec = vdev_dec;
1966
1967 strscpy(vdev_dec->name, VPU_DEC_DEV_NAME, sizeof(vdev_dec->name));
1968 vdev_dec->fops = &wave5_vpu_dec_fops;
1969 vdev_dec->ioctl_ops = &wave5_vpu_dec_ioctl_ops;
1970 vdev_dec->release = video_device_release_empty;
1971 vdev_dec->v4l2_dev = &dev->v4l2_dev;
1972 vdev_dec->vfl_dir = VFL_DIR_M2M;
1973 vdev_dec->device_caps = V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_STREAMING;
1974 vdev_dec->lock = &dev->dev_lock;
1975
1976 ret = video_register_device(vdev_dec, VFL_TYPE_VIDEO, -1);
1977 if (ret)
1978 return ret;
1979
1980 video_set_drvdata(vdev_dec, dev);
1981
1982 return 0;
1983 }
1984
/*
 * wave5_vpu_dec_unregister_device() - tear down the decoder video node.
 * @dev: the VPU device the decoder belongs to.
 *
 * Exact unwind of wave5_vpu_dec_register_device(); keep the teardown order
 * in sync with the setup order there.
 */
void wave5_vpu_dec_unregister_device(struct vpu_device *dev)
{
	/*
	 * Here is a freeing pair for Wave515 SRAM memory allocation
	 * happened at wave5_vpu_dec_register_device().
	 */
	if (dev->product_code == WAVE515_CODE)
		wave5_vdi_free_sram(dev);

	/* Remove the user-visible node first, then drop the m2m context. */
	video_unregister_device(dev->video_dev_dec);
	if (dev->v4l2_m2m_dec_dev)
		v4l2_m2m_release(dev->v4l2_m2m_dec_dev);
}
1998