xref: /linux/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c (revision eb01fe7abbe2d0b38824d2a93fdb4cc3eaf2ccc1)
1 // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
2 /*
3  * Wave5 series multi-standard codec IP - decoder interface
4  *
5  * Copyright (C) 2021-2023 CHIPS&MEDIA INC
6  */
7 
8 #include "wave5-helper.h"
9 
/* Name strings of the decoder; VPU_DEC_DRV_NAME is what querycap reports. */
#define VPU_DEC_DEV_NAME "C&M Wave5 VPU decoder"
#define VPU_DEC_DRV_NAME "wave5-dec"

/*
 * Default size of a source (bitstream) buffer for a width x height picture:
 * width * height / 8 * 3, evaluated left to right in the arguments' type.
 */
#define DEFAULT_SRC_SIZE(width, height) \
	((width) * (height) / 8 * 3)
16 
17 static const struct vpu_format dec_fmt_list[FMT_TYPES][MAX_FMTS] = {
18 	[VPU_FMT_TYPE_CODEC] = {
19 		{
20 			.v4l2_pix_fmt = V4L2_PIX_FMT_HEVC,
21 			.max_width = 8192,
22 			.min_width = 8,
23 			.max_height = 4320,
24 			.min_height = 8,
25 		},
26 		{
27 			.v4l2_pix_fmt = V4L2_PIX_FMT_H264,
28 			.max_width = 8192,
29 			.min_width = 32,
30 			.max_height = 4320,
31 			.min_height = 32,
32 		},
33 	},
34 	[VPU_FMT_TYPE_RAW] = {
35 		{
36 			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV420,
37 			.max_width = 8192,
38 			.min_width = 8,
39 			.max_height = 4320,
40 			.min_height = 8,
41 		},
42 		{
43 			.v4l2_pix_fmt = V4L2_PIX_FMT_NV12,
44 			.max_width = 8192,
45 			.min_width = 8,
46 			.max_height = 4320,
47 			.min_height = 8,
48 		},
49 		{
50 			.v4l2_pix_fmt = V4L2_PIX_FMT_NV21,
51 			.max_width = 8192,
52 			.min_width = 8,
53 			.max_height = 4320,
54 			.min_height = 8,
55 		},
56 		{
57 			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV422P,
58 			.max_width = 8192,
59 			.min_width = 8,
60 			.max_height = 4320,
61 			.min_height = 8,
62 		},
63 		{
64 			.v4l2_pix_fmt = V4L2_PIX_FMT_NV16,
65 			.max_width = 8192,
66 			.min_width = 8,
67 			.max_height = 4320,
68 			.min_height = 8,
69 		},
70 		{
71 			.v4l2_pix_fmt = V4L2_PIX_FMT_NV61,
72 			.max_width = 8192,
73 			.min_width = 8,
74 			.max_height = 4320,
75 			.min_height = 8,
76 		},
77 		{
78 			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV420M,
79 			.max_width = 8192,
80 			.min_width = 8,
81 			.max_height = 4320,
82 			.min_height = 8,
83 		},
84 		{
85 			.v4l2_pix_fmt = V4L2_PIX_FMT_NV12M,
86 			.max_width = 8192,
87 			.min_width = 8,
88 			.max_height = 4320,
89 			.min_height = 8,
90 		},
91 		{
92 			.v4l2_pix_fmt = V4L2_PIX_FMT_NV21M,
93 			.max_width = 8192,
94 			.min_width = 8,
95 			.max_height = 4320,
96 			.min_height = 8,
97 		},
98 		{
99 			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV422M,
100 			.max_width = 8192,
101 			.min_width = 8,
102 			.max_height = 4320,
103 			.min_height = 8,
104 		},
105 		{
106 			.v4l2_pix_fmt = V4L2_PIX_FMT_NV16M,
107 			.max_width = 8192,
108 			.min_width = 8,
109 			.max_height = 4320,
110 			.min_height = 8,
111 		},
112 		{
113 			.v4l2_pix_fmt = V4L2_PIX_FMT_NV61M,
114 			.max_width = 8192,
115 			.min_width = 8,
116 			.max_height = 4320,
117 			.min_height = 8,
118 		},
119 	}
120 };
121 
122 /*
123  * Make sure that the state switch is allowed and add logging for debugging
124  * purposes
125  */
126 static int switch_state(struct vpu_instance *inst, enum vpu_instance_state state)
127 {
128 	switch (state) {
129 	case VPU_INST_STATE_NONE:
130 		break;
131 	case VPU_INST_STATE_OPEN:
132 		if (inst->state != VPU_INST_STATE_NONE)
133 			goto invalid_state_switch;
134 		goto valid_state_switch;
135 	case VPU_INST_STATE_INIT_SEQ:
136 		if (inst->state != VPU_INST_STATE_OPEN && inst->state != VPU_INST_STATE_STOP)
137 			goto invalid_state_switch;
138 		goto valid_state_switch;
139 	case VPU_INST_STATE_PIC_RUN:
140 		if (inst->state != VPU_INST_STATE_INIT_SEQ)
141 			goto invalid_state_switch;
142 		goto valid_state_switch;
143 	case VPU_INST_STATE_STOP:
144 		goto valid_state_switch;
145 	}
146 invalid_state_switch:
147 	WARN(1, "Invalid state switch from %s to %s.\n",
148 	     state_to_str(inst->state), state_to_str(state));
149 	return -EINVAL;
150 valid_state_switch:
151 	dev_dbg(inst->dev->dev, "Switch state from %s to %s.\n",
152 		state_to_str(inst->state), state_to_str(state));
153 	inst->state = state;
154 	return 0;
155 }
156 
157 static int wave5_vpu_dec_set_eos_on_firmware(struct vpu_instance *inst)
158 {
159 	int ret;
160 
161 	ret = wave5_vpu_dec_update_bitstream_buffer(inst, 0);
162 	if (ret) {
163 		/*
164 		 * To set the EOS flag, a command is sent to the firmware.
165 		 * That command may never return (timeout) or may report an error.
166 		 */
167 		dev_err(inst->dev->dev,
168 			"Setting EOS for the bitstream, fail: %d\n", ret);
169 		return ret;
170 	}
171 	return 0;
172 }
173 
174 static bool wave5_last_src_buffer_consumed(struct v4l2_m2m_ctx *m2m_ctx)
175 {
176 	struct vpu_src_buffer *vpu_buf;
177 
178 	if (!m2m_ctx->last_src_buf)
179 		return false;
180 
181 	vpu_buf = wave5_to_vpu_src_buf(m2m_ctx->last_src_buf);
182 	return vpu_buf->consumed;
183 }
184 
185 static void wave5_handle_src_buffer(struct vpu_instance *inst, dma_addr_t rd_ptr)
186 {
187 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
188 	struct v4l2_m2m_buffer *buf, *n;
189 	size_t consumed_bytes = 0;
190 
191 	if (rd_ptr >= inst->last_rd_ptr) {
192 		consumed_bytes = rd_ptr - inst->last_rd_ptr;
193 	} else {
194 		size_t rd_offs = rd_ptr - inst->bitstream_vbuf.daddr;
195 		size_t last_rd_offs = inst->last_rd_ptr - inst->bitstream_vbuf.daddr;
196 
197 		consumed_bytes = rd_offs + (inst->bitstream_vbuf.size - last_rd_offs);
198 	}
199 
200 	inst->last_rd_ptr = rd_ptr;
201 	consumed_bytes += inst->remaining_consumed_bytes;
202 
203 	dev_dbg(inst->dev->dev, "%s: %zu bytes of bitstream was consumed", __func__,
204 		consumed_bytes);
205 
206 	v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buf, n) {
207 		struct vb2_v4l2_buffer *src_buf = &buf->vb;
208 		size_t src_size = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
209 
210 		if (src_size > consumed_bytes)
211 			break;
212 
213 		dev_dbg(inst->dev->dev, "%s: removing src buffer %i",
214 			__func__, src_buf->vb2_buf.index);
215 		src_buf = v4l2_m2m_src_buf_remove(m2m_ctx);
216 		inst->timestamp = src_buf->vb2_buf.timestamp;
217 		v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
218 		consumed_bytes -= src_size;
219 
220 		/* Handle the case the last bitstream buffer has been picked */
221 		if (src_buf == m2m_ctx->last_src_buf) {
222 			int ret;
223 
224 			m2m_ctx->last_src_buf = NULL;
225 			ret = wave5_vpu_dec_set_eos_on_firmware(inst);
226 			if (ret)
227 				dev_warn(inst->dev->dev,
228 					 "Setting EOS for the bitstream, fail: %d\n", ret);
229 			break;
230 		}
231 	}
232 
233 	inst->remaining_consumed_bytes = consumed_bytes;
234 }
235 
236 static void wave5_update_pix_fmt(struct v4l2_pix_format_mplane *pix_mp, unsigned int width,
237 				 unsigned int height)
238 {
239 	switch (pix_mp->pixelformat) {
240 	case V4L2_PIX_FMT_YUV420:
241 	case V4L2_PIX_FMT_NV12:
242 	case V4L2_PIX_FMT_NV21:
243 		pix_mp->width = round_up(width, 32);
244 		pix_mp->height = round_up(height, 16);
245 		pix_mp->plane_fmt[0].bytesperline = round_up(width, 32);
246 		pix_mp->plane_fmt[0].sizeimage = width * height * 3 / 2;
247 		break;
248 	case V4L2_PIX_FMT_YUV422P:
249 	case V4L2_PIX_FMT_NV16:
250 	case V4L2_PIX_FMT_NV61:
251 		pix_mp->width = round_up(width, 32);
252 		pix_mp->height = round_up(height, 16);
253 		pix_mp->plane_fmt[0].bytesperline = round_up(width, 32);
254 		pix_mp->plane_fmt[0].sizeimage = width * height * 2;
255 		break;
256 	case V4L2_PIX_FMT_YUV420M:
257 		pix_mp->width = round_up(width, 32);
258 		pix_mp->height = round_up(height, 16);
259 		pix_mp->plane_fmt[0].bytesperline = round_up(width, 32);
260 		pix_mp->plane_fmt[0].sizeimage = width * height;
261 		pix_mp->plane_fmt[1].bytesperline = round_up(width, 32) / 2;
262 		pix_mp->plane_fmt[1].sizeimage = width * height / 4;
263 		pix_mp->plane_fmt[2].bytesperline = round_up(width, 32) / 2;
264 		pix_mp->plane_fmt[2].sizeimage = width * height / 4;
265 		break;
266 	case V4L2_PIX_FMT_NV12M:
267 	case V4L2_PIX_FMT_NV21M:
268 		pix_mp->width = round_up(width, 32);
269 		pix_mp->height = round_up(height, 16);
270 		pix_mp->plane_fmt[0].bytesperline = round_up(width, 32);
271 		pix_mp->plane_fmt[0].sizeimage = width * height;
272 		pix_mp->plane_fmt[1].bytesperline = round_up(width, 32);
273 		pix_mp->plane_fmt[1].sizeimage = width * height / 2;
274 		break;
275 	case V4L2_PIX_FMT_YUV422M:
276 		pix_mp->width = round_up(width, 32);
277 		pix_mp->height = round_up(height, 16);
278 		pix_mp->plane_fmt[0].bytesperline = round_up(width, 32);
279 		pix_mp->plane_fmt[0].sizeimage = width * height;
280 		pix_mp->plane_fmt[1].bytesperline = round_up(width, 32) / 2;
281 		pix_mp->plane_fmt[1].sizeimage = width * height / 2;
282 		pix_mp->plane_fmt[2].bytesperline = round_up(width, 32) / 2;
283 		pix_mp->plane_fmt[2].sizeimage = width * height / 2;
284 		break;
285 	case V4L2_PIX_FMT_NV16M:
286 	case V4L2_PIX_FMT_NV61M:
287 		pix_mp->width = round_up(width, 32);
288 		pix_mp->height = round_up(height, 16);
289 		pix_mp->plane_fmt[0].bytesperline = round_up(width, 32);
290 		pix_mp->plane_fmt[0].sizeimage = width * height;
291 		pix_mp->plane_fmt[1].bytesperline = round_up(width, 32);
292 		pix_mp->plane_fmt[1].sizeimage = width * height;
293 		break;
294 	default:
295 		pix_mp->width = width;
296 		pix_mp->height = height;
297 		pix_mp->plane_fmt[0].bytesperline = 0;
298 		pix_mp->plane_fmt[0].sizeimage = max(DEFAULT_SRC_SIZE(width, height),
299 						     pix_mp->plane_fmt[0].sizeimage);
300 		break;
301 	}
302 }
303 
304 static int start_decode(struct vpu_instance *inst, u32 *fail_res)
305 {
306 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
307 	int ret = 0;
308 
309 	ret = wave5_vpu_dec_start_one_frame(inst, fail_res);
310 	if (ret) {
311 		struct vb2_v4l2_buffer *src_buf;
312 
313 		src_buf = v4l2_m2m_src_buf_remove(m2m_ctx);
314 		if (src_buf)
315 			v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR);
316 		switch_state(inst, VPU_INST_STATE_STOP);
317 
318 		dev_dbg(inst->dev->dev, "%s: pic run failed / finish job", __func__);
319 		v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
320 	}
321 
322 	return ret;
323 }
324 
325 static void flag_last_buffer_done(struct vpu_instance *inst)
326 {
327 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
328 	struct vb2_v4l2_buffer *vb;
329 	int i;
330 
331 	lockdep_assert_held(&inst->state_spinlock);
332 
333 	vb = v4l2_m2m_dst_buf_remove(m2m_ctx);
334 	if (!vb) {
335 		m2m_ctx->is_draining = true;
336 		m2m_ctx->next_buf_last = true;
337 		return;
338 	}
339 
340 	for (i = 0; i < vb->vb2_buf.num_planes; i++)
341 		vb2_set_plane_payload(&vb->vb2_buf, i, 0);
342 	vb->field = V4L2_FIELD_NONE;
343 
344 	v4l2_m2m_last_buffer_done(m2m_ctx, vb);
345 }
346 
347 static void send_eos_event(struct vpu_instance *inst)
348 {
349 	static const struct v4l2_event vpu_event_eos = {
350 		.type = V4L2_EVENT_EOS
351 	};
352 
353 	lockdep_assert_held(&inst->state_spinlock);
354 
355 	v4l2_event_queue_fh(&inst->v4l2_fh, &vpu_event_eos);
356 	inst->eos = false;
357 }
358 
359 static int handle_dynamic_resolution_change(struct vpu_instance *inst)
360 {
361 	struct v4l2_fh *fh = &inst->v4l2_fh;
362 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
363 
364 	static const struct v4l2_event vpu_event_src_ch = {
365 		.type = V4L2_EVENT_SOURCE_CHANGE,
366 		.u.src_change.changes = V4L2_EVENT_SRC_CH_RESOLUTION,
367 	};
368 	struct dec_info *p_dec_info = &inst->codec_info->dec_info;
369 	struct dec_initial_info *initial_info = &inst->codec_info->dec_info.initial_info;
370 
371 	lockdep_assert_held(&inst->state_spinlock);
372 
373 	dev_dbg(inst->dev->dev, "%s: rd_ptr %pad", __func__, &initial_info->rd_ptr);
374 
375 	dev_dbg(inst->dev->dev, "%s: width: %u height: %u profile: %u | minbuffer: %u\n",
376 		__func__, initial_info->pic_width, initial_info->pic_height,
377 		initial_info->profile, initial_info->min_frame_buffer_count);
378 
379 	inst->needs_reallocation = true;
380 	inst->fbc_buf_count = initial_info->min_frame_buffer_count + 1;
381 	if (inst->fbc_buf_count != v4l2_m2m_num_dst_bufs_ready(m2m_ctx)) {
382 		struct v4l2_ctrl *ctrl;
383 
384 		ctrl = v4l2_ctrl_find(&inst->v4l2_ctrl_hdl,
385 				      V4L2_CID_MIN_BUFFERS_FOR_CAPTURE);
386 		if (ctrl)
387 			v4l2_ctrl_s_ctrl(ctrl, inst->fbc_buf_count);
388 	}
389 
390 	if (p_dec_info->initial_info_obtained) {
391 		inst->conf_win.left = initial_info->pic_crop_rect.left;
392 		inst->conf_win.top = initial_info->pic_crop_rect.top;
393 		inst->conf_win.width = initial_info->pic_width -
394 			initial_info->pic_crop_rect.left - initial_info->pic_crop_rect.right;
395 		inst->conf_win.height = initial_info->pic_height -
396 			initial_info->pic_crop_rect.top - initial_info->pic_crop_rect.bottom;
397 
398 		wave5_update_pix_fmt(&inst->src_fmt, initial_info->pic_width,
399 				     initial_info->pic_height);
400 		wave5_update_pix_fmt(&inst->dst_fmt, initial_info->pic_width,
401 				     initial_info->pic_height);
402 	}
403 
404 	v4l2_event_queue_fh(fh, &vpu_event_src_ch);
405 
406 	return 0;
407 }
408 
409 static void wave5_vpu_dec_finish_decode(struct vpu_instance *inst)
410 {
411 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
412 	struct dec_output_info dec_info;
413 	int ret;
414 	struct vb2_v4l2_buffer *dec_buf = NULL;
415 	struct vb2_v4l2_buffer *disp_buf = NULL;
416 	struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
417 	struct queue_status_info q_status;
418 
419 	dev_dbg(inst->dev->dev, "%s: Fetch output info from firmware.", __func__);
420 
421 	ret = wave5_vpu_dec_get_output_info(inst, &dec_info);
422 	if (ret) {
423 		dev_warn(inst->dev->dev, "%s: could not get output info.", __func__);
424 		v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
425 		return;
426 	}
427 
428 	dev_dbg(inst->dev->dev, "%s: rd_ptr %pad wr_ptr %pad", __func__, &dec_info.rd_ptr,
429 		&dec_info.wr_ptr);
430 	wave5_handle_src_buffer(inst, dec_info.rd_ptr);
431 
432 	dev_dbg(inst->dev->dev, "%s: dec_info dec_idx %i disp_idx %i", __func__,
433 		dec_info.index_frame_decoded, dec_info.index_frame_display);
434 
435 	if (!vb2_is_streaming(dst_vq)) {
436 		dev_dbg(inst->dev->dev, "%s: capture is not streaming..", __func__);
437 		v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
438 		return;
439 	}
440 
441 	/* Remove decoded buffer from the ready queue now that it has been
442 	 * decoded.
443 	 */
444 	if (dec_info.index_frame_decoded >= 0) {
445 		struct vb2_buffer *vb = vb2_get_buffer(dst_vq,
446 						       dec_info.index_frame_decoded);
447 		if (vb) {
448 			dec_buf = to_vb2_v4l2_buffer(vb);
449 			dec_buf->vb2_buf.timestamp = inst->timestamp;
450 		} else {
451 			dev_warn(inst->dev->dev, "%s: invalid decoded frame index %i",
452 				 __func__, dec_info.index_frame_decoded);
453 		}
454 	}
455 
456 	if (dec_info.index_frame_display >= 0) {
457 		disp_buf = v4l2_m2m_dst_buf_remove_by_idx(m2m_ctx, dec_info.index_frame_display);
458 		if (!disp_buf)
459 			dev_warn(inst->dev->dev, "%s: invalid display frame index %i",
460 				 __func__, dec_info.index_frame_display);
461 	}
462 
463 	/* If there is anything to display, do that now */
464 	if (disp_buf) {
465 		struct vpu_dst_buffer *dst_vpu_buf = wave5_to_vpu_dst_buf(disp_buf);
466 
467 		if (inst->dst_fmt.num_planes == 1) {
468 			vb2_set_plane_payload(&disp_buf->vb2_buf, 0,
469 					      inst->dst_fmt.plane_fmt[0].sizeimage);
470 		} else if (inst->dst_fmt.num_planes == 2) {
471 			vb2_set_plane_payload(&disp_buf->vb2_buf, 0,
472 					      inst->dst_fmt.plane_fmt[0].sizeimage);
473 			vb2_set_plane_payload(&disp_buf->vb2_buf, 1,
474 					      inst->dst_fmt.plane_fmt[1].sizeimage);
475 		} else if (inst->dst_fmt.num_planes == 3) {
476 			vb2_set_plane_payload(&disp_buf->vb2_buf, 0,
477 					      inst->dst_fmt.plane_fmt[0].sizeimage);
478 			vb2_set_plane_payload(&disp_buf->vb2_buf, 1,
479 					      inst->dst_fmt.plane_fmt[1].sizeimage);
480 			vb2_set_plane_payload(&disp_buf->vb2_buf, 2,
481 					      inst->dst_fmt.plane_fmt[2].sizeimage);
482 		}
483 
484 		/* TODO implement interlace support */
485 		disp_buf->field = V4L2_FIELD_NONE;
486 		dst_vpu_buf->display = true;
487 		v4l2_m2m_buf_done(disp_buf, VB2_BUF_STATE_DONE);
488 
489 		dev_dbg(inst->dev->dev, "%s: frame_cycle %8u (payload %lu)\n",
490 			__func__, dec_info.frame_cycle,
491 			vb2_get_plane_payload(&disp_buf->vb2_buf, 0));
492 	}
493 
494 	if ((dec_info.index_frame_display == DISPLAY_IDX_FLAG_SEQ_END ||
495 	     dec_info.sequence_changed)) {
496 		unsigned long flags;
497 
498 		spin_lock_irqsave(&inst->state_spinlock, flags);
499 		if (!v4l2_m2m_has_stopped(m2m_ctx)) {
500 			switch_state(inst, VPU_INST_STATE_STOP);
501 
502 			if (dec_info.sequence_changed)
503 				handle_dynamic_resolution_change(inst);
504 			else
505 				send_eos_event(inst);
506 
507 			flag_last_buffer_done(inst);
508 		}
509 		spin_unlock_irqrestore(&inst->state_spinlock, flags);
510 	}
511 
512 	/*
513 	 * During a resolution change and while draining, the firmware may flush
514 	 * the reorder queue regardless of having a matching decoding operation
515 	 * pending. Only terminate the job if there are no more IRQ coming.
516 	 */
517 	wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);
518 	if (q_status.report_queue_count == 0 &&
519 	    (q_status.instance_queue_count == 0 || dec_info.sequence_changed)) {
520 		dev_dbg(inst->dev->dev, "%s: finishing job.\n", __func__);
521 		v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
522 	}
523 }
524 
525 static int wave5_vpu_dec_querycap(struct file *file, void *fh, struct v4l2_capability *cap)
526 {
527 	strscpy(cap->driver, VPU_DEC_DRV_NAME, sizeof(cap->driver));
528 	strscpy(cap->card, VPU_DEC_DRV_NAME, sizeof(cap->card));
529 
530 	return 0;
531 }
532 
533 static int wave5_vpu_dec_enum_framesizes(struct file *f, void *fh, struct v4l2_frmsizeenum *fsize)
534 {
535 	const struct vpu_format *vpu_fmt;
536 
537 	if (fsize->index)
538 		return -EINVAL;
539 
540 	vpu_fmt = wave5_find_vpu_fmt(fsize->pixel_format, dec_fmt_list[VPU_FMT_TYPE_CODEC]);
541 	if (!vpu_fmt) {
542 		vpu_fmt = wave5_find_vpu_fmt(fsize->pixel_format, dec_fmt_list[VPU_FMT_TYPE_RAW]);
543 		if (!vpu_fmt)
544 			return -EINVAL;
545 	}
546 
547 	fsize->type = V4L2_FRMSIZE_TYPE_CONTINUOUS;
548 	fsize->stepwise.min_width = vpu_fmt->min_width;
549 	fsize->stepwise.max_width = vpu_fmt->max_width;
550 	fsize->stepwise.step_width = 1;
551 	fsize->stepwise.min_height = vpu_fmt->min_height;
552 	fsize->stepwise.max_height = vpu_fmt->max_height;
553 	fsize->stepwise.step_height = 1;
554 
555 	return 0;
556 }
557 
558 static int wave5_vpu_dec_enum_fmt_cap(struct file *file, void *fh, struct v4l2_fmtdesc *f)
559 {
560 	const struct vpu_format *vpu_fmt;
561 
562 	vpu_fmt = wave5_find_vpu_fmt_by_idx(f->index, dec_fmt_list[VPU_FMT_TYPE_RAW]);
563 	if (!vpu_fmt)
564 		return -EINVAL;
565 
566 	f->pixelformat = vpu_fmt->v4l2_pix_fmt;
567 	f->flags = 0;
568 
569 	return 0;
570 }
571 
572 static int wave5_vpu_dec_try_fmt_cap(struct file *file, void *fh, struct v4l2_format *f)
573 {
574 	struct vpu_instance *inst = wave5_to_vpu_inst(fh);
575 	struct dec_info *p_dec_info = &inst->codec_info->dec_info;
576 	const struct vpu_format *vpu_fmt;
577 	int width, height;
578 
579 	dev_dbg(inst->dev->dev,
580 		"%s: fourcc: %u width: %u height: %u nm planes: %u colorspace: %u field: %u\n",
581 		__func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
582 		f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);
583 
584 	vpu_fmt = wave5_find_vpu_fmt(f->fmt.pix_mp.pixelformat, dec_fmt_list[VPU_FMT_TYPE_RAW]);
585 	if (!vpu_fmt) {
586 		width = inst->dst_fmt.width;
587 		height = inst->dst_fmt.height;
588 		f->fmt.pix_mp.pixelformat = inst->dst_fmt.pixelformat;
589 		f->fmt.pix_mp.num_planes = inst->dst_fmt.num_planes;
590 	} else {
591 		const struct v4l2_format_info *info = v4l2_format_info(vpu_fmt->v4l2_pix_fmt);
592 
593 		width = clamp(f->fmt.pix_mp.width, vpu_fmt->min_width, vpu_fmt->max_width);
594 		height = clamp(f->fmt.pix_mp.height, vpu_fmt->min_height, vpu_fmt->max_height);
595 		f->fmt.pix_mp.pixelformat = vpu_fmt->v4l2_pix_fmt;
596 		f->fmt.pix_mp.num_planes = info->mem_planes;
597 	}
598 
599 	if (p_dec_info->initial_info_obtained) {
600 		width = inst->dst_fmt.width;
601 		height = inst->dst_fmt.height;
602 	}
603 
604 	wave5_update_pix_fmt(&f->fmt.pix_mp, width, height);
605 	f->fmt.pix_mp.flags = 0;
606 	f->fmt.pix_mp.field = V4L2_FIELD_NONE;
607 	f->fmt.pix_mp.colorspace = inst->colorspace;
608 	f->fmt.pix_mp.ycbcr_enc = inst->ycbcr_enc;
609 	f->fmt.pix_mp.quantization = inst->quantization;
610 	f->fmt.pix_mp.xfer_func = inst->xfer_func;
611 
612 	return 0;
613 }
614 
615 static int wave5_vpu_dec_s_fmt_cap(struct file *file, void *fh, struct v4l2_format *f)
616 {
617 	struct vpu_instance *inst = wave5_to_vpu_inst(fh);
618 	int i, ret;
619 
620 	dev_dbg(inst->dev->dev,
621 		"%s: fourcc: %u width: %u height: %u num_planes: %u colorspace: %u field: %u\n",
622 		__func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
623 		f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);
624 
625 	ret = wave5_vpu_dec_try_fmt_cap(file, fh, f);
626 	if (ret)
627 		return ret;
628 
629 	inst->dst_fmt.width = f->fmt.pix_mp.width;
630 	inst->dst_fmt.height = f->fmt.pix_mp.height;
631 	inst->dst_fmt.pixelformat = f->fmt.pix_mp.pixelformat;
632 	inst->dst_fmt.field = f->fmt.pix_mp.field;
633 	inst->dst_fmt.flags = f->fmt.pix_mp.flags;
634 	inst->dst_fmt.num_planes = f->fmt.pix_mp.num_planes;
635 	for (i = 0; i < inst->dst_fmt.num_planes; i++) {
636 		inst->dst_fmt.plane_fmt[i].bytesperline = f->fmt.pix_mp.plane_fmt[i].bytesperline;
637 		inst->dst_fmt.plane_fmt[i].sizeimage = f->fmt.pix_mp.plane_fmt[i].sizeimage;
638 	}
639 
640 	if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV12 ||
641 	    inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV12M) {
642 		inst->cbcr_interleave = true;
643 		inst->nv21 = false;
644 		inst->output_format = FORMAT_420;
645 	} else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV21 ||
646 		   inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV21M) {
647 		inst->cbcr_interleave = true;
648 		inst->nv21 = true;
649 		inst->output_format = FORMAT_420;
650 	} else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV16 ||
651 		   inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV16M) {
652 		inst->cbcr_interleave = true;
653 		inst->nv21 = false;
654 		inst->output_format = FORMAT_422;
655 	} else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV61 ||
656 		   inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV61M) {
657 		inst->cbcr_interleave = true;
658 		inst->nv21 = true;
659 		inst->output_format = FORMAT_422;
660 	} else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_YUV422P ||
661 		   inst->dst_fmt.pixelformat == V4L2_PIX_FMT_YUV422M) {
662 		inst->cbcr_interleave = false;
663 		inst->nv21 = false;
664 		inst->output_format = FORMAT_422;
665 	} else {
666 		inst->cbcr_interleave = false;
667 		inst->nv21 = false;
668 		inst->output_format = FORMAT_420;
669 	}
670 
671 	return 0;
672 }
673 
674 static int wave5_vpu_dec_g_fmt_cap(struct file *file, void *fh, struct v4l2_format *f)
675 {
676 	struct vpu_instance *inst = wave5_to_vpu_inst(fh);
677 	int i;
678 
679 	f->fmt.pix_mp.width = inst->dst_fmt.width;
680 	f->fmt.pix_mp.height = inst->dst_fmt.height;
681 	f->fmt.pix_mp.pixelformat = inst->dst_fmt.pixelformat;
682 	f->fmt.pix_mp.field = inst->dst_fmt.field;
683 	f->fmt.pix_mp.flags = inst->dst_fmt.flags;
684 	f->fmt.pix_mp.num_planes = inst->dst_fmt.num_planes;
685 	for (i = 0; i < f->fmt.pix_mp.num_planes; i++) {
686 		f->fmt.pix_mp.plane_fmt[i].bytesperline = inst->dst_fmt.plane_fmt[i].bytesperline;
687 		f->fmt.pix_mp.plane_fmt[i].sizeimage = inst->dst_fmt.plane_fmt[i].sizeimage;
688 	}
689 
690 	f->fmt.pix_mp.colorspace = inst->colorspace;
691 	f->fmt.pix_mp.ycbcr_enc = inst->ycbcr_enc;
692 	f->fmt.pix_mp.quantization = inst->quantization;
693 	f->fmt.pix_mp.xfer_func = inst->xfer_func;
694 
695 	return 0;
696 }
697 
698 static int wave5_vpu_dec_enum_fmt_out(struct file *file, void *fh, struct v4l2_fmtdesc *f)
699 {
700 	struct vpu_instance *inst = wave5_to_vpu_inst(fh);
701 	const struct vpu_format *vpu_fmt;
702 
703 	dev_dbg(inst->dev->dev, "%s: index: %u\n", __func__, f->index);
704 
705 	vpu_fmt = wave5_find_vpu_fmt_by_idx(f->index, dec_fmt_list[VPU_FMT_TYPE_CODEC]);
706 	if (!vpu_fmt)
707 		return -EINVAL;
708 
709 	f->pixelformat = vpu_fmt->v4l2_pix_fmt;
710 	f->flags = V4L2_FMT_FLAG_DYN_RESOLUTION | V4L2_FMT_FLAG_COMPRESSED;
711 
712 	return 0;
713 }
714 
715 static int wave5_vpu_dec_try_fmt_out(struct file *file, void *fh, struct v4l2_format *f)
716 {
717 	struct vpu_instance *inst = wave5_to_vpu_inst(fh);
718 	const struct vpu_format *vpu_fmt;
719 
720 	dev_dbg(inst->dev->dev,
721 		"%s: fourcc: %u width: %u height: %u num_planes: %u colorspace: %u field: %u\n",
722 		__func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
723 		f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);
724 
725 	vpu_fmt = wave5_find_vpu_fmt(f->fmt.pix_mp.pixelformat, dec_fmt_list[VPU_FMT_TYPE_CODEC]);
726 	if (!vpu_fmt) {
727 		f->fmt.pix_mp.pixelformat = inst->src_fmt.pixelformat;
728 		f->fmt.pix_mp.num_planes = inst->src_fmt.num_planes;
729 		wave5_update_pix_fmt(&f->fmt.pix_mp, inst->src_fmt.width, inst->src_fmt.height);
730 	} else {
731 		int width = clamp(f->fmt.pix_mp.width, vpu_fmt->min_width, vpu_fmt->max_width);
732 		int height = clamp(f->fmt.pix_mp.height, vpu_fmt->min_height, vpu_fmt->max_height);
733 
734 		f->fmt.pix_mp.pixelformat = vpu_fmt->v4l2_pix_fmt;
735 		f->fmt.pix_mp.num_planes = 1;
736 		wave5_update_pix_fmt(&f->fmt.pix_mp, width, height);
737 	}
738 
739 	f->fmt.pix_mp.flags = 0;
740 	f->fmt.pix_mp.field = V4L2_FIELD_NONE;
741 
742 	return 0;
743 }
744 
745 static int wave5_vpu_dec_s_fmt_out(struct file *file, void *fh, struct v4l2_format *f)
746 {
747 	struct vpu_instance *inst = wave5_to_vpu_inst(fh);
748 	int i, ret;
749 
750 	dev_dbg(inst->dev->dev,
751 		"%s: fourcc: %u width: %u height: %u num_planes: %u field: %u\n",
752 		__func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
753 		f->fmt.pix_mp.num_planes, f->fmt.pix_mp.field);
754 
755 	ret = wave5_vpu_dec_try_fmt_out(file, fh, f);
756 	if (ret)
757 		return ret;
758 
759 	inst->std = wave5_to_vpu_std(f->fmt.pix_mp.pixelformat, inst->type);
760 	if (inst->std == STD_UNKNOWN) {
761 		dev_warn(inst->dev->dev, "unsupported pixelformat: %.4s\n",
762 			 (char *)&f->fmt.pix_mp.pixelformat);
763 		return -EINVAL;
764 	}
765 
766 	inst->src_fmt.width = f->fmt.pix_mp.width;
767 	inst->src_fmt.height = f->fmt.pix_mp.height;
768 	inst->src_fmt.pixelformat = f->fmt.pix_mp.pixelformat;
769 	inst->src_fmt.field = f->fmt.pix_mp.field;
770 	inst->src_fmt.flags = f->fmt.pix_mp.flags;
771 	inst->src_fmt.num_planes = f->fmt.pix_mp.num_planes;
772 	for (i = 0; i < inst->src_fmt.num_planes; i++) {
773 		inst->src_fmt.plane_fmt[i].bytesperline = f->fmt.pix_mp.plane_fmt[i].bytesperline;
774 		inst->src_fmt.plane_fmt[i].sizeimage = f->fmt.pix_mp.plane_fmt[i].sizeimage;
775 	}
776 
777 	inst->colorspace = f->fmt.pix_mp.colorspace;
778 	inst->ycbcr_enc = f->fmt.pix_mp.ycbcr_enc;
779 	inst->quantization = f->fmt.pix_mp.quantization;
780 	inst->xfer_func = f->fmt.pix_mp.xfer_func;
781 
782 	wave5_update_pix_fmt(&inst->dst_fmt, f->fmt.pix_mp.width, f->fmt.pix_mp.height);
783 
784 	return 0;
785 }
786 
787 static int wave5_vpu_dec_g_selection(struct file *file, void *fh, struct v4l2_selection *s)
788 {
789 	struct vpu_instance *inst = wave5_to_vpu_inst(fh);
790 
791 	dev_dbg(inst->dev->dev, "%s: type: %u | target: %u\n", __func__, s->type, s->target);
792 
793 	if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
794 		return -EINVAL;
795 	switch (s->target) {
796 	case V4L2_SEL_TGT_COMPOSE_BOUNDS:
797 	case V4L2_SEL_TGT_COMPOSE_PADDED:
798 		s->r.left = 0;
799 		s->r.top = 0;
800 		s->r.width = inst->dst_fmt.width;
801 		s->r.height = inst->dst_fmt.height;
802 		break;
803 	case V4L2_SEL_TGT_COMPOSE:
804 	case V4L2_SEL_TGT_COMPOSE_DEFAULT:
805 		s->r.left = 0;
806 		s->r.top = 0;
807 		if (inst->state > VPU_INST_STATE_OPEN) {
808 			s->r = inst->conf_win;
809 		} else {
810 			s->r.width = inst->src_fmt.width;
811 			s->r.height = inst->src_fmt.height;
812 		}
813 		break;
814 	default:
815 		return -EINVAL;
816 	}
817 
818 	return 0;
819 }
820 
821 static int wave5_vpu_dec_s_selection(struct file *file, void *fh, struct v4l2_selection *s)
822 {
823 	struct vpu_instance *inst = wave5_to_vpu_inst(fh);
824 
825 	if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
826 		return -EINVAL;
827 
828 	if (s->target != V4L2_SEL_TGT_COMPOSE)
829 		return -EINVAL;
830 
831 	dev_dbg(inst->dev->dev, "V4L2_SEL_TGT_COMPOSE w: %u h: %u\n",
832 		s->r.width, s->r.height);
833 
834 	s->r.left = 0;
835 	s->r.top = 0;
836 	s->r.width = inst->dst_fmt.width;
837 	s->r.height = inst->dst_fmt.height;
838 
839 	return 0;
840 }
841 
842 static int wave5_vpu_dec_stop(struct vpu_instance *inst)
843 {
844 	int ret = 0;
845 	unsigned long flags;
846 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
847 
848 	spin_lock_irqsave(&inst->state_spinlock, flags);
849 
850 	if (m2m_ctx->is_draining) {
851 		ret = -EBUSY;
852 		goto unlock_and_return;
853 	}
854 
855 	if (inst->state != VPU_INST_STATE_NONE) {
856 		/*
857 		 * Temporarily release the state_spinlock so that subsequent
858 		 * calls do not block on a mutex while inside this spinlock.
859 		 */
860 		spin_unlock_irqrestore(&inst->state_spinlock, flags);
861 		ret = wave5_vpu_dec_set_eos_on_firmware(inst);
862 		if (ret)
863 			return ret;
864 
865 		spin_lock_irqsave(&inst->state_spinlock, flags);
866 		/*
867 		 * TODO eliminate this check by using a separate check for
868 		 * draining triggered by a resolution change.
869 		 */
870 		if (m2m_ctx->is_draining) {
871 			ret = -EBUSY;
872 			goto unlock_and_return;
873 		}
874 	}
875 
876 	/*
877 	 * Used to remember the EOS state after the streamoff/on transition on
878 	 * the capture queue.
879 	 */
880 	inst->eos = true;
881 
882 	if (m2m_ctx->has_stopped)
883 		goto unlock_and_return;
884 
885 	m2m_ctx->last_src_buf = v4l2_m2m_last_src_buf(m2m_ctx);
886 	m2m_ctx->is_draining = true;
887 
888 	/*
889 	 * Deferred to device run in case it wasn't in the ring buffer
890 	 * yet. In other case, we have to send the EOS signal to the
891 	 * firmware so that any pending PIC_RUN ends without new
892 	 * bitstream buffer.
893 	 */
894 	if (m2m_ctx->last_src_buf)
895 		goto unlock_and_return;
896 
897 	if (inst->state == VPU_INST_STATE_NONE) {
898 		send_eos_event(inst);
899 		flag_last_buffer_done(inst);
900 	}
901 
902 unlock_and_return:
903 	spin_unlock_irqrestore(&inst->state_spinlock, flags);
904 	return ret;
905 }
906 
907 static int wave5_vpu_dec_start(struct vpu_instance *inst)
908 {
909 	int ret = 0;
910 	unsigned long flags;
911 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
912 	struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
913 
914 	spin_lock_irqsave(&inst->state_spinlock, flags);
915 
916 	if (m2m_ctx->is_draining) {
917 		ret = -EBUSY;
918 		goto unlock_and_return;
919 	}
920 
921 	if (m2m_ctx->has_stopped)
922 		m2m_ctx->has_stopped = false;
923 
924 	vb2_clear_last_buffer_dequeued(dst_vq);
925 	inst->eos = false;
926 
927 unlock_and_return:
928 	spin_unlock_irqrestore(&inst->state_spinlock, flags);
929 	return ret;
930 }
931 
932 static int wave5_vpu_dec_decoder_cmd(struct file *file, void *fh, struct v4l2_decoder_cmd *dc)
933 {
934 	struct vpu_instance *inst = wave5_to_vpu_inst(fh);
935 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
936 	int ret;
937 
938 	dev_dbg(inst->dev->dev, "decoder command: %u\n", dc->cmd);
939 
940 	ret = v4l2_m2m_ioctl_try_decoder_cmd(file, fh, dc);
941 	if (ret)
942 		return ret;
943 
944 	switch (dc->cmd) {
945 	case V4L2_DEC_CMD_STOP:
946 		ret = wave5_vpu_dec_stop(inst);
947 		/* Just in case we don't have anything to decode anymore */
948 		v4l2_m2m_try_schedule(m2m_ctx);
949 		break;
950 	case V4L2_DEC_CMD_START:
951 		ret = wave5_vpu_dec_start(inst);
952 		break;
953 	default:
954 		ret = -EINVAL;
955 	}
956 
957 	return ret;
958 }
959 
960 static const struct v4l2_ioctl_ops wave5_vpu_dec_ioctl_ops = {
961 	.vidioc_querycap = wave5_vpu_dec_querycap,
962 	.vidioc_enum_framesizes = wave5_vpu_dec_enum_framesizes,
963 
964 	.vidioc_enum_fmt_vid_cap	= wave5_vpu_dec_enum_fmt_cap,
965 	.vidioc_s_fmt_vid_cap_mplane = wave5_vpu_dec_s_fmt_cap,
966 	.vidioc_g_fmt_vid_cap_mplane = wave5_vpu_dec_g_fmt_cap,
967 	.vidioc_try_fmt_vid_cap_mplane = wave5_vpu_dec_try_fmt_cap,
968 
969 	.vidioc_enum_fmt_vid_out	= wave5_vpu_dec_enum_fmt_out,
970 	.vidioc_s_fmt_vid_out_mplane = wave5_vpu_dec_s_fmt_out,
971 	.vidioc_g_fmt_vid_out_mplane = wave5_vpu_g_fmt_out,
972 	.vidioc_try_fmt_vid_out_mplane = wave5_vpu_dec_try_fmt_out,
973 
974 	.vidioc_g_selection = wave5_vpu_dec_g_selection,
975 	.vidioc_s_selection = wave5_vpu_dec_s_selection,
976 
977 	.vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs,
978 	/*
979 	 * Firmware does not support CREATE_BUFS for CAPTURE queue. Since
980 	 * there is no immediate use-case for supporting CREATE_BUFS on
981 	 * just the OUTPUT queue, disable CREATE_BUFS altogether.
982 	 */
983 	.vidioc_querybuf = v4l2_m2m_ioctl_querybuf,
984 	.vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf,
985 	.vidioc_qbuf = v4l2_m2m_ioctl_qbuf,
986 	.vidioc_expbuf = v4l2_m2m_ioctl_expbuf,
987 	.vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf,
988 	.vidioc_streamon = v4l2_m2m_ioctl_streamon,
989 	.vidioc_streamoff = v4l2_m2m_ioctl_streamoff,
990 
991 	.vidioc_try_decoder_cmd = v4l2_m2m_ioctl_try_decoder_cmd,
992 	.vidioc_decoder_cmd = wave5_vpu_dec_decoder_cmd,
993 
994 	.vidioc_subscribe_event = wave5_vpu_subscribe_event,
995 	.vidioc_unsubscribe_event = v4l2_event_unsubscribe,
996 };
997 
998 static int wave5_vpu_dec_queue_setup(struct vb2_queue *q, unsigned int *num_buffers,
999 				     unsigned int *num_planes, unsigned int sizes[],
1000 				     struct device *alloc_devs[])
1001 {
1002 	struct vpu_instance *inst = vb2_get_drv_priv(q);
1003 	struct v4l2_pix_format_mplane inst_format =
1004 		(q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) ? inst->src_fmt : inst->dst_fmt;
1005 
1006 	dev_dbg(inst->dev->dev, "%s: num_buffers: %u | num_planes: %u | type: %u\n", __func__,
1007 		*num_buffers, *num_planes, q->type);
1008 
1009 	*num_planes = inst_format.num_planes;
1010 
1011 	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
1012 		sizes[0] = inst_format.plane_fmt[0].sizeimage;
1013 		dev_dbg(inst->dev->dev, "%s: size[0]: %u\n", __func__, sizes[0]);
1014 	} else if (q->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
1015 		if (*num_buffers < inst->fbc_buf_count)
1016 			*num_buffers = inst->fbc_buf_count;
1017 
1018 		if (*num_planes == 1) {
1019 			if (inst->output_format == FORMAT_422)
1020 				sizes[0] = inst_format.width * inst_format.height * 2;
1021 			else
1022 				sizes[0] = inst_format.width * inst_format.height * 3 / 2;
1023 			dev_dbg(inst->dev->dev, "%s: size[0]: %u\n", __func__, sizes[0]);
1024 		} else if (*num_planes == 2) {
1025 			sizes[0] = inst_format.width * inst_format.height;
1026 			if (inst->output_format == FORMAT_422)
1027 				sizes[1] = inst_format.width * inst_format.height;
1028 			else
1029 				sizes[1] = inst_format.width * inst_format.height / 2;
1030 			dev_dbg(inst->dev->dev, "%s: size[0]: %u | size[1]: %u\n",
1031 				__func__, sizes[0], sizes[1]);
1032 		} else if (*num_planes == 3) {
1033 			sizes[0] = inst_format.width * inst_format.height;
1034 			if (inst->output_format == FORMAT_422) {
1035 				sizes[1] = inst_format.width * inst_format.height / 2;
1036 				sizes[2] = inst_format.width * inst_format.height / 2;
1037 			} else {
1038 				sizes[1] = inst_format.width * inst_format.height / 4;
1039 				sizes[2] = inst_format.width * inst_format.height / 4;
1040 			}
1041 			dev_dbg(inst->dev->dev, "%s: size[0]: %u | size[1]: %u | size[2]: %u\n",
1042 				__func__, sizes[0], sizes[1], sizes[2]);
1043 		}
1044 	}
1045 
1046 	return 0;
1047 }
1048 
1049 static int wave5_prepare_fb(struct vpu_instance *inst)
1050 {
1051 	int linear_num;
1052 	int non_linear_num;
1053 	int fb_stride = 0, fb_height = 0;
1054 	int luma_size, chroma_size;
1055 	int ret, i;
1056 	struct v4l2_m2m_buffer *buf, *n;
1057 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1058 
1059 	linear_num = v4l2_m2m_num_dst_bufs_ready(m2m_ctx);
1060 	non_linear_num = inst->fbc_buf_count;
1061 
1062 	for (i = 0; i < non_linear_num; i++) {
1063 		struct frame_buffer *frame = &inst->frame_buf[i];
1064 		struct vpu_buf *vframe = &inst->frame_vbuf[i];
1065 
1066 		fb_stride = inst->dst_fmt.width;
1067 		fb_height = ALIGN(inst->dst_fmt.height, 32);
1068 		luma_size = fb_stride * fb_height;
1069 
1070 		chroma_size = ALIGN(fb_stride / 2, 16) * fb_height;
1071 
1072 		if (vframe->size == (luma_size + chroma_size))
1073 			continue;
1074 
1075 		if (vframe->size)
1076 			wave5_vpu_dec_reset_framebuffer(inst, i);
1077 
1078 		vframe->size = luma_size + chroma_size;
1079 		ret = wave5_vdi_allocate_dma_memory(inst->dev, vframe);
1080 		if (ret) {
1081 			dev_dbg(inst->dev->dev,
1082 				"%s: Allocating FBC buf of size %zu, fail: %d\n",
1083 				__func__, vframe->size, ret);
1084 			return ret;
1085 		}
1086 
1087 		frame->buf_y = vframe->daddr;
1088 		frame->buf_cb = vframe->daddr + luma_size;
1089 		frame->buf_cr = (dma_addr_t)-1;
1090 		frame->size = vframe->size;
1091 		frame->width = inst->src_fmt.width;
1092 		frame->stride = fb_stride;
1093 		frame->map_type = COMPRESSED_FRAME_MAP;
1094 		frame->update_fb_info = true;
1095 	}
1096 	/* In case the count has reduced, clean up leftover framebuffer memory */
1097 	for (i = non_linear_num; i < MAX_REG_FRAME; i++) {
1098 		ret = wave5_vpu_dec_reset_framebuffer(inst, i);
1099 		if (ret)
1100 			break;
1101 	}
1102 
1103 	for (i = 0; i < linear_num; i++) {
1104 		struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1105 		struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
1106 		struct vb2_buffer *vb = vb2_get_buffer(dst_vq, i);
1107 		struct frame_buffer *frame = &inst->frame_buf[non_linear_num + i];
1108 		dma_addr_t buf_addr_y = 0, buf_addr_cb = 0, buf_addr_cr = 0;
1109 		u32 buf_size = 0;
1110 		u32 fb_stride = inst->dst_fmt.width;
1111 		u32 luma_size = fb_stride * inst->dst_fmt.height;
1112 		u32 chroma_size;
1113 
1114 		if (inst->output_format == FORMAT_422)
1115 			chroma_size = fb_stride * inst->dst_fmt.height / 2;
1116 		else
1117 			chroma_size = fb_stride * inst->dst_fmt.height / 4;
1118 
1119 		if (inst->dst_fmt.num_planes == 1) {
1120 			buf_size = vb2_plane_size(vb, 0);
1121 			buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0);
1122 			buf_addr_cb = buf_addr_y + luma_size;
1123 			buf_addr_cr = buf_addr_cb + chroma_size;
1124 		} else if (inst->dst_fmt.num_planes == 2) {
1125 			buf_size = vb2_plane_size(vb, 0) +
1126 				vb2_plane_size(vb, 1);
1127 			buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0);
1128 			buf_addr_cb = vb2_dma_contig_plane_dma_addr(vb, 1);
1129 			buf_addr_cr = buf_addr_cb + chroma_size;
1130 		} else if (inst->dst_fmt.num_planes == 3) {
1131 			buf_size = vb2_plane_size(vb, 0) +
1132 				vb2_plane_size(vb, 1) +
1133 				vb2_plane_size(vb, 2);
1134 			buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0);
1135 			buf_addr_cb = vb2_dma_contig_plane_dma_addr(vb, 1);
1136 			buf_addr_cr = vb2_dma_contig_plane_dma_addr(vb, 2);
1137 		}
1138 
1139 		frame->buf_y = buf_addr_y;
1140 		frame->buf_cb = buf_addr_cb;
1141 		frame->buf_cr = buf_addr_cr;
1142 		frame->size = buf_size;
1143 		frame->width = inst->src_fmt.width;
1144 		frame->stride = fb_stride;
1145 		frame->map_type = LINEAR_FRAME_MAP;
1146 		frame->update_fb_info = true;
1147 	}
1148 
1149 	ret = wave5_vpu_dec_register_frame_buffer_ex(inst, non_linear_num, linear_num,
1150 						     fb_stride, inst->dst_fmt.height);
1151 	if (ret) {
1152 		dev_dbg(inst->dev->dev, "%s: vpu_dec_register_frame_buffer_ex fail: %d",
1153 			__func__, ret);
1154 		return ret;
1155 	}
1156 
1157 	/*
1158 	 * Mark all frame buffers as out of display, to avoid using them before
1159 	 * the application have them queued.
1160 	 */
1161 	for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) {
1162 		ret = wave5_vpu_dec_set_disp_flag(inst, i);
1163 		if (ret) {
1164 			dev_dbg(inst->dev->dev,
1165 				"%s: Setting display flag of buf index: %u, fail: %d\n",
1166 				__func__, i, ret);
1167 		}
1168 	}
1169 
1170 	v4l2_m2m_for_each_dst_buf_safe(m2m_ctx, buf, n) {
1171 		struct vb2_v4l2_buffer *vbuf = &buf->vb;
1172 
1173 		ret = wave5_vpu_dec_clr_disp_flag(inst, vbuf->vb2_buf.index);
1174 		if (ret)
1175 			dev_dbg(inst->dev->dev,
1176 				"%s: Clearing display flag of buf index: %u, fail: %d\n",
1177 				__func__, i, ret);
1178 	}
1179 
1180 	return 0;
1181 }
1182 
1183 static int write_to_ringbuffer(struct vpu_instance *inst, void *buffer, size_t buffer_size,
1184 			       struct vpu_buf *ring_buffer, dma_addr_t wr_ptr)
1185 {
1186 	size_t size;
1187 	size_t offset = wr_ptr - ring_buffer->daddr;
1188 	int ret;
1189 
1190 	if (wr_ptr + buffer_size > ring_buffer->daddr + ring_buffer->size) {
1191 		size = ring_buffer->daddr + ring_buffer->size - wr_ptr;
1192 		ret = wave5_vdi_write_memory(inst->dev, ring_buffer, offset, (u8 *)buffer, size);
1193 		if (ret < 0)
1194 			return ret;
1195 
1196 		ret = wave5_vdi_write_memory(inst->dev, ring_buffer, 0, (u8 *)buffer + size,
1197 					     buffer_size - size);
1198 		if (ret < 0)
1199 			return ret;
1200 	} else {
1201 		ret = wave5_vdi_write_memory(inst->dev, ring_buffer, offset, (u8 *)buffer,
1202 					     buffer_size);
1203 		if (ret < 0)
1204 			return ret;
1205 	}
1206 
1207 	return 0;
1208 }
1209 
1210 static int fill_ringbuffer(struct vpu_instance *inst)
1211 {
1212 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1213 	struct v4l2_m2m_buffer *buf, *n;
1214 	int ret;
1215 
1216 	if (m2m_ctx->last_src_buf)  {
1217 		struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(m2m_ctx->last_src_buf);
1218 
1219 		if (vpu_buf->consumed) {
1220 			dev_dbg(inst->dev->dev, "last src buffer already written\n");
1221 			return 0;
1222 		}
1223 	}
1224 
1225 	v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buf, n) {
1226 		struct vb2_v4l2_buffer *vbuf = &buf->vb;
1227 		struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(vbuf);
1228 		struct vpu_buf *ring_buffer = &inst->bitstream_vbuf;
1229 		size_t src_size = vb2_get_plane_payload(&vbuf->vb2_buf, 0);
1230 		void *src_buf = vb2_plane_vaddr(&vbuf->vb2_buf, 0);
1231 		dma_addr_t rd_ptr = 0;
1232 		dma_addr_t wr_ptr = 0;
1233 		size_t remain_size = 0;
1234 
1235 		if (vpu_buf->consumed) {
1236 			dev_dbg(inst->dev->dev, "already copied src buf (%u) to the ring buffer\n",
1237 				vbuf->vb2_buf.index);
1238 			continue;
1239 		}
1240 
1241 		if (!src_buf) {
1242 			dev_dbg(inst->dev->dev,
1243 				"%s: Acquiring kernel pointer to src buf (%u), fail\n",
1244 				__func__, vbuf->vb2_buf.index);
1245 			break;
1246 		}
1247 
1248 		ret = wave5_vpu_dec_get_bitstream_buffer(inst, &rd_ptr, &wr_ptr, &remain_size);
1249 		if (ret) {
1250 			/* Unable to acquire the mutex */
1251 			dev_err(inst->dev->dev, "Getting the bitstream buffer, fail: %d\n",
1252 				ret);
1253 			return ret;
1254 		}
1255 
1256 		dev_dbg(inst->dev->dev, "%s: rd_ptr %pad wr_ptr %pad", __func__, &rd_ptr, &wr_ptr);
1257 
1258 		if (remain_size < src_size) {
1259 			dev_dbg(inst->dev->dev,
1260 				"%s: remaining size: %zu < source size: %zu for src buf (%u)\n",
1261 				__func__, remain_size, src_size, vbuf->vb2_buf.index);
1262 			break;
1263 		}
1264 
1265 		ret = write_to_ringbuffer(inst, src_buf, src_size, ring_buffer, wr_ptr);
1266 		if (ret) {
1267 			dev_err(inst->dev->dev, "Write src buf (%u) to ring buffer, fail: %d\n",
1268 				vbuf->vb2_buf.index, ret);
1269 			return ret;
1270 		}
1271 
1272 		ret = wave5_vpu_dec_update_bitstream_buffer(inst, src_size);
1273 		if (ret) {
1274 			dev_dbg(inst->dev->dev,
1275 				"update_bitstream_buffer fail: %d for src buf (%u)\n",
1276 				ret, vbuf->vb2_buf.index);
1277 			break;
1278 		}
1279 
1280 		vpu_buf->consumed = true;
1281 
1282 		/* Don't write buffers passed the last one while draining. */
1283 		if (v4l2_m2m_is_last_draining_src_buf(m2m_ctx, vbuf)) {
1284 			dev_dbg(inst->dev->dev, "last src buffer written to the ring buffer\n");
1285 			break;
1286 		}
1287 	}
1288 
1289 	return 0;
1290 }
1291 
1292 static void wave5_vpu_dec_buf_queue_src(struct vb2_buffer *vb)
1293 {
1294 	struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
1295 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1296 	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
1297 	struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(vbuf);
1298 
1299 	vpu_buf->consumed = false;
1300 	vbuf->sequence = inst->queued_src_buf_num++;
1301 
1302 	v4l2_m2m_buf_queue(m2m_ctx, vbuf);
1303 }
1304 
1305 static void wave5_vpu_dec_buf_queue_dst(struct vb2_buffer *vb)
1306 {
1307 	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
1308 	struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
1309 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1310 
1311 	vbuf->sequence = inst->queued_dst_buf_num++;
1312 
1313 	if (inst->state == VPU_INST_STATE_PIC_RUN) {
1314 		struct vpu_dst_buffer *vpu_buf = wave5_to_vpu_dst_buf(vbuf);
1315 		int ret;
1316 
1317 		/*
1318 		 * The buffer is already registered just clear the display flag
1319 		 * to let the firmware know it can be used.
1320 		 */
1321 		vpu_buf->display = false;
1322 		ret = wave5_vpu_dec_clr_disp_flag(inst, vb->index);
1323 		if (ret) {
1324 			dev_dbg(inst->dev->dev,
1325 				"%s: Clearing the display flag of buffer index: %u, fail: %d\n",
1326 				__func__, vb->index, ret);
1327 		}
1328 	}
1329 
1330 	if (vb2_is_streaming(vb->vb2_queue) && v4l2_m2m_dst_buf_is_last(m2m_ctx)) {
1331 		unsigned int i;
1332 
1333 		for (i = 0; i < vb->num_planes; i++)
1334 			vb2_set_plane_payload(vb, i, 0);
1335 
1336 		vbuf->field = V4L2_FIELD_NONE;
1337 
1338 		send_eos_event(inst);
1339 		v4l2_m2m_last_buffer_done(m2m_ctx, vbuf);
1340 	} else {
1341 		v4l2_m2m_buf_queue(m2m_ctx, vbuf);
1342 	}
1343 }
1344 
1345 static void wave5_vpu_dec_buf_queue(struct vb2_buffer *vb)
1346 {
1347 	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
1348 	struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
1349 
1350 	dev_dbg(inst->dev->dev, "%s: type: %4u index: %4u size: ([0]=%4lu, [1]=%4lu, [2]=%4lu)\n",
1351 		__func__, vb->type, vb->index, vb2_plane_size(&vbuf->vb2_buf, 0),
1352 		vb2_plane_size(&vbuf->vb2_buf, 1), vb2_plane_size(&vbuf->vb2_buf, 2));
1353 
1354 	if (vb->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
1355 		wave5_vpu_dec_buf_queue_src(vb);
1356 	else if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE)
1357 		wave5_vpu_dec_buf_queue_dst(vb);
1358 }
1359 
1360 static int wave5_vpu_dec_allocate_ring_buffer(struct vpu_instance *inst)
1361 {
1362 	int ret;
1363 	struct vpu_buf *ring_buffer = &inst->bitstream_vbuf;
1364 
1365 	ring_buffer->size = ALIGN(inst->src_fmt.plane_fmt[0].sizeimage, 1024) * 4;
1366 	ret = wave5_vdi_allocate_dma_memory(inst->dev, ring_buffer);
1367 	if (ret) {
1368 		dev_dbg(inst->dev->dev, "%s: allocate ring buffer of size %zu fail: %d\n",
1369 			__func__, ring_buffer->size, ret);
1370 		return ret;
1371 	}
1372 
1373 	inst->last_rd_ptr = ring_buffer->daddr;
1374 
1375 	return 0;
1376 }
1377 
1378 static int wave5_vpu_dec_start_streaming(struct vb2_queue *q, unsigned int count)
1379 {
1380 	struct vpu_instance *inst = vb2_get_drv_priv(q);
1381 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1382 	int ret = 0;
1383 
1384 	dev_dbg(inst->dev->dev, "%s: type: %u\n", __func__, q->type);
1385 
1386 	v4l2_m2m_update_start_streaming_state(m2m_ctx, q);
1387 
1388 	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE && inst->state == VPU_INST_STATE_NONE) {
1389 		struct dec_open_param open_param;
1390 
1391 		memset(&open_param, 0, sizeof(struct dec_open_param));
1392 
1393 		ret = wave5_vpu_dec_allocate_ring_buffer(inst);
1394 		if (ret)
1395 			goto return_buffers;
1396 
1397 		open_param.bitstream_buffer = inst->bitstream_vbuf.daddr;
1398 		open_param.bitstream_buffer_size = inst->bitstream_vbuf.size;
1399 
1400 		ret = wave5_vpu_dec_open(inst, &open_param);
1401 		if (ret) {
1402 			dev_dbg(inst->dev->dev, "%s: decoder opening, fail: %d\n",
1403 				__func__, ret);
1404 			goto free_bitstream_vbuf;
1405 		}
1406 
1407 		ret = switch_state(inst, VPU_INST_STATE_OPEN);
1408 		if (ret)
1409 			goto free_bitstream_vbuf;
1410 	} else if (q->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
1411 		struct dec_initial_info *initial_info =
1412 			&inst->codec_info->dec_info.initial_info;
1413 
1414 		if (inst->state == VPU_INST_STATE_STOP)
1415 			ret = switch_state(inst, VPU_INST_STATE_INIT_SEQ);
1416 		if (ret)
1417 			goto return_buffers;
1418 
1419 		if (inst->state == VPU_INST_STATE_INIT_SEQ) {
1420 			if (initial_info->luma_bitdepth != 8) {
1421 				dev_info(inst->dev->dev, "%s: no support for %d bit depth",
1422 					 __func__, initial_info->luma_bitdepth);
1423 				ret = -EINVAL;
1424 				goto return_buffers;
1425 			}
1426 		}
1427 	}
1428 
1429 	return ret;
1430 
1431 free_bitstream_vbuf:
1432 	wave5_vdi_free_dma_memory(inst->dev, &inst->bitstream_vbuf);
1433 return_buffers:
1434 	wave5_return_bufs(q, VB2_BUF_STATE_QUEUED);
1435 	return ret;
1436 }
1437 
1438 static int streamoff_output(struct vb2_queue *q)
1439 {
1440 	struct vpu_instance *inst = vb2_get_drv_priv(q);
1441 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1442 	struct vb2_v4l2_buffer *buf;
1443 	int ret;
1444 	dma_addr_t new_rd_ptr;
1445 
1446 	while ((buf = v4l2_m2m_src_buf_remove(m2m_ctx))) {
1447 		dev_dbg(inst->dev->dev, "%s: (Multiplanar) buf type %4u | index %4u\n",
1448 			__func__, buf->vb2_buf.type, buf->vb2_buf.index);
1449 		v4l2_m2m_buf_done(buf, VB2_BUF_STATE_ERROR);
1450 	}
1451 
1452 	ret = wave5_vpu_flush_instance(inst);
1453 	if (ret)
1454 		return ret;
1455 
1456 	/* Reset the ring buffer information */
1457 	new_rd_ptr = wave5_vpu_dec_get_rd_ptr(inst);
1458 	inst->last_rd_ptr = new_rd_ptr;
1459 	inst->codec_info->dec_info.stream_rd_ptr = new_rd_ptr;
1460 	inst->codec_info->dec_info.stream_wr_ptr = new_rd_ptr;
1461 
1462 	if (v4l2_m2m_has_stopped(m2m_ctx))
1463 		send_eos_event(inst);
1464 
1465 	/* streamoff on output cancels any draining operation */
1466 	inst->eos = false;
1467 
1468 	return 0;
1469 }
1470 
1471 static int streamoff_capture(struct vb2_queue *q)
1472 {
1473 	struct vpu_instance *inst = vb2_get_drv_priv(q);
1474 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1475 	struct vb2_v4l2_buffer *buf;
1476 	unsigned int i;
1477 	int ret = 0;
1478 
1479 	for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) {
1480 		ret = wave5_vpu_dec_set_disp_flag(inst, i);
1481 		if (ret)
1482 			dev_dbg(inst->dev->dev,
1483 				"%s: Setting display flag of buf index: %u, fail: %d\n",
1484 				__func__, i, ret);
1485 	}
1486 
1487 	while ((buf = v4l2_m2m_dst_buf_remove(m2m_ctx))) {
1488 		u32 plane;
1489 
1490 		dev_dbg(inst->dev->dev, "%s: buf type %4u | index %4u\n",
1491 			__func__, buf->vb2_buf.type, buf->vb2_buf.index);
1492 
1493 		for (plane = 0; plane < inst->dst_fmt.num_planes; plane++)
1494 			vb2_set_plane_payload(&buf->vb2_buf, plane, 0);
1495 
1496 		v4l2_m2m_buf_done(buf, VB2_BUF_STATE_ERROR);
1497 	}
1498 
1499 	if (inst->needs_reallocation) {
1500 		wave5_vpu_dec_give_command(inst, DEC_RESET_FRAMEBUF_INFO, NULL);
1501 		inst->needs_reallocation = false;
1502 	}
1503 
1504 	if (v4l2_m2m_has_stopped(m2m_ctx)) {
1505 		ret = switch_state(inst, VPU_INST_STATE_INIT_SEQ);
1506 		if (ret)
1507 			return ret;
1508 	}
1509 
1510 	return 0;
1511 }
1512 
1513 static void wave5_vpu_dec_stop_streaming(struct vb2_queue *q)
1514 {
1515 	struct vpu_instance *inst = vb2_get_drv_priv(q);
1516 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1517 	bool check_cmd = TRUE;
1518 
1519 	dev_dbg(inst->dev->dev, "%s: type: %u\n", __func__, q->type);
1520 
1521 	while (check_cmd) {
1522 		struct queue_status_info q_status;
1523 		struct dec_output_info dec_output_info;
1524 
1525 		wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);
1526 
1527 		if (q_status.report_queue_count == 0)
1528 			break;
1529 
1530 		if (wave5_vpu_wait_interrupt(inst, VPU_DEC_TIMEOUT) < 0)
1531 			break;
1532 
1533 		if (wave5_vpu_dec_get_output_info(inst, &dec_output_info))
1534 			dev_dbg(inst->dev->dev, "Getting decoding results from fw, fail\n");
1535 	}
1536 
1537 	v4l2_m2m_update_stop_streaming_state(m2m_ctx, q);
1538 
1539 	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
1540 		streamoff_output(q);
1541 	else
1542 		streamoff_capture(q);
1543 }
1544 
1545 static const struct vb2_ops wave5_vpu_dec_vb2_ops = {
1546 	.queue_setup = wave5_vpu_dec_queue_setup,
1547 	.wait_prepare = vb2_ops_wait_prepare,
1548 	.wait_finish = vb2_ops_wait_finish,
1549 	.buf_queue = wave5_vpu_dec_buf_queue,
1550 	.start_streaming = wave5_vpu_dec_start_streaming,
1551 	.stop_streaming = wave5_vpu_dec_stop_streaming,
1552 };
1553 
1554 static void wave5_set_default_format(struct v4l2_pix_format_mplane *src_fmt,
1555 				     struct v4l2_pix_format_mplane *dst_fmt)
1556 {
1557 	unsigned int dst_pix_fmt = dec_fmt_list[VPU_FMT_TYPE_RAW][0].v4l2_pix_fmt;
1558 	const struct v4l2_format_info *dst_fmt_info = v4l2_format_info(dst_pix_fmt);
1559 
1560 	src_fmt->pixelformat = dec_fmt_list[VPU_FMT_TYPE_CODEC][0].v4l2_pix_fmt;
1561 	src_fmt->field = V4L2_FIELD_NONE;
1562 	src_fmt->flags = 0;
1563 	src_fmt->num_planes = 1;
1564 	wave5_update_pix_fmt(src_fmt, 720, 480);
1565 
1566 	dst_fmt->pixelformat = dst_pix_fmt;
1567 	dst_fmt->field = V4L2_FIELD_NONE;
1568 	dst_fmt->flags = 0;
1569 	dst_fmt->num_planes = dst_fmt_info->mem_planes;
1570 	wave5_update_pix_fmt(dst_fmt, 736, 480);
1571 }
1572 
1573 static int wave5_vpu_dec_queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)
1574 {
1575 	return wave5_vpu_queue_init(priv, src_vq, dst_vq, &wave5_vpu_dec_vb2_ops);
1576 }
1577 
1578 static const struct vpu_instance_ops wave5_vpu_dec_inst_ops = {
1579 	.finish_process = wave5_vpu_dec_finish_decode,
1580 };
1581 
1582 static int initialize_sequence(struct vpu_instance *inst)
1583 {
1584 	struct dec_initial_info initial_info;
1585 	int ret = 0;
1586 
1587 	memset(&initial_info, 0, sizeof(struct dec_initial_info));
1588 
1589 	ret = wave5_vpu_dec_issue_seq_init(inst);
1590 	if (ret) {
1591 		dev_dbg(inst->dev->dev, "%s: wave5_vpu_dec_issue_seq_init, fail: %d\n",
1592 			__func__, ret);
1593 		return ret;
1594 	}
1595 
1596 	if (wave5_vpu_wait_interrupt(inst, VPU_DEC_TIMEOUT) < 0)
1597 		dev_dbg(inst->dev->dev, "%s: failed to call vpu_wait_interrupt()\n", __func__);
1598 
1599 	ret = wave5_vpu_dec_complete_seq_init(inst, &initial_info);
1600 	if (ret) {
1601 		dev_dbg(inst->dev->dev, "%s: vpu_dec_complete_seq_init, fail: %d, reason: %u\n",
1602 			__func__, ret, initial_info.seq_init_err_reason);
1603 		wave5_handle_src_buffer(inst, initial_info.rd_ptr);
1604 		return ret;
1605 	}
1606 
1607 	handle_dynamic_resolution_change(inst);
1608 
1609 	return 0;
1610 }
1611 
1612 static bool wave5_is_draining_or_eos(struct vpu_instance *inst)
1613 {
1614 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1615 
1616 	lockdep_assert_held(&inst->state_spinlock);
1617 	return m2m_ctx->is_draining || inst->eos;
1618 }
1619 
1620 static void wave5_vpu_dec_device_run(void *priv)
1621 {
1622 	struct vpu_instance *inst = priv;
1623 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1624 	struct queue_status_info q_status;
1625 	u32 fail_res = 0;
1626 	int ret = 0;
1627 
1628 	dev_dbg(inst->dev->dev, "%s: Fill the ring buffer with new bitstream data", __func__);
1629 
1630 	ret = fill_ringbuffer(inst);
1631 	if (ret) {
1632 		dev_warn(inst->dev->dev, "Filling ring buffer failed\n");
1633 		goto finish_job_and_return;
1634 	}
1635 
1636 	switch (inst->state) {
1637 	case VPU_INST_STATE_OPEN:
1638 		ret = initialize_sequence(inst);
1639 		if (ret) {
1640 			unsigned long flags;
1641 
1642 			spin_lock_irqsave(&inst->state_spinlock, flags);
1643 			if (wave5_is_draining_or_eos(inst) &&
1644 			    wave5_last_src_buffer_consumed(m2m_ctx)) {
1645 				struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
1646 
1647 				switch_state(inst, VPU_INST_STATE_STOP);
1648 
1649 				if (vb2_is_streaming(dst_vq))
1650 					send_eos_event(inst);
1651 				else
1652 					handle_dynamic_resolution_change(inst);
1653 
1654 				flag_last_buffer_done(inst);
1655 			}
1656 			spin_unlock_irqrestore(&inst->state_spinlock, flags);
1657 		} else {
1658 			switch_state(inst, VPU_INST_STATE_INIT_SEQ);
1659 		}
1660 
1661 		break;
1662 
1663 	case VPU_INST_STATE_INIT_SEQ:
1664 		/*
1665 		 * Do this early, preparing the fb can trigger an IRQ before
1666 		 * we had a chance to switch, which leads to an invalid state
1667 		 * change.
1668 		 */
1669 		switch_state(inst, VPU_INST_STATE_PIC_RUN);
1670 
1671 		/*
1672 		 * During DRC, the picture decoding remains pending, so just leave the job
1673 		 * active until this decode operation completes.
1674 		 */
1675 		wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);
1676 
1677 		/*
1678 		 * The sequence must be analyzed first to calculate the proper
1679 		 * size of the auxiliary buffers.
1680 		 */
1681 		ret = wave5_prepare_fb(inst);
1682 		if (ret) {
1683 			dev_warn(inst->dev->dev, "Framebuffer preparation, fail: %d\n", ret);
1684 			switch_state(inst, VPU_INST_STATE_STOP);
1685 			break;
1686 		}
1687 
1688 		if (q_status.instance_queue_count) {
1689 			dev_dbg(inst->dev->dev, "%s: leave with active job", __func__);
1690 			return;
1691 		}
1692 
1693 		fallthrough;
1694 	case VPU_INST_STATE_PIC_RUN:
1695 		ret = start_decode(inst, &fail_res);
1696 		if (ret) {
1697 			dev_err(inst->dev->dev,
1698 				"Frame decoding on m2m context (%p), fail: %d (result: %d)\n",
1699 				m2m_ctx, ret, fail_res);
1700 			break;
1701 		}
1702 		/* Return so that we leave this job active */
1703 		dev_dbg(inst->dev->dev, "%s: leave with active job", __func__);
1704 		return;
1705 	default:
1706 		WARN(1, "Execution of a job in state %s illegal.\n", state_to_str(inst->state));
1707 		break;
1708 	}
1709 
1710 finish_job_and_return:
1711 	dev_dbg(inst->dev->dev, "%s: leave and finish job", __func__);
1712 	v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
1713 }
1714 
1715 static void wave5_vpu_dec_job_abort(void *priv)
1716 {
1717 	struct vpu_instance *inst = priv;
1718 	int ret;
1719 
1720 	ret = switch_state(inst, VPU_INST_STATE_STOP);
1721 	if (ret)
1722 		return;
1723 
1724 	ret = wave5_vpu_dec_set_eos_on_firmware(inst);
1725 	if (ret)
1726 		dev_warn(inst->dev->dev,
1727 			 "Setting EOS for the bitstream, fail: %d\n", ret);
1728 }
1729 
1730 static int wave5_vpu_dec_job_ready(void *priv)
1731 {
1732 	struct vpu_instance *inst = priv;
1733 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1734 	unsigned long flags;
1735 	int ret = 0;
1736 
1737 	spin_lock_irqsave(&inst->state_spinlock, flags);
1738 
1739 	switch (inst->state) {
1740 	case VPU_INST_STATE_NONE:
1741 		dev_dbg(inst->dev->dev, "Decoder must be open to start queueing M2M jobs!\n");
1742 		break;
1743 	case VPU_INST_STATE_OPEN:
1744 		if (wave5_is_draining_or_eos(inst) || !v4l2_m2m_has_stopped(m2m_ctx) ||
1745 		    v4l2_m2m_num_src_bufs_ready(m2m_ctx) > 0) {
1746 			ret = 1;
1747 			break;
1748 		}
1749 
1750 		dev_dbg(inst->dev->dev,
1751 			"Decoder must be draining or >= 1 OUTPUT queue buffer must be queued!\n");
1752 		break;
1753 	case VPU_INST_STATE_INIT_SEQ:
1754 	case VPU_INST_STATE_PIC_RUN:
1755 		if (!m2m_ctx->cap_q_ctx.q.streaming) {
1756 			dev_dbg(inst->dev->dev, "CAPTURE queue must be streaming to queue jobs!\n");
1757 			break;
1758 		} else if (v4l2_m2m_num_dst_bufs_ready(m2m_ctx) < (inst->fbc_buf_count - 1)) {
1759 			dev_dbg(inst->dev->dev,
1760 				"No capture buffer ready to decode!\n");
1761 			break;
1762 		} else if (!wave5_is_draining_or_eos(inst) &&
1763 			   !v4l2_m2m_num_src_bufs_ready(m2m_ctx)) {
1764 			dev_dbg(inst->dev->dev,
1765 				"No bitstream data to decode!\n");
1766 			break;
1767 		}
1768 		ret = 1;
1769 		break;
1770 	case VPU_INST_STATE_STOP:
1771 		dev_dbg(inst->dev->dev, "Decoder is stopped, not running.\n");
1772 		break;
1773 	}
1774 
1775 	spin_unlock_irqrestore(&inst->state_spinlock, flags);
1776 
1777 	return ret;
1778 }
1779 
1780 static const struct v4l2_m2m_ops wave5_vpu_dec_m2m_ops = {
1781 	.device_run = wave5_vpu_dec_device_run,
1782 	.job_abort = wave5_vpu_dec_job_abort,
1783 	.job_ready = wave5_vpu_dec_job_ready,
1784 };
1785 
1786 static int wave5_vpu_open_dec(struct file *filp)
1787 {
1788 	struct video_device *vdev = video_devdata(filp);
1789 	struct vpu_device *dev = video_drvdata(filp);
1790 	struct vpu_instance *inst = NULL;
1791 	struct v4l2_m2m_ctx *m2m_ctx;
1792 	int ret = 0;
1793 
1794 	inst = kzalloc(sizeof(*inst), GFP_KERNEL);
1795 	if (!inst)
1796 		return -ENOMEM;
1797 
1798 	inst->dev = dev;
1799 	inst->type = VPU_INST_TYPE_DEC;
1800 	inst->ops = &wave5_vpu_dec_inst_ops;
1801 
1802 	spin_lock_init(&inst->state_spinlock);
1803 
1804 	inst->codec_info = kzalloc(sizeof(*inst->codec_info), GFP_KERNEL);
1805 	if (!inst->codec_info)
1806 		return -ENOMEM;
1807 
1808 	v4l2_fh_init(&inst->v4l2_fh, vdev);
1809 	filp->private_data = &inst->v4l2_fh;
1810 	v4l2_fh_add(&inst->v4l2_fh);
1811 
1812 	INIT_LIST_HEAD(&inst->list);
1813 	list_add_tail(&inst->list, &dev->instances);
1814 
1815 	inst->v4l2_m2m_dev = inst->dev->v4l2_m2m_dec_dev;
1816 	inst->v4l2_fh.m2m_ctx =
1817 		v4l2_m2m_ctx_init(inst->v4l2_m2m_dev, inst, wave5_vpu_dec_queue_init);
1818 	if (IS_ERR(inst->v4l2_fh.m2m_ctx)) {
1819 		ret = PTR_ERR(inst->v4l2_fh.m2m_ctx);
1820 		goto cleanup_inst;
1821 	}
1822 	m2m_ctx = inst->v4l2_fh.m2m_ctx;
1823 
1824 	v4l2_m2m_set_src_buffered(m2m_ctx, true);
1825 	v4l2_m2m_set_dst_buffered(m2m_ctx, true);
1826 	/*
1827 	 * We use the M2M job queue to ensure synchronization of steps where
1828 	 * needed, as IOCTLs can occur at anytime and we need to run commands on
1829 	 * the firmware in a specified order.
1830 	 * In order to initialize the sequence on the firmware within an M2M
1831 	 * job, the M2M framework needs to be able to queue jobs before
1832 	 * the CAPTURE queue has been started, because we need the results of the
1833 	 * initialization to properly prepare the CAPTURE queue with the correct
1834 	 * amount of buffers.
1835 	 * By setting ignore_cap_streaming to true the m2m framework will call
1836 	 * job_ready as soon as the OUTPUT queue is streaming, instead of
1837 	 * waiting until both the CAPTURE and OUTPUT queues are streaming.
1838 	 */
1839 	m2m_ctx->ignore_cap_streaming = true;
1840 
1841 	v4l2_ctrl_handler_init(&inst->v4l2_ctrl_hdl, 10);
1842 	v4l2_ctrl_new_std(&inst->v4l2_ctrl_hdl, NULL,
1843 			  V4L2_CID_MIN_BUFFERS_FOR_CAPTURE, 1, 32, 1, 1);
1844 
1845 	if (inst->v4l2_ctrl_hdl.error) {
1846 		ret = -ENODEV;
1847 		goto cleanup_inst;
1848 	}
1849 
1850 	inst->v4l2_fh.ctrl_handler = &inst->v4l2_ctrl_hdl;
1851 	v4l2_ctrl_handler_setup(&inst->v4l2_ctrl_hdl);
1852 
1853 	wave5_set_default_format(&inst->src_fmt, &inst->dst_fmt);
1854 	inst->colorspace = V4L2_COLORSPACE_REC709;
1855 	inst->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
1856 	inst->quantization = V4L2_QUANTIZATION_DEFAULT;
1857 	inst->xfer_func = V4L2_XFER_FUNC_DEFAULT;
1858 
1859 	init_completion(&inst->irq_done);
1860 
1861 	inst->id = ida_alloc(&inst->dev->inst_ida, GFP_KERNEL);
1862 	if (inst->id < 0) {
1863 		dev_warn(inst->dev->dev, "Allocating instance ID, fail: %d\n", inst->id);
1864 		ret = inst->id;
1865 		goto cleanup_inst;
1866 	}
1867 
1868 	wave5_vdi_allocate_sram(inst->dev);
1869 
1870 	return 0;
1871 
1872 cleanup_inst:
1873 	wave5_cleanup_instance(inst);
1874 	return ret;
1875 }
1876 
1877 static int wave5_vpu_dec_release(struct file *filp)
1878 {
1879 	return wave5_vpu_release_device(filp, wave5_vpu_dec_close, "decoder");
1880 }
1881 
1882 static const struct v4l2_file_operations wave5_vpu_dec_fops = {
1883 	.owner = THIS_MODULE,
1884 	.open = wave5_vpu_open_dec,
1885 	.release = wave5_vpu_dec_release,
1886 	.unlocked_ioctl = video_ioctl2,
1887 	.poll = v4l2_m2m_fop_poll,
1888 	.mmap = v4l2_m2m_fop_mmap,
1889 };
1890 
1891 int wave5_vpu_dec_register_device(struct vpu_device *dev)
1892 {
1893 	struct video_device *vdev_dec;
1894 	int ret;
1895 
1896 	vdev_dec = devm_kzalloc(dev->v4l2_dev.dev, sizeof(*vdev_dec), GFP_KERNEL);
1897 	if (!vdev_dec)
1898 		return -ENOMEM;
1899 
1900 	dev->v4l2_m2m_dec_dev = v4l2_m2m_init(&wave5_vpu_dec_m2m_ops);
1901 	if (IS_ERR(dev->v4l2_m2m_dec_dev)) {
1902 		ret = PTR_ERR(dev->v4l2_m2m_dec_dev);
1903 		dev_err(dev->dev, "v4l2_m2m_init, fail: %d\n", ret);
1904 		return -EINVAL;
1905 	}
1906 
1907 	dev->video_dev_dec = vdev_dec;
1908 
1909 	strscpy(vdev_dec->name, VPU_DEC_DEV_NAME, sizeof(vdev_dec->name));
1910 	vdev_dec->fops = &wave5_vpu_dec_fops;
1911 	vdev_dec->ioctl_ops = &wave5_vpu_dec_ioctl_ops;
1912 	vdev_dec->release = video_device_release_empty;
1913 	vdev_dec->v4l2_dev = &dev->v4l2_dev;
1914 	vdev_dec->vfl_dir = VFL_DIR_M2M;
1915 	vdev_dec->device_caps = V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_STREAMING;
1916 	vdev_dec->lock = &dev->dev_lock;
1917 
1918 	ret = video_register_device(vdev_dec, VFL_TYPE_VIDEO, -1);
1919 	if (ret)
1920 		return ret;
1921 
1922 	video_set_drvdata(vdev_dec, dev);
1923 
1924 	return 0;
1925 }
1926 
1927 void wave5_vpu_dec_unregister_device(struct vpu_device *dev)
1928 {
1929 	video_unregister_device(dev->video_dev_dec);
1930 	if (dev->v4l2_m2m_dec_dev)
1931 		v4l2_m2m_release(dev->v4l2_m2m_dec_dev);
1932 }
1933