xref: /linux/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c (revision b615879dbfea6cf1236acbc3f2fb25ae84e07071)
1 // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
2 /*
3  * Wave5 series multi-standard codec IP - decoder interface
4  *
5  * Copyright (C) 2021-2023 CHIPS&MEDIA INC
6  */
7 
8 #include <linux/pm_runtime.h>
9 #include "wave5-helper.h"
10 
11 #define VPU_DEC_DEV_NAME "C&M Wave5 VPU decoder"
12 #define VPU_DEC_DRV_NAME "wave5-dec"
13 
14 static const struct v4l2_frmsize_stepwise dec_hevc_frmsize = {
15 	.min_width = W5_MIN_DEC_PIC_8_WIDTH,
16 	.max_width = W5_MAX_DEC_PIC_WIDTH,
17 	.step_width = W5_DEC_CODEC_STEP_WIDTH,
18 	.min_height = W5_MIN_DEC_PIC_8_HEIGHT,
19 	.max_height = W5_MAX_DEC_PIC_HEIGHT,
20 	.step_height = W5_DEC_CODEC_STEP_HEIGHT,
21 };
22 
23 static const struct v4l2_frmsize_stepwise dec_h264_frmsize = {
24 	.min_width = W5_MIN_DEC_PIC_32_WIDTH,
25 	.max_width = W5_MAX_DEC_PIC_WIDTH,
26 	.step_width = W5_DEC_CODEC_STEP_WIDTH,
27 	.min_height = W5_MIN_DEC_PIC_32_HEIGHT,
28 	.max_height = W5_MAX_DEC_PIC_HEIGHT,
29 	.step_height = W5_DEC_CODEC_STEP_HEIGHT,
30 };
31 
32 static const struct v4l2_frmsize_stepwise dec_raw_frmsize = {
33 	.min_width = W5_MIN_DEC_PIC_8_WIDTH,
34 	.max_width = W5_MAX_DEC_PIC_WIDTH,
35 	.step_width = W5_DEC_RAW_STEP_WIDTH,
36 	.min_height = W5_MIN_DEC_PIC_8_HEIGHT,
37 	.max_height = W5_MAX_DEC_PIC_HEIGHT,
38 	.step_height = W5_DEC_RAW_STEP_HEIGHT,
39 };
40 
41 static const struct vpu_format dec_fmt_list[FMT_TYPES][MAX_FMTS] = {
42 	[VPU_FMT_TYPE_CODEC] = {
43 		{
44 			.v4l2_pix_fmt = V4L2_PIX_FMT_HEVC,
45 			.v4l2_frmsize = &dec_hevc_frmsize,
46 		},
47 		{
48 			.v4l2_pix_fmt = V4L2_PIX_FMT_H264,
49 			.v4l2_frmsize = &dec_h264_frmsize,
50 		},
51 	},
52 	[VPU_FMT_TYPE_RAW] = {
53 		{
54 			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV420,
55 			.v4l2_frmsize = &dec_raw_frmsize,
56 		},
57 		{
58 			.v4l2_pix_fmt = V4L2_PIX_FMT_NV12,
59 			.v4l2_frmsize = &dec_raw_frmsize,
60 		},
61 		{
62 			.v4l2_pix_fmt = V4L2_PIX_FMT_NV21,
63 			.v4l2_frmsize = &dec_raw_frmsize,
64 		},
65 		{
66 			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV422P,
67 			.v4l2_frmsize = &dec_raw_frmsize,
68 		},
69 		{
70 			.v4l2_pix_fmt = V4L2_PIX_FMT_NV16,
71 			.v4l2_frmsize = &dec_raw_frmsize,
72 		},
73 		{
74 			.v4l2_pix_fmt = V4L2_PIX_FMT_NV61,
75 			.v4l2_frmsize = &dec_raw_frmsize,
76 		},
77 		{
78 			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV420M,
79 			.v4l2_frmsize = &dec_raw_frmsize,
80 		},
81 		{
82 			.v4l2_pix_fmt = V4L2_PIX_FMT_NV12M,
83 			.v4l2_frmsize = &dec_raw_frmsize,
84 		},
85 		{
86 			.v4l2_pix_fmt = V4L2_PIX_FMT_NV21M,
87 			.v4l2_frmsize = &dec_raw_frmsize,
88 		},
89 		{
90 			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV422M,
91 			.v4l2_frmsize = &dec_raw_frmsize,
92 		},
93 		{
94 			.v4l2_pix_fmt = V4L2_PIX_FMT_NV16M,
95 			.v4l2_frmsize = &dec_raw_frmsize,
96 		},
97 		{
98 			.v4l2_pix_fmt = V4L2_PIX_FMT_NV61M,
99 			.v4l2_frmsize = &dec_raw_frmsize,
100 		},
101 	}
102 };
103 
104 /*
105  * Make sure that the state switch is allowed and add logging for debugging
106  * purposes
107  */
108 static int switch_state(struct vpu_instance *inst, enum vpu_instance_state state)
109 {
110 	switch (state) {
111 	case VPU_INST_STATE_NONE:
112 		break;
113 	case VPU_INST_STATE_OPEN:
114 		if (inst->state != VPU_INST_STATE_NONE)
115 			goto invalid_state_switch;
116 		goto valid_state_switch;
117 	case VPU_INST_STATE_INIT_SEQ:
118 		if (inst->state != VPU_INST_STATE_OPEN && inst->state != VPU_INST_STATE_STOP)
119 			goto invalid_state_switch;
120 		goto valid_state_switch;
121 	case VPU_INST_STATE_PIC_RUN:
122 		if (inst->state != VPU_INST_STATE_INIT_SEQ)
123 			goto invalid_state_switch;
124 		goto valid_state_switch;
125 	case VPU_INST_STATE_STOP:
126 		goto valid_state_switch;
127 	}
128 invalid_state_switch:
129 	WARN(1, "Invalid state switch from %s to %s.\n",
130 	     state_to_str(inst->state), state_to_str(state));
131 	return -EINVAL;
132 valid_state_switch:
133 	dev_dbg(inst->dev->dev, "Switch state from %s to %s.\n",
134 		state_to_str(inst->state), state_to_str(state));
135 	inst->state = state;
136 	return 0;
137 }
138 
139 static int wave5_vpu_dec_set_eos_on_firmware(struct vpu_instance *inst)
140 {
141 	int ret;
142 
143 	ret = wave5_vpu_dec_update_bitstream_buffer(inst, 0);
144 	if (ret) {
145 		/*
146 		 * To set the EOS flag, a command is sent to the firmware.
147 		 * That command may never return (timeout) or may report an error.
148 		 */
149 		dev_err(inst->dev->dev,
150 			"Setting EOS for the bitstream, fail: %d\n", ret);
151 		return ret;
152 	}
153 	return 0;
154 }
155 
156 static bool wave5_last_src_buffer_consumed(struct v4l2_m2m_ctx *m2m_ctx)
157 {
158 	struct vpu_src_buffer *vpu_buf;
159 
160 	if (!m2m_ctx->last_src_buf)
161 		return false;
162 
163 	vpu_buf = wave5_to_vpu_src_buf(m2m_ctx->last_src_buf);
164 	return vpu_buf->consumed;
165 }
166 
167 static void wave5_handle_src_buffer(struct vpu_instance *inst, dma_addr_t rd_ptr)
168 {
169 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
170 	struct v4l2_m2m_buffer *buf, *n;
171 	size_t consumed_bytes = 0;
172 
173 	if (rd_ptr >= inst->last_rd_ptr) {
174 		consumed_bytes = rd_ptr - inst->last_rd_ptr;
175 	} else {
176 		size_t rd_offs = rd_ptr - inst->bitstream_vbuf.daddr;
177 		size_t last_rd_offs = inst->last_rd_ptr - inst->bitstream_vbuf.daddr;
178 
179 		consumed_bytes = rd_offs + (inst->bitstream_vbuf.size - last_rd_offs);
180 	}
181 
182 	inst->last_rd_ptr = rd_ptr;
183 	consumed_bytes += inst->remaining_consumed_bytes;
184 
185 	dev_dbg(inst->dev->dev, "%s: %zu bytes of bitstream was consumed", __func__,
186 		consumed_bytes);
187 
188 	v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buf, n) {
189 		struct vb2_v4l2_buffer *src_buf = &buf->vb;
190 		size_t src_size = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
191 
192 		if (src_size > consumed_bytes)
193 			break;
194 
195 		dev_dbg(inst->dev->dev, "%s: removing src buffer %i",
196 			__func__, src_buf->vb2_buf.index);
197 		src_buf = v4l2_m2m_src_buf_remove(m2m_ctx);
198 		inst->timestamp = src_buf->vb2_buf.timestamp;
199 		v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
200 		consumed_bytes -= src_size;
201 
202 		/* Handle the case the last bitstream buffer has been picked */
203 		if (src_buf == m2m_ctx->last_src_buf) {
204 			int ret;
205 
206 			m2m_ctx->last_src_buf = NULL;
207 			ret = wave5_vpu_dec_set_eos_on_firmware(inst);
208 			if (ret)
209 				dev_warn(inst->dev->dev,
210 					 "Setting EOS for the bitstream, fail: %d\n", ret);
211 			break;
212 		}
213 	}
214 
215 	inst->remaining_consumed_bytes = consumed_bytes;
216 }
217 
218 static int start_decode(struct vpu_instance *inst, u32 *fail_res)
219 {
220 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
221 	int ret = 0;
222 
223 	ret = wave5_vpu_dec_start_one_frame(inst, fail_res);
224 	if (ret) {
225 		struct vb2_v4l2_buffer *src_buf;
226 
227 		src_buf = v4l2_m2m_src_buf_remove(m2m_ctx);
228 		if (src_buf)
229 			v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR);
230 		switch_state(inst, VPU_INST_STATE_STOP);
231 
232 		dev_dbg(inst->dev->dev, "%s: pic run failed / finish job", __func__);
233 		v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
234 	}
235 
236 	return ret;
237 }
238 
239 static void flag_last_buffer_done(struct vpu_instance *inst)
240 {
241 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
242 	struct vb2_v4l2_buffer *vb;
243 	int i;
244 
245 	lockdep_assert_held(&inst->state_spinlock);
246 
247 	vb = v4l2_m2m_dst_buf_remove(m2m_ctx);
248 	if (!vb) {
249 		m2m_ctx->is_draining = true;
250 		m2m_ctx->next_buf_last = true;
251 		return;
252 	}
253 
254 	for (i = 0; i < vb->vb2_buf.num_planes; i++)
255 		vb2_set_plane_payload(&vb->vb2_buf, i, 0);
256 	vb->field = V4L2_FIELD_NONE;
257 
258 	v4l2_m2m_last_buffer_done(m2m_ctx, vb);
259 }
260 
261 static void send_eos_event(struct vpu_instance *inst)
262 {
263 	static const struct v4l2_event vpu_event_eos = {
264 		.type = V4L2_EVENT_EOS
265 	};
266 
267 	lockdep_assert_held(&inst->state_spinlock);
268 
269 	v4l2_event_queue_fh(&inst->v4l2_fh, &vpu_event_eos);
270 	inst->eos = false;
271 }
272 
273 static int handle_dynamic_resolution_change(struct vpu_instance *inst)
274 {
275 	struct v4l2_fh *fh = &inst->v4l2_fh;
276 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
277 
278 	static const struct v4l2_event vpu_event_src_ch = {
279 		.type = V4L2_EVENT_SOURCE_CHANGE,
280 		.u.src_change.changes = V4L2_EVENT_SRC_CH_RESOLUTION,
281 	};
282 	struct dec_info *p_dec_info = &inst->codec_info->dec_info;
283 	struct dec_initial_info *initial_info = &inst->codec_info->dec_info.initial_info;
284 
285 	lockdep_assert_held(&inst->state_spinlock);
286 
287 	dev_dbg(inst->dev->dev, "%s: rd_ptr %pad", __func__, &initial_info->rd_ptr);
288 
289 	dev_dbg(inst->dev->dev, "%s: width: %u height: %u profile: %u | minbuffer: %u\n",
290 		__func__, initial_info->pic_width, initial_info->pic_height,
291 		initial_info->profile, initial_info->min_frame_buffer_count);
292 
293 	inst->needs_reallocation = true;
294 	inst->fbc_buf_count = initial_info->min_frame_buffer_count + 1;
295 	if (inst->fbc_buf_count != v4l2_m2m_num_dst_bufs_ready(m2m_ctx)) {
296 		struct v4l2_ctrl *ctrl;
297 
298 		ctrl = v4l2_ctrl_find(&inst->v4l2_ctrl_hdl,
299 				      V4L2_CID_MIN_BUFFERS_FOR_CAPTURE);
300 		if (ctrl)
301 			v4l2_ctrl_s_ctrl(ctrl, inst->fbc_buf_count);
302 	}
303 
304 	if (p_dec_info->initial_info_obtained) {
305 		const struct vpu_format *vpu_fmt;
306 
307 		inst->conf_win.left = initial_info->pic_crop_rect.left;
308 		inst->conf_win.top = initial_info->pic_crop_rect.top;
309 		inst->conf_win.width = initial_info->pic_width -
310 			initial_info->pic_crop_rect.left - initial_info->pic_crop_rect.right;
311 		inst->conf_win.height = initial_info->pic_height -
312 			initial_info->pic_crop_rect.top - initial_info->pic_crop_rect.bottom;
313 
314 		vpu_fmt = wave5_find_vpu_fmt(inst->src_fmt.pixelformat,
315 					     dec_fmt_list[VPU_FMT_TYPE_CODEC]);
316 		if (!vpu_fmt)
317 			return -EINVAL;
318 
319 		wave5_update_pix_fmt(&inst->src_fmt,
320 				     VPU_FMT_TYPE_CODEC,
321 				     initial_info->pic_width,
322 				     initial_info->pic_height,
323 				     vpu_fmt->v4l2_frmsize);
324 
325 		vpu_fmt = wave5_find_vpu_fmt(inst->dst_fmt.pixelformat,
326 					     dec_fmt_list[VPU_FMT_TYPE_RAW]);
327 		if (!vpu_fmt)
328 			return -EINVAL;
329 
330 		wave5_update_pix_fmt(&inst->dst_fmt,
331 				     VPU_FMT_TYPE_RAW,
332 				     initial_info->pic_width,
333 				     initial_info->pic_height,
334 				     vpu_fmt->v4l2_frmsize);
335 	}
336 
337 	v4l2_event_queue_fh(fh, &vpu_event_src_ch);
338 
339 	return 0;
340 }
341 
342 static void wave5_vpu_dec_finish_decode(struct vpu_instance *inst)
343 {
344 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
345 	struct dec_output_info dec_info;
346 	int ret;
347 	struct vb2_v4l2_buffer *dec_buf = NULL;
348 	struct vb2_v4l2_buffer *disp_buf = NULL;
349 	struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
350 	struct queue_status_info q_status;
351 
352 	dev_dbg(inst->dev->dev, "%s: Fetch output info from firmware.", __func__);
353 
354 	ret = wave5_vpu_dec_get_output_info(inst, &dec_info);
355 	if (ret) {
356 		dev_warn(inst->dev->dev, "%s: could not get output info.", __func__);
357 		v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
358 		return;
359 	}
360 
361 	dev_dbg(inst->dev->dev, "%s: rd_ptr %pad wr_ptr %pad", __func__, &dec_info.rd_ptr,
362 		&dec_info.wr_ptr);
363 	wave5_handle_src_buffer(inst, dec_info.rd_ptr);
364 
365 	dev_dbg(inst->dev->dev, "%s: dec_info dec_idx %i disp_idx %i", __func__,
366 		dec_info.index_frame_decoded, dec_info.index_frame_display);
367 
368 	if (!vb2_is_streaming(dst_vq)) {
369 		dev_dbg(inst->dev->dev, "%s: capture is not streaming..", __func__);
370 		v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
371 		return;
372 	}
373 
374 	/* Remove decoded buffer from the ready queue now that it has been
375 	 * decoded.
376 	 */
377 	if (dec_info.index_frame_decoded >= 0) {
378 		struct vb2_buffer *vb = vb2_get_buffer(dst_vq,
379 						       dec_info.index_frame_decoded);
380 		if (vb) {
381 			dec_buf = to_vb2_v4l2_buffer(vb);
382 			dec_buf->vb2_buf.timestamp = inst->timestamp;
383 		} else {
384 			dev_warn(inst->dev->dev, "%s: invalid decoded frame index %i",
385 				 __func__, dec_info.index_frame_decoded);
386 		}
387 	}
388 
389 	if (dec_info.index_frame_display >= 0) {
390 		disp_buf = v4l2_m2m_dst_buf_remove_by_idx(m2m_ctx, dec_info.index_frame_display);
391 		if (!disp_buf)
392 			dev_warn(inst->dev->dev, "%s: invalid display frame index %i",
393 				 __func__, dec_info.index_frame_display);
394 	}
395 
396 	/* If there is anything to display, do that now */
397 	if (disp_buf) {
398 		struct vpu_dst_buffer *dst_vpu_buf = wave5_to_vpu_dst_buf(disp_buf);
399 
400 		if (inst->dst_fmt.num_planes == 1) {
401 			vb2_set_plane_payload(&disp_buf->vb2_buf, 0,
402 					      inst->dst_fmt.plane_fmt[0].sizeimage);
403 		} else if (inst->dst_fmt.num_planes == 2) {
404 			vb2_set_plane_payload(&disp_buf->vb2_buf, 0,
405 					      inst->dst_fmt.plane_fmt[0].sizeimage);
406 			vb2_set_plane_payload(&disp_buf->vb2_buf, 1,
407 					      inst->dst_fmt.plane_fmt[1].sizeimage);
408 		} else if (inst->dst_fmt.num_planes == 3) {
409 			vb2_set_plane_payload(&disp_buf->vb2_buf, 0,
410 					      inst->dst_fmt.plane_fmt[0].sizeimage);
411 			vb2_set_plane_payload(&disp_buf->vb2_buf, 1,
412 					      inst->dst_fmt.plane_fmt[1].sizeimage);
413 			vb2_set_plane_payload(&disp_buf->vb2_buf, 2,
414 					      inst->dst_fmt.plane_fmt[2].sizeimage);
415 		}
416 
417 		/* TODO implement interlace support */
418 		disp_buf->field = V4L2_FIELD_NONE;
419 		dst_vpu_buf->display = true;
420 		v4l2_m2m_buf_done(disp_buf, VB2_BUF_STATE_DONE);
421 
422 		dev_dbg(inst->dev->dev, "%s: frame_cycle %8u (payload %lu)\n",
423 			__func__, dec_info.frame_cycle,
424 			vb2_get_plane_payload(&disp_buf->vb2_buf, 0));
425 	}
426 
427 	if ((dec_info.index_frame_display == DISPLAY_IDX_FLAG_SEQ_END ||
428 	     dec_info.sequence_changed)) {
429 		unsigned long flags;
430 
431 		spin_lock_irqsave(&inst->state_spinlock, flags);
432 		if (!v4l2_m2m_has_stopped(m2m_ctx)) {
433 			switch_state(inst, VPU_INST_STATE_STOP);
434 
435 			if (dec_info.sequence_changed)
436 				handle_dynamic_resolution_change(inst);
437 			else
438 				send_eos_event(inst);
439 
440 			flag_last_buffer_done(inst);
441 		}
442 		spin_unlock_irqrestore(&inst->state_spinlock, flags);
443 	}
444 
445 	/*
446 	 * During a resolution change and while draining, the firmware may flush
447 	 * the reorder queue regardless of having a matching decoding operation
448 	 * pending. Only terminate the job if there are no more IRQ coming.
449 	 */
450 	wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);
451 	if (q_status.report_queue_count == 0 &&
452 	    (q_status.instance_queue_count == 0 || dec_info.sequence_changed)) {
453 		dev_dbg(inst->dev->dev, "%s: finishing job.\n", __func__);
454 		pm_runtime_put_autosuspend(inst->dev->dev);
455 		v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
456 	}
457 }
458 
459 static int wave5_vpu_dec_querycap(struct file *file, void *fh, struct v4l2_capability *cap)
460 {
461 	strscpy(cap->driver, VPU_DEC_DRV_NAME, sizeof(cap->driver));
462 	strscpy(cap->card, VPU_DEC_DRV_NAME, sizeof(cap->card));
463 
464 	return 0;
465 }
466 
467 static int wave5_vpu_dec_enum_framesizes(struct file *f, void *fh, struct v4l2_frmsizeenum *fsize)
468 {
469 	const struct vpu_format *vpu_fmt;
470 
471 	if (fsize->index)
472 		return -EINVAL;
473 
474 	vpu_fmt = wave5_find_vpu_fmt(fsize->pixel_format, dec_fmt_list[VPU_FMT_TYPE_CODEC]);
475 	if (!vpu_fmt) {
476 		vpu_fmt = wave5_find_vpu_fmt(fsize->pixel_format, dec_fmt_list[VPU_FMT_TYPE_RAW]);
477 		if (!vpu_fmt)
478 			return -EINVAL;
479 	}
480 
481 	fsize->type = V4L2_FRMSIZE_TYPE_CONTINUOUS;
482 	fsize->stepwise.min_width = vpu_fmt->v4l2_frmsize->min_width;
483 	fsize->stepwise.max_width = vpu_fmt->v4l2_frmsize->max_width;
484 	fsize->stepwise.step_width = W5_DEC_CODEC_STEP_WIDTH;
485 	fsize->stepwise.min_height = vpu_fmt->v4l2_frmsize->min_height;
486 	fsize->stepwise.max_height = vpu_fmt->v4l2_frmsize->max_height;
487 	fsize->stepwise.step_height = W5_DEC_CODEC_STEP_HEIGHT;
488 
489 	return 0;
490 }
491 
492 static int wave5_vpu_dec_enum_fmt_cap(struct file *file, void *fh, struct v4l2_fmtdesc *f)
493 {
494 	const struct vpu_format *vpu_fmt;
495 
496 	vpu_fmt = wave5_find_vpu_fmt_by_idx(f->index, dec_fmt_list[VPU_FMT_TYPE_RAW]);
497 	if (!vpu_fmt)
498 		return -EINVAL;
499 
500 	f->pixelformat = vpu_fmt->v4l2_pix_fmt;
501 	f->flags = 0;
502 
503 	return 0;
504 }
505 
506 static int wave5_vpu_dec_try_fmt_cap(struct file *file, void *fh, struct v4l2_format *f)
507 {
508 	struct vpu_instance *inst = file_to_vpu_inst(file);
509 	struct dec_info *p_dec_info = &inst->codec_info->dec_info;
510 	const struct v4l2_frmsize_stepwise *frmsize;
511 	const struct vpu_format *vpu_fmt;
512 	int width, height;
513 
514 	dev_dbg(inst->dev->dev,
515 		"%s: fourcc: %u width: %u height: %u nm planes: %u colorspace: %u field: %u\n",
516 		__func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
517 		f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);
518 
519 	vpu_fmt = wave5_find_vpu_fmt(f->fmt.pix_mp.pixelformat, dec_fmt_list[VPU_FMT_TYPE_RAW]);
520 	if (!vpu_fmt) {
521 		width = inst->dst_fmt.width;
522 		height = inst->dst_fmt.height;
523 		f->fmt.pix_mp.pixelformat = inst->dst_fmt.pixelformat;
524 		frmsize = &dec_raw_frmsize;
525 	} else {
526 		width = f->fmt.pix_mp.width;
527 		height = f->fmt.pix_mp.height;
528 		f->fmt.pix_mp.pixelformat = vpu_fmt->v4l2_pix_fmt;
529 		frmsize = vpu_fmt->v4l2_frmsize;
530 	}
531 
532 	if (p_dec_info->initial_info_obtained) {
533 		width = inst->dst_fmt.width;
534 		height = inst->dst_fmt.height;
535 	}
536 
537 	wave5_update_pix_fmt(&f->fmt.pix_mp, VPU_FMT_TYPE_RAW,
538 			     width, height, frmsize);
539 	f->fmt.pix_mp.colorspace = inst->colorspace;
540 	f->fmt.pix_mp.ycbcr_enc = inst->ycbcr_enc;
541 	f->fmt.pix_mp.quantization = inst->quantization;
542 	f->fmt.pix_mp.xfer_func = inst->xfer_func;
543 
544 	return 0;
545 }
546 
547 static int wave5_vpu_dec_s_fmt_cap(struct file *file, void *fh, struct v4l2_format *f)
548 {
549 	struct vpu_instance *inst = file_to_vpu_inst(file);
550 	int i, ret;
551 
552 	dev_dbg(inst->dev->dev,
553 		"%s: fourcc: %u width: %u height: %u num_planes: %u colorspace: %u field: %u\n",
554 		__func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
555 		f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);
556 
557 	ret = wave5_vpu_dec_try_fmt_cap(file, fh, f);
558 	if (ret)
559 		return ret;
560 
561 	inst->dst_fmt.width = f->fmt.pix_mp.width;
562 	inst->dst_fmt.height = f->fmt.pix_mp.height;
563 	inst->dst_fmt.pixelformat = f->fmt.pix_mp.pixelformat;
564 	inst->dst_fmt.field = f->fmt.pix_mp.field;
565 	inst->dst_fmt.flags = f->fmt.pix_mp.flags;
566 	inst->dst_fmt.num_planes = f->fmt.pix_mp.num_planes;
567 	for (i = 0; i < inst->dst_fmt.num_planes; i++) {
568 		inst->dst_fmt.plane_fmt[i].bytesperline = f->fmt.pix_mp.plane_fmt[i].bytesperline;
569 		inst->dst_fmt.plane_fmt[i].sizeimage = f->fmt.pix_mp.plane_fmt[i].sizeimage;
570 	}
571 
572 	if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV12 ||
573 	    inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV12M) {
574 		inst->cbcr_interleave = true;
575 		inst->nv21 = false;
576 		inst->output_format = FORMAT_420;
577 	} else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV21 ||
578 		   inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV21M) {
579 		inst->cbcr_interleave = true;
580 		inst->nv21 = true;
581 		inst->output_format = FORMAT_420;
582 	} else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV16 ||
583 		   inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV16M) {
584 		inst->cbcr_interleave = true;
585 		inst->nv21 = false;
586 		inst->output_format = FORMAT_422;
587 	} else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV61 ||
588 		   inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV61M) {
589 		inst->cbcr_interleave = true;
590 		inst->nv21 = true;
591 		inst->output_format = FORMAT_422;
592 	} else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_YUV422P ||
593 		   inst->dst_fmt.pixelformat == V4L2_PIX_FMT_YUV422M) {
594 		inst->cbcr_interleave = false;
595 		inst->nv21 = false;
596 		inst->output_format = FORMAT_422;
597 	} else {
598 		inst->cbcr_interleave = false;
599 		inst->nv21 = false;
600 		inst->output_format = FORMAT_420;
601 	}
602 
603 	return 0;
604 }
605 
606 static int wave5_vpu_dec_g_fmt_cap(struct file *file, void *fh, struct v4l2_format *f)
607 {
608 	struct vpu_instance *inst = file_to_vpu_inst(file);
609 	int i;
610 
611 	f->fmt.pix_mp.width = inst->dst_fmt.width;
612 	f->fmt.pix_mp.height = inst->dst_fmt.height;
613 	f->fmt.pix_mp.pixelformat = inst->dst_fmt.pixelformat;
614 	f->fmt.pix_mp.field = inst->dst_fmt.field;
615 	f->fmt.pix_mp.flags = inst->dst_fmt.flags;
616 	f->fmt.pix_mp.num_planes = inst->dst_fmt.num_planes;
617 	for (i = 0; i < f->fmt.pix_mp.num_planes; i++) {
618 		f->fmt.pix_mp.plane_fmt[i].bytesperline = inst->dst_fmt.plane_fmt[i].bytesperline;
619 		f->fmt.pix_mp.plane_fmt[i].sizeimage = inst->dst_fmt.plane_fmt[i].sizeimage;
620 	}
621 
622 	f->fmt.pix_mp.colorspace = inst->colorspace;
623 	f->fmt.pix_mp.ycbcr_enc = inst->ycbcr_enc;
624 	f->fmt.pix_mp.quantization = inst->quantization;
625 	f->fmt.pix_mp.xfer_func = inst->xfer_func;
626 
627 	return 0;
628 }
629 
630 static int wave5_vpu_dec_enum_fmt_out(struct file *file, void *fh, struct v4l2_fmtdesc *f)
631 {
632 	struct vpu_instance *inst = file_to_vpu_inst(file);
633 	const struct vpu_format *vpu_fmt;
634 
635 	dev_dbg(inst->dev->dev, "%s: index: %u\n", __func__, f->index);
636 
637 	vpu_fmt = wave5_find_vpu_fmt_by_idx(f->index, dec_fmt_list[VPU_FMT_TYPE_CODEC]);
638 	if (!vpu_fmt)
639 		return -EINVAL;
640 
641 	f->pixelformat = vpu_fmt->v4l2_pix_fmt;
642 	f->flags = V4L2_FMT_FLAG_DYN_RESOLUTION | V4L2_FMT_FLAG_COMPRESSED;
643 
644 	return 0;
645 }
646 
647 static int wave5_vpu_dec_try_fmt_out(struct file *file, void *fh, struct v4l2_format *f)
648 {
649 	struct vpu_instance *inst = file_to_vpu_inst(file);
650 	const struct v4l2_frmsize_stepwise *frmsize;
651 	const struct vpu_format *vpu_fmt;
652 	int width, height;
653 
654 	dev_dbg(inst->dev->dev,
655 		"%s: fourcc: %u width: %u height: %u num_planes: %u colorspace: %u field: %u\n",
656 		__func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
657 		f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);
658 
659 	vpu_fmt = wave5_find_vpu_fmt(f->fmt.pix_mp.pixelformat, dec_fmt_list[VPU_FMT_TYPE_CODEC]);
660 	if (!vpu_fmt) {
661 		width = inst->src_fmt.width;
662 		height = inst->src_fmt.height;
663 		f->fmt.pix_mp.pixelformat = inst->src_fmt.pixelformat;
664 		frmsize = &dec_hevc_frmsize;
665 	} else {
666 		width = f->fmt.pix_mp.width;
667 		height = f->fmt.pix_mp.height;
668 		f->fmt.pix_mp.pixelformat = vpu_fmt->v4l2_pix_fmt;
669 		frmsize = vpu_fmt->v4l2_frmsize;
670 	}
671 
672 	wave5_update_pix_fmt(&f->fmt.pix_mp, VPU_FMT_TYPE_CODEC,
673 			     width, height, frmsize);
674 
675 	return 0;
676 }
677 
678 static int wave5_vpu_dec_s_fmt_out(struct file *file, void *fh, struct v4l2_format *f)
679 {
680 	struct vpu_instance *inst = file_to_vpu_inst(file);
681 	const struct vpu_format *vpu_fmt;
682 	int i, ret;
683 
684 	dev_dbg(inst->dev->dev,
685 		"%s: fourcc: %u width: %u height: %u num_planes: %u field: %u\n",
686 		__func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
687 		f->fmt.pix_mp.num_planes, f->fmt.pix_mp.field);
688 
689 	ret = wave5_vpu_dec_try_fmt_out(file, fh, f);
690 	if (ret)
691 		return ret;
692 
693 	inst->std = wave5_to_vpu_std(f->fmt.pix_mp.pixelformat, inst->type);
694 	if (inst->std == STD_UNKNOWN) {
695 		dev_warn(inst->dev->dev, "unsupported pixelformat: %.4s\n",
696 			 (char *)&f->fmt.pix_mp.pixelformat);
697 		return -EINVAL;
698 	}
699 
700 	inst->src_fmt.width = f->fmt.pix_mp.width;
701 	inst->src_fmt.height = f->fmt.pix_mp.height;
702 	inst->src_fmt.pixelformat = f->fmt.pix_mp.pixelformat;
703 	inst->src_fmt.field = f->fmt.pix_mp.field;
704 	inst->src_fmt.flags = f->fmt.pix_mp.flags;
705 	inst->src_fmt.num_planes = f->fmt.pix_mp.num_planes;
706 	for (i = 0; i < inst->src_fmt.num_planes; i++) {
707 		inst->src_fmt.plane_fmt[i].bytesperline = f->fmt.pix_mp.plane_fmt[i].bytesperline;
708 		inst->src_fmt.plane_fmt[i].sizeimage = f->fmt.pix_mp.plane_fmt[i].sizeimage;
709 	}
710 
711 	inst->colorspace = f->fmt.pix_mp.colorspace;
712 	inst->ycbcr_enc = f->fmt.pix_mp.ycbcr_enc;
713 	inst->quantization = f->fmt.pix_mp.quantization;
714 	inst->xfer_func = f->fmt.pix_mp.xfer_func;
715 
716 	vpu_fmt = wave5_find_vpu_fmt(inst->dst_fmt.pixelformat, dec_fmt_list[VPU_FMT_TYPE_RAW]);
717 	if (!vpu_fmt)
718 		return -EINVAL;
719 
720 	wave5_update_pix_fmt(&inst->dst_fmt, VPU_FMT_TYPE_RAW,
721 			     f->fmt.pix_mp.width, f->fmt.pix_mp.height,
722 			     vpu_fmt->v4l2_frmsize);
723 
724 	return 0;
725 }
726 
727 static int wave5_vpu_dec_g_selection(struct file *file, void *fh, struct v4l2_selection *s)
728 {
729 	struct vpu_instance *inst = file_to_vpu_inst(file);
730 
731 	dev_dbg(inst->dev->dev, "%s: type: %u | target: %u\n", __func__, s->type, s->target);
732 
733 	if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
734 		return -EINVAL;
735 	switch (s->target) {
736 	case V4L2_SEL_TGT_COMPOSE_BOUNDS:
737 	case V4L2_SEL_TGT_COMPOSE_PADDED:
738 		s->r.left = 0;
739 		s->r.top = 0;
740 		s->r.width = inst->dst_fmt.width;
741 		s->r.height = inst->dst_fmt.height;
742 		break;
743 	case V4L2_SEL_TGT_COMPOSE:
744 	case V4L2_SEL_TGT_COMPOSE_DEFAULT:
745 		s->r.left = 0;
746 		s->r.top = 0;
747 		if (inst->state > VPU_INST_STATE_OPEN) {
748 			s->r = inst->conf_win;
749 		} else {
750 			s->r.width = inst->src_fmt.width;
751 			s->r.height = inst->src_fmt.height;
752 		}
753 		break;
754 	default:
755 		return -EINVAL;
756 	}
757 
758 	return 0;
759 }
760 
761 static int wave5_vpu_dec_s_selection(struct file *file, void *fh, struct v4l2_selection *s)
762 {
763 	struct vpu_instance *inst = file_to_vpu_inst(file);
764 
765 	if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
766 		return -EINVAL;
767 
768 	if (s->target != V4L2_SEL_TGT_COMPOSE)
769 		return -EINVAL;
770 
771 	dev_dbg(inst->dev->dev, "V4L2_SEL_TGT_COMPOSE w: %u h: %u\n",
772 		s->r.width, s->r.height);
773 
774 	s->r.left = 0;
775 	s->r.top = 0;
776 	s->r.width = inst->dst_fmt.width;
777 	s->r.height = inst->dst_fmt.height;
778 
779 	return 0;
780 }
781 
782 static int wave5_vpu_dec_stop(struct vpu_instance *inst)
783 {
784 	int ret = 0;
785 	unsigned long flags;
786 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
787 
788 	spin_lock_irqsave(&inst->state_spinlock, flags);
789 
790 	if (m2m_ctx->is_draining) {
791 		ret = -EBUSY;
792 		goto unlock_and_return;
793 	}
794 
795 	if (inst->state != VPU_INST_STATE_NONE) {
796 		/*
797 		 * Temporarily release the state_spinlock so that subsequent
798 		 * calls do not block on a mutex while inside this spinlock.
799 		 */
800 		spin_unlock_irqrestore(&inst->state_spinlock, flags);
801 		ret = wave5_vpu_dec_set_eos_on_firmware(inst);
802 		if (ret)
803 			return ret;
804 
805 		spin_lock_irqsave(&inst->state_spinlock, flags);
806 		/*
807 		 * TODO eliminate this check by using a separate check for
808 		 * draining triggered by a resolution change.
809 		 */
810 		if (m2m_ctx->is_draining) {
811 			ret = -EBUSY;
812 			goto unlock_and_return;
813 		}
814 	}
815 
816 	/*
817 	 * Used to remember the EOS state after the streamoff/on transition on
818 	 * the capture queue.
819 	 */
820 	inst->eos = true;
821 
822 	if (m2m_ctx->has_stopped)
823 		goto unlock_and_return;
824 
825 	m2m_ctx->last_src_buf = v4l2_m2m_last_src_buf(m2m_ctx);
826 	m2m_ctx->is_draining = true;
827 
828 	/*
829 	 * Deferred to device run in case it wasn't in the ring buffer
830 	 * yet. In other case, we have to send the EOS signal to the
831 	 * firmware so that any pending PIC_RUN ends without new
832 	 * bitstream buffer.
833 	 */
834 	if (m2m_ctx->last_src_buf)
835 		goto unlock_and_return;
836 
837 	if (inst->state == VPU_INST_STATE_NONE) {
838 		send_eos_event(inst);
839 		flag_last_buffer_done(inst);
840 	}
841 
842 unlock_and_return:
843 	spin_unlock_irqrestore(&inst->state_spinlock, flags);
844 	return ret;
845 }
846 
847 static int wave5_vpu_dec_start(struct vpu_instance *inst)
848 {
849 	int ret = 0;
850 	unsigned long flags;
851 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
852 	struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
853 
854 	spin_lock_irqsave(&inst->state_spinlock, flags);
855 
856 	if (m2m_ctx->is_draining) {
857 		ret = -EBUSY;
858 		goto unlock_and_return;
859 	}
860 
861 	if (m2m_ctx->has_stopped)
862 		m2m_ctx->has_stopped = false;
863 
864 	vb2_clear_last_buffer_dequeued(dst_vq);
865 	inst->eos = false;
866 
867 unlock_and_return:
868 	spin_unlock_irqrestore(&inst->state_spinlock, flags);
869 	return ret;
870 }
871 
872 static int wave5_vpu_dec_decoder_cmd(struct file *file, void *fh, struct v4l2_decoder_cmd *dc)
873 {
874 	struct vpu_instance *inst = file_to_vpu_inst(file);
875 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
876 	int ret;
877 
878 	dev_dbg(inst->dev->dev, "decoder command: %u\n", dc->cmd);
879 
880 	ret = v4l2_m2m_ioctl_try_decoder_cmd(file, fh, dc);
881 	if (ret)
882 		return ret;
883 
884 	switch (dc->cmd) {
885 	case V4L2_DEC_CMD_STOP:
886 		ret = wave5_vpu_dec_stop(inst);
887 		/* Just in case we don't have anything to decode anymore */
888 		v4l2_m2m_try_schedule(m2m_ctx);
889 		break;
890 	case V4L2_DEC_CMD_START:
891 		ret = wave5_vpu_dec_start(inst);
892 		break;
893 	default:
894 		ret = -EINVAL;
895 	}
896 
897 	return ret;
898 }
899 
900 static const struct v4l2_ioctl_ops wave5_vpu_dec_ioctl_ops = {
901 	.vidioc_querycap = wave5_vpu_dec_querycap,
902 	.vidioc_enum_framesizes = wave5_vpu_dec_enum_framesizes,
903 
904 	.vidioc_enum_fmt_vid_cap	= wave5_vpu_dec_enum_fmt_cap,
905 	.vidioc_s_fmt_vid_cap_mplane = wave5_vpu_dec_s_fmt_cap,
906 	.vidioc_g_fmt_vid_cap_mplane = wave5_vpu_dec_g_fmt_cap,
907 	.vidioc_try_fmt_vid_cap_mplane = wave5_vpu_dec_try_fmt_cap,
908 
909 	.vidioc_enum_fmt_vid_out	= wave5_vpu_dec_enum_fmt_out,
910 	.vidioc_s_fmt_vid_out_mplane = wave5_vpu_dec_s_fmt_out,
911 	.vidioc_g_fmt_vid_out_mplane = wave5_vpu_g_fmt_out,
912 	.vidioc_try_fmt_vid_out_mplane = wave5_vpu_dec_try_fmt_out,
913 
914 	.vidioc_g_selection = wave5_vpu_dec_g_selection,
915 	.vidioc_s_selection = wave5_vpu_dec_s_selection,
916 
917 	.vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs,
918 	/*
919 	 * Firmware does not support CREATE_BUFS for CAPTURE queue. Since
920 	 * there is no immediate use-case for supporting CREATE_BUFS on
921 	 * just the OUTPUT queue, disable CREATE_BUFS altogether.
922 	 */
923 	.vidioc_querybuf = v4l2_m2m_ioctl_querybuf,
924 	.vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf,
925 	.vidioc_qbuf = v4l2_m2m_ioctl_qbuf,
926 	.vidioc_expbuf = v4l2_m2m_ioctl_expbuf,
927 	.vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf,
928 	.vidioc_streamon = v4l2_m2m_ioctl_streamon,
929 	.vidioc_streamoff = v4l2_m2m_ioctl_streamoff,
930 
931 	.vidioc_try_decoder_cmd = v4l2_m2m_ioctl_try_decoder_cmd,
932 	.vidioc_decoder_cmd = wave5_vpu_dec_decoder_cmd,
933 
934 	.vidioc_subscribe_event = wave5_vpu_subscribe_event,
935 	.vidioc_unsubscribe_event = v4l2_event_unsubscribe,
936 };
937 
938 static int wave5_vpu_dec_queue_setup(struct vb2_queue *q, unsigned int *num_buffers,
939 				     unsigned int *num_planes, unsigned int sizes[],
940 				     struct device *alloc_devs[])
941 {
942 	struct vpu_instance *inst = vb2_get_drv_priv(q);
943 	struct v4l2_pix_format_mplane inst_format =
944 		(q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) ? inst->src_fmt : inst->dst_fmt;
945 	unsigned int i;
946 
947 	dev_dbg(inst->dev->dev, "%s: num_buffers: %u | num_planes: %u | type: %u\n", __func__,
948 		*num_buffers, *num_planes, q->type);
949 
950 	*num_planes = inst_format.num_planes;
951 
952 	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
953 		sizes[0] = inst_format.plane_fmt[0].sizeimage;
954 		dev_dbg(inst->dev->dev, "%s: size[0]: %u\n", __func__, sizes[0]);
955 	} else if (q->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
956 		if (*num_buffers < inst->fbc_buf_count)
957 			*num_buffers = inst->fbc_buf_count;
958 
959 		for (i = 0; i < *num_planes; i++) {
960 			sizes[i] = inst_format.plane_fmt[i].sizeimage;
961 			dev_dbg(inst->dev->dev, "%s: size[%u]: %u\n", __func__, i, sizes[i]);
962 		}
963 	}
964 
965 	return 0;
966 }
967 
968 static int wave5_prepare_fb(struct vpu_instance *inst)
969 {
970 	int linear_num;
971 	int non_linear_num;
972 	int fb_stride = 0, fb_height = 0;
973 	int luma_size, chroma_size;
974 	int ret, i;
975 	struct v4l2_m2m_buffer *buf, *n;
976 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
977 	u32 bitdepth = inst->codec_info->dec_info.initial_info.luma_bitdepth;
978 
979 	switch (bitdepth) {
980 	case 8:
981 		break;
982 	case 10:
983 		if (inst->std == W_HEVC_DEC &&
984 		    inst->dev->attr.support_hevc10bit_dec)
985 			break;
986 
987 		fallthrough;
988 	default:
989 		dev_err(inst->dev->dev, "no support for %d bit depth\n", bitdepth);
990 
991 		return -EINVAL;
992 	}
993 
994 	linear_num = v4l2_m2m_num_dst_bufs_ready(m2m_ctx);
995 	non_linear_num = inst->fbc_buf_count;
996 
997 	for (i = 0; i < non_linear_num; i++) {
998 		struct frame_buffer *frame = &inst->frame_buf[i];
999 		struct vpu_buf *vframe = &inst->frame_vbuf[i];
1000 
1001 		fb_stride = ALIGN(inst->dst_fmt.width * bitdepth / 8, 32);
1002 		fb_height = ALIGN(inst->dst_fmt.height, 32);
1003 		luma_size = fb_stride * fb_height;
1004 
1005 		chroma_size = ALIGN(fb_stride / 2, 16) * fb_height;
1006 
1007 		if (vframe->size == (luma_size + chroma_size))
1008 			continue;
1009 
1010 		if (vframe->size)
1011 			wave5_vpu_dec_reset_framebuffer(inst, i);
1012 
1013 		vframe->size = luma_size + chroma_size;
1014 		ret = wave5_vdi_allocate_dma_memory(inst->dev, vframe);
1015 		if (ret) {
1016 			dev_dbg(inst->dev->dev,
1017 				"%s: Allocating FBC buf of size %zu, fail: %d\n",
1018 				__func__, vframe->size, ret);
1019 			return ret;
1020 		}
1021 
1022 		frame->buf_y = vframe->daddr;
1023 		frame->buf_cb = vframe->daddr + luma_size;
1024 		frame->buf_cr = (dma_addr_t)-1;
1025 		frame->size = vframe->size;
1026 		frame->width = inst->src_fmt.width;
1027 		frame->stride = fb_stride;
1028 		frame->map_type = COMPRESSED_FRAME_MAP;
1029 		frame->update_fb_info = true;
1030 	}
1031 	/* In case the count has reduced, clean up leftover framebuffer memory */
1032 	for (i = non_linear_num; i < MAX_REG_FRAME; i++) {
1033 		ret = wave5_vpu_dec_reset_framebuffer(inst, i);
1034 		if (ret)
1035 			break;
1036 	}
1037 
1038 	for (i = 0; i < linear_num; i++) {
1039 		struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1040 		struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
1041 		struct vb2_buffer *vb = vb2_get_buffer(dst_vq, i);
1042 		struct frame_buffer *frame = &inst->frame_buf[non_linear_num + i];
1043 		dma_addr_t buf_addr_y = 0, buf_addr_cb = 0, buf_addr_cr = 0;
1044 		u32 buf_size = 0;
1045 		u32 fb_stride = inst->dst_fmt.width;
1046 		u32 luma_size = fb_stride * inst->dst_fmt.height;
1047 		u32 chroma_size;
1048 
1049 		if (inst->output_format == FORMAT_422)
1050 			chroma_size = fb_stride * inst->dst_fmt.height / 2;
1051 		else
1052 			chroma_size = fb_stride * inst->dst_fmt.height / 4;
1053 
1054 		if (inst->dst_fmt.num_planes == 1) {
1055 			buf_size = vb2_plane_size(vb, 0);
1056 			buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0);
1057 			buf_addr_cb = buf_addr_y + luma_size;
1058 			buf_addr_cr = buf_addr_cb + chroma_size;
1059 		} else if (inst->dst_fmt.num_planes == 2) {
1060 			buf_size = vb2_plane_size(vb, 0) +
1061 				vb2_plane_size(vb, 1);
1062 			buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0);
1063 			buf_addr_cb = vb2_dma_contig_plane_dma_addr(vb, 1);
1064 			buf_addr_cr = buf_addr_cb + chroma_size;
1065 		} else if (inst->dst_fmt.num_planes == 3) {
1066 			buf_size = vb2_plane_size(vb, 0) +
1067 				vb2_plane_size(vb, 1) +
1068 				vb2_plane_size(vb, 2);
1069 			buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0);
1070 			buf_addr_cb = vb2_dma_contig_plane_dma_addr(vb, 1);
1071 			buf_addr_cr = vb2_dma_contig_plane_dma_addr(vb, 2);
1072 		}
1073 
1074 		frame->buf_y = buf_addr_y;
1075 		frame->buf_cb = buf_addr_cb;
1076 		frame->buf_cr = buf_addr_cr;
1077 		frame->size = buf_size;
1078 		frame->width = inst->src_fmt.width;
1079 		frame->stride = fb_stride;
1080 		frame->map_type = LINEAR_FRAME_MAP;
1081 		frame->update_fb_info = true;
1082 	}
1083 
1084 	ret = wave5_vpu_dec_register_frame_buffer_ex(inst, non_linear_num, linear_num,
1085 						     fb_stride, inst->dst_fmt.height);
1086 	if (ret) {
1087 		dev_dbg(inst->dev->dev, "%s: vpu_dec_register_frame_buffer_ex fail: %d",
1088 			__func__, ret);
1089 		return ret;
1090 	}
1091 
1092 	/*
1093 	 * Mark all frame buffers as out of display, to avoid using them before
1094 	 * the application have them queued.
1095 	 */
1096 	for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) {
1097 		ret = wave5_vpu_dec_set_disp_flag(inst, i);
1098 		if (ret) {
1099 			dev_dbg(inst->dev->dev,
1100 				"%s: Setting display flag of buf index: %u, fail: %d\n",
1101 				__func__, i, ret);
1102 		}
1103 	}
1104 
1105 	v4l2_m2m_for_each_dst_buf_safe(m2m_ctx, buf, n) {
1106 		struct vb2_v4l2_buffer *vbuf = &buf->vb;
1107 
1108 		ret = wave5_vpu_dec_clr_disp_flag(inst, vbuf->vb2_buf.index);
1109 		if (ret)
1110 			dev_dbg(inst->dev->dev,
1111 				"%s: Clearing display flag of buf index: %u, fail: %d\n",
1112 				__func__, i, ret);
1113 	}
1114 
1115 	return 0;
1116 }
1117 
1118 static int write_to_ringbuffer(struct vpu_instance *inst, void *buffer, size_t buffer_size,
1119 			       struct vpu_buf *ring_buffer, dma_addr_t wr_ptr)
1120 {
1121 	size_t size;
1122 	size_t offset = wr_ptr - ring_buffer->daddr;
1123 	int ret;
1124 
1125 	if (wr_ptr + buffer_size > ring_buffer->daddr + ring_buffer->size) {
1126 		size = ring_buffer->daddr + ring_buffer->size - wr_ptr;
1127 		ret = wave5_vdi_write_memory(inst->dev, ring_buffer, offset, (u8 *)buffer, size);
1128 		if (ret < 0)
1129 			return ret;
1130 
1131 		ret = wave5_vdi_write_memory(inst->dev, ring_buffer, 0, (u8 *)buffer + size,
1132 					     buffer_size - size);
1133 		if (ret < 0)
1134 			return ret;
1135 	} else {
1136 		ret = wave5_vdi_write_memory(inst->dev, ring_buffer, offset, (u8 *)buffer,
1137 					     buffer_size);
1138 		if (ret < 0)
1139 			return ret;
1140 	}
1141 
1142 	return 0;
1143 }
1144 
/*
 * Copy the payload of queued OUTPUT (source) buffers that are not yet marked
 * as consumed into the bitstream ring buffer of @inst.
 *
 * Buffers are handled in queue order. The loop stops early, still returning
 * 0, when a buffer has no kernel mapping, when the ring buffer has less room
 * than the payload, when updating the bitstream buffer fails, or after the
 * last buffer of a drain sequence has been written. Nothing is copied if the
 * m2m context's last source buffer has already been consumed.
 *
 * Returns 0, or the error of wave5_vpu_dec_get_bitstream_buffer() or
 * write_to_ringbuffer().
 */
static int fill_ringbuffer(struct vpu_instance *inst)
{
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct v4l2_m2m_buffer *buf, *n;
	int ret;

	if (m2m_ctx->last_src_buf)  {
		struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(m2m_ctx->last_src_buf);

		if (vpu_buf->consumed) {
			dev_dbg(inst->dev->dev, "last src buffer already written\n");
			return 0;
		}
	}

	v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buf, n) {
		struct vb2_v4l2_buffer *vbuf = &buf->vb;
		struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(vbuf);
		struct vpu_buf *ring_buffer = &inst->bitstream_vbuf;
		size_t src_size = vb2_get_plane_payload(&vbuf->vb2_buf, 0);
		void *src_buf = vb2_plane_vaddr(&vbuf->vb2_buf, 0);
		dma_addr_t rd_ptr = 0;
		dma_addr_t wr_ptr = 0;
		size_t remain_size = 0;

		/* Skip buffers that were copied by an earlier call */
		if (vpu_buf->consumed) {
			dev_dbg(inst->dev->dev, "already copied src buf (%u) to the ring buffer\n",
				vbuf->vb2_buf.index);
			continue;
		}

		if (!src_buf) {
			dev_dbg(inst->dev->dev,
				"%s: Acquiring kernel pointer to src buf (%u), fail\n",
				__func__, vbuf->vb2_buf.index);
			break;
		}

		ret = wave5_vpu_dec_get_bitstream_buffer(inst, &rd_ptr, &wr_ptr, &remain_size);
		if (ret) {
			/* Unable to acquire the mutex */
			dev_err(inst->dev->dev, "Getting the bitstream buffer, fail: %d\n",
				ret);
			return ret;
		}

		dev_dbg(inst->dev->dev, "%s: rd_ptr %pad wr_ptr %pad", __func__, &rd_ptr, &wr_ptr);

		/* Not enough room: leave this and all later buffers queued */
		if (remain_size < src_size) {
			dev_dbg(inst->dev->dev,
				"%s: remaining size: %zu < source size: %zu for src buf (%u)\n",
				__func__, remain_size, src_size, vbuf->vb2_buf.index);
			break;
		}

		ret = write_to_ringbuffer(inst, src_buf, src_size, ring_buffer, wr_ptr);
		if (ret) {
			dev_err(inst->dev->dev, "Write src buf (%u) to ring buffer, fail: %d\n",
				vbuf->vb2_buf.index, ret);
			return ret;
		}

		ret = wave5_vpu_dec_update_bitstream_buffer(inst, src_size);
		if (ret) {
			dev_dbg(inst->dev->dev,
				"update_bitstream_buffer fail: %d for src buf (%u)\n",
				ret, vbuf->vb2_buf.index);
			break;
		}

		/* Only mark the buffer once data and bitstream update both succeeded */
		vpu_buf->consumed = true;

		/* Don't write buffers past the last one while draining. */
		if (v4l2_m2m_is_last_draining_src_buf(m2m_ctx, vbuf)) {
			dev_dbg(inst->dev->dev, "last src buffer written to the ring buffer\n");
			break;
		}
	}

	return 0;
}
1226 
1227 static void wave5_vpu_dec_buf_queue_src(struct vb2_buffer *vb)
1228 {
1229 	struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
1230 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1231 	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
1232 	struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(vbuf);
1233 
1234 	vpu_buf->consumed = false;
1235 	vbuf->sequence = inst->queued_src_buf_num++;
1236 
1237 	v4l2_m2m_buf_queue(m2m_ctx, vbuf);
1238 }
1239 
1240 static void wave5_vpu_dec_buf_queue_dst(struct vb2_buffer *vb)
1241 {
1242 	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
1243 	struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
1244 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1245 
1246 	vbuf->sequence = inst->queued_dst_buf_num++;
1247 
1248 	if (inst->state == VPU_INST_STATE_PIC_RUN) {
1249 		struct vpu_dst_buffer *vpu_buf = wave5_to_vpu_dst_buf(vbuf);
1250 		int ret;
1251 
1252 		/*
1253 		 * The buffer is already registered just clear the display flag
1254 		 * to let the firmware know it can be used.
1255 		 */
1256 		vpu_buf->display = false;
1257 		ret = wave5_vpu_dec_clr_disp_flag(inst, vb->index);
1258 		if (ret) {
1259 			dev_dbg(inst->dev->dev,
1260 				"%s: Clearing the display flag of buffer index: %u, fail: %d\n",
1261 				__func__, vb->index, ret);
1262 		}
1263 	}
1264 
1265 	if (vb2_is_streaming(vb->vb2_queue) && v4l2_m2m_dst_buf_is_last(m2m_ctx)) {
1266 		unsigned int i;
1267 
1268 		for (i = 0; i < vb->num_planes; i++)
1269 			vb2_set_plane_payload(vb, i, 0);
1270 
1271 		vbuf->field = V4L2_FIELD_NONE;
1272 
1273 		send_eos_event(inst);
1274 		v4l2_m2m_last_buffer_done(m2m_ctx, vbuf);
1275 	} else {
1276 		v4l2_m2m_buf_queue(m2m_ctx, vbuf);
1277 	}
1278 }
1279 
1280 static void wave5_vpu_dec_buf_queue(struct vb2_buffer *vb)
1281 {
1282 	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
1283 	struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
1284 
1285 	dev_dbg(inst->dev->dev, "%s: type: %4u index: %4u size: ([0]=%4lu, [1]=%4lu, [2]=%4lu)\n",
1286 		__func__, vb->type, vb->index, vb2_plane_size(&vbuf->vb2_buf, 0),
1287 		vb2_plane_size(&vbuf->vb2_buf, 1), vb2_plane_size(&vbuf->vb2_buf, 2));
1288 
1289 	if (vb->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
1290 		wave5_vpu_dec_buf_queue_src(vb);
1291 	else if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE)
1292 		wave5_vpu_dec_buf_queue_dst(vb);
1293 }
1294 
1295 static int wave5_vpu_dec_allocate_ring_buffer(struct vpu_instance *inst)
1296 {
1297 	int ret;
1298 	struct vpu_buf *ring_buffer = &inst->bitstream_vbuf;
1299 
1300 	ring_buffer->size = ALIGN(inst->src_fmt.plane_fmt[0].sizeimage, 1024) * 4;
1301 	ret = wave5_vdi_allocate_dma_memory(inst->dev, ring_buffer);
1302 	if (ret) {
1303 		dev_dbg(inst->dev->dev, "%s: allocate ring buffer of size %zu fail: %d\n",
1304 			__func__, ring_buffer->size, ret);
1305 		return ret;
1306 	}
1307 
1308 	inst->last_rd_ptr = ring_buffer->daddr;
1309 
1310 	return 0;
1311 }
1312 
1313 static int wave5_vpu_dec_start_streaming(struct vb2_queue *q, unsigned int count)
1314 {
1315 	struct vpu_instance *inst = vb2_get_drv_priv(q);
1316 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1317 	int ret = 0;
1318 
1319 	dev_dbg(inst->dev->dev, "%s: type: %u\n", __func__, q->type);
1320 	pm_runtime_resume_and_get(inst->dev->dev);
1321 
1322 	v4l2_m2m_update_start_streaming_state(m2m_ctx, q);
1323 
1324 	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE && inst->state == VPU_INST_STATE_NONE) {
1325 		struct dec_open_param open_param;
1326 
1327 		memset(&open_param, 0, sizeof(struct dec_open_param));
1328 
1329 		ret = wave5_vpu_dec_allocate_ring_buffer(inst);
1330 		if (ret)
1331 			goto return_buffers;
1332 
1333 		open_param.bitstream_buffer = inst->bitstream_vbuf.daddr;
1334 		open_param.bitstream_buffer_size = inst->bitstream_vbuf.size;
1335 
1336 		ret = wave5_vpu_dec_open(inst, &open_param);
1337 		if (ret) {
1338 			dev_dbg(inst->dev->dev, "%s: decoder opening, fail: %d\n",
1339 				__func__, ret);
1340 			goto free_bitstream_vbuf;
1341 		}
1342 
1343 		ret = switch_state(inst, VPU_INST_STATE_OPEN);
1344 		if (ret)
1345 			goto free_bitstream_vbuf;
1346 	} else if (q->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
1347 		struct dec_initial_info *initial_info =
1348 			&inst->codec_info->dec_info.initial_info;
1349 
1350 		if (inst->state == VPU_INST_STATE_STOP)
1351 			ret = switch_state(inst, VPU_INST_STATE_INIT_SEQ);
1352 		if (ret)
1353 			goto return_buffers;
1354 
1355 		if (inst->state == VPU_INST_STATE_INIT_SEQ &&
1356 		    inst->dev->product_code == WAVE521C_CODE) {
1357 			if (initial_info->luma_bitdepth != 8) {
1358 				dev_info(inst->dev->dev, "%s: no support for %d bit depth",
1359 					 __func__, initial_info->luma_bitdepth);
1360 				ret = -EINVAL;
1361 				goto return_buffers;
1362 			}
1363 		}
1364 
1365 	}
1366 	pm_runtime_put_autosuspend(inst->dev->dev);
1367 	return ret;
1368 
1369 free_bitstream_vbuf:
1370 	wave5_vdi_free_dma_memory(inst->dev, &inst->bitstream_vbuf);
1371 return_buffers:
1372 	wave5_return_bufs(q, VB2_BUF_STATE_QUEUED);
1373 	pm_runtime_put_autosuspend(inst->dev->dev);
1374 	return ret;
1375 }
1376 
/*
 * Stop handling for the OUTPUT (bitstream) queue.
 *
 * Sets the display flag for the indices of all ready CAPTURE buffers, returns
 * every queued OUTPUT buffer in the ERROR state, reads output info until
 * wave5_vpu_dec_get_output_info() fails, flushes the instance and finally
 * resets the ring buffer read/write pointers to the current read pointer.
 * An EOS event is sent if the m2m context has stopped, and a pending drain
 * (inst->eos) is cancelled.
 *
 * Returns 0, or the error of wave5_vpu_flush_instance(); in that case the
 * ring buffer pointers and inst->eos are left untouched.
 */
static int streamoff_output(struct vb2_queue *q)
{
	struct vpu_instance *inst = vb2_get_drv_priv(q);
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct vb2_v4l2_buffer *buf;
	int ret;
	dma_addr_t new_rd_ptr;
	struct dec_output_info dec_info;
	unsigned int i;

	for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) {
		ret = wave5_vpu_dec_set_disp_flag(inst, i);
		if (ret)
			dev_dbg(inst->dev->dev,
				"%s: Setting display flag of buf index: %u, fail: %d\n",
				__func__, i, ret);
	}

	/* Hand all still queued bitstream buffers back to userspace */
	while ((buf = v4l2_m2m_src_buf_remove(m2m_ctx))) {
		dev_dbg(inst->dev->dev, "%s: (Multiplanar) buf type %4u | index %4u\n",
			__func__, buf->vb2_buf.type, buf->vb2_buf.index);
		v4l2_m2m_buf_done(buf, VB2_BUF_STATE_ERROR);
	}

	/* Drain pending results, setting the display flag of each reported frame */
	while (wave5_vpu_dec_get_output_info(inst, &dec_info) == 0) {
		if (dec_info.index_frame_display >= 0)
			wave5_vpu_dec_set_disp_flag(inst, dec_info.index_frame_display);
	}

	ret = wave5_vpu_flush_instance(inst);
	if (ret)
		return ret;

	/* Reset the ring buffer information */
	new_rd_ptr = wave5_vpu_dec_get_rd_ptr(inst);
	inst->last_rd_ptr = new_rd_ptr;
	inst->codec_info->dec_info.stream_rd_ptr = new_rd_ptr;
	inst->codec_info->dec_info.stream_wr_ptr = new_rd_ptr;

	if (v4l2_m2m_has_stopped(m2m_ctx))
		send_eos_event(inst);

	/* streamoff on output cancels any draining operation */
	inst->eos = false;

	return 0;
}
1424 
1425 static int streamoff_capture(struct vb2_queue *q)
1426 {
1427 	struct vpu_instance *inst = vb2_get_drv_priv(q);
1428 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1429 	struct vb2_v4l2_buffer *buf;
1430 	unsigned int i;
1431 	int ret = 0;
1432 
1433 	for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) {
1434 		ret = wave5_vpu_dec_set_disp_flag(inst, i);
1435 		if (ret)
1436 			dev_dbg(inst->dev->dev,
1437 				"%s: Setting display flag of buf index: %u, fail: %d\n",
1438 				__func__, i, ret);
1439 	}
1440 
1441 	while ((buf = v4l2_m2m_dst_buf_remove(m2m_ctx))) {
1442 		u32 plane;
1443 
1444 		dev_dbg(inst->dev->dev, "%s: buf type %4u | index %4u\n",
1445 			__func__, buf->vb2_buf.type, buf->vb2_buf.index);
1446 
1447 		for (plane = 0; plane < inst->dst_fmt.num_planes; plane++)
1448 			vb2_set_plane_payload(&buf->vb2_buf, plane, 0);
1449 
1450 		v4l2_m2m_buf_done(buf, VB2_BUF_STATE_ERROR);
1451 	}
1452 
1453 	if (inst->needs_reallocation) {
1454 		wave5_vpu_dec_give_command(inst, DEC_RESET_FRAMEBUF_INFO, NULL);
1455 		inst->needs_reallocation = false;
1456 	}
1457 
1458 	if (v4l2_m2m_has_stopped(m2m_ctx)) {
1459 		ret = switch_state(inst, VPU_INST_STATE_INIT_SEQ);
1460 		if (ret)
1461 			return ret;
1462 	}
1463 
1464 	return 0;
1465 }
1466 
1467 static void wave5_vpu_dec_stop_streaming(struct vb2_queue *q)
1468 {
1469 	struct vpu_instance *inst = vb2_get_drv_priv(q);
1470 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1471 	bool check_cmd = TRUE;
1472 
1473 	dev_dbg(inst->dev->dev, "%s: type: %u\n", __func__, q->type);
1474 	pm_runtime_resume_and_get(inst->dev->dev);
1475 
1476 	while (check_cmd) {
1477 		struct queue_status_info q_status;
1478 		struct dec_output_info dec_output_info;
1479 
1480 		wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);
1481 
1482 		if (q_status.report_queue_count == 0)
1483 			break;
1484 
1485 		if (wave5_vpu_wait_interrupt(inst, VPU_DEC_TIMEOUT) < 0)
1486 			break;
1487 
1488 		if (wave5_vpu_dec_get_output_info(inst, &dec_output_info))
1489 			dev_dbg(inst->dev->dev, "there is no output info\n");
1490 	}
1491 
1492 	v4l2_m2m_update_stop_streaming_state(m2m_ctx, q);
1493 
1494 	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
1495 		streamoff_output(q);
1496 	else
1497 		streamoff_capture(q);
1498 
1499 	pm_runtime_put_autosuspend(inst->dev->dev);
1500 }
1501 
/*
 * videobuf2 queue operations of the decoder; passed to
 * wave5_vpu_queue_init() by wave5_vpu_dec_queue_init().
 */
static const struct vb2_ops wave5_vpu_dec_vb2_ops = {
	.queue_setup = wave5_vpu_dec_queue_setup,
	.buf_queue = wave5_vpu_dec_buf_queue,
	.start_streaming = wave5_vpu_dec_start_streaming,
	.stop_streaming = wave5_vpu_dec_stop_streaming,
};
1508 
1509 static void wave5_set_default_format(struct v4l2_pix_format_mplane *src_fmt,
1510 				     struct v4l2_pix_format_mplane *dst_fmt)
1511 {
1512 	src_fmt->pixelformat = dec_fmt_list[VPU_FMT_TYPE_CODEC][0].v4l2_pix_fmt;
1513 	wave5_update_pix_fmt(src_fmt, VPU_FMT_TYPE_CODEC,
1514 			     W5_DEF_DEC_PIC_WIDTH, W5_DEF_DEC_PIC_HEIGHT,
1515 			     &dec_hevc_frmsize);
1516 
1517 	dst_fmt->pixelformat = dec_fmt_list[VPU_FMT_TYPE_RAW][0].v4l2_pix_fmt;
1518 	wave5_update_pix_fmt(dst_fmt, VPU_FMT_TYPE_RAW,
1519 			     W5_DEF_DEC_PIC_WIDTH, W5_DEF_DEC_PIC_HEIGHT,
1520 			     &dec_raw_frmsize);
1521 }
1522 
/*
 * Queue init callback handed to v4l2_m2m_ctx_init(): forwards to the shared
 * wave5_vpu_queue_init() with the decoder's vb2 operations and returns its
 * result.
 */
static int wave5_vpu_dec_queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)
{
	return wave5_vpu_queue_init(priv, src_vq, dst_vq, &wave5_vpu_dec_vb2_ops);
}
1527 
/* Instance operations assigned to inst->ops of every decoder instance. */
static const struct vpu_instance_ops wave5_vpu_dec_inst_ops = {
	.finish_process = wave5_vpu_dec_finish_decode,
};
1531 
1532 static int initialize_sequence(struct vpu_instance *inst)
1533 {
1534 	struct dec_initial_info initial_info;
1535 	int ret = 0;
1536 
1537 	memset(&initial_info, 0, sizeof(struct dec_initial_info));
1538 
1539 	ret = wave5_vpu_dec_issue_seq_init(inst);
1540 	if (ret) {
1541 		dev_dbg(inst->dev->dev, "%s: wave5_vpu_dec_issue_seq_init, fail: %d\n",
1542 			__func__, ret);
1543 		return ret;
1544 	}
1545 
1546 	if (wave5_vpu_wait_interrupt(inst, VPU_DEC_TIMEOUT) < 0)
1547 		dev_dbg(inst->dev->dev, "%s: failed to call vpu_wait_interrupt()\n", __func__);
1548 
1549 	ret = wave5_vpu_dec_complete_seq_init(inst, &initial_info);
1550 	if (ret) {
1551 		dev_dbg(inst->dev->dev, "%s: vpu_dec_complete_seq_init, fail: %d, reason: %u\n",
1552 			__func__, ret, initial_info.seq_init_err_reason);
1553 		wave5_handle_src_buffer(inst, initial_info.rd_ptr);
1554 		return ret;
1555 	}
1556 
1557 	handle_dynamic_resolution_change(inst);
1558 
1559 	return 0;
1560 }
1561 
1562 static bool wave5_is_draining_or_eos(struct vpu_instance *inst)
1563 {
1564 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1565 
1566 	lockdep_assert_held(&inst->state_spinlock);
1567 	return m2m_ctx->is_draining || inst->eos;
1568 }
1569 
/*
 * m2m device_run callback: feed the ring buffer and advance the decoder
 * according to the instance state.
 *
 * OPEN:     initialize the sequence; on success switch to INIT_SEQ. On
 *           failure, if draining/EOS and the last source buffer has been
 *           consumed, stop the instance and flag the last buffer as done.
 * INIT_SEQ: switch to PIC_RUN, prepare the frame buffers and, unless the
 *           firmware still has a queued command, fall through to decoding.
 * PIC_RUN:  start decoding a frame.
 *
 * When a decode was started, or is still pending in INIT_SEQ, the function
 * returns with the job left active and without dropping the runtime PM
 * reference taken on entry. In all other cases the PM reference is released
 * and the job is finished here.
 */
static void wave5_vpu_dec_device_run(void *priv)
{
	struct vpu_instance *inst = priv;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct queue_status_info q_status;
	u32 fail_res = 0;
	int ret = 0;

	dev_dbg(inst->dev->dev, "%s: Fill the ring buffer with new bitstream data", __func__);
	pm_runtime_resume_and_get(inst->dev->dev);
	ret = fill_ringbuffer(inst);
	if (ret) {
		dev_warn(inst->dev->dev, "Filling ring buffer failed\n");
		goto finish_job_and_return;
	}

	switch (inst->state) {
	case VPU_INST_STATE_OPEN:
		ret = initialize_sequence(inst);
		if (ret) {
			unsigned long flags;

			/* wave5_is_draining_or_eos() requires the state lock */
			spin_lock_irqsave(&inst->state_spinlock, flags);
			if (wave5_is_draining_or_eos(inst) &&
			    wave5_last_src_buffer_consumed(m2m_ctx)) {
				struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);

				switch_state(inst, VPU_INST_STATE_STOP);

				if (vb2_is_streaming(dst_vq))
					send_eos_event(inst);
				else
					handle_dynamic_resolution_change(inst);

				flag_last_buffer_done(inst);
			}
			spin_unlock_irqrestore(&inst->state_spinlock, flags);
		} else {
			switch_state(inst, VPU_INST_STATE_INIT_SEQ);
		}

		break;

	case VPU_INST_STATE_INIT_SEQ:
		/*
		 * Do this early, preparing the fb can trigger an IRQ before
		 * we had a chance to switch, which leads to an invalid state
		 * change.
		 */
		switch_state(inst, VPU_INST_STATE_PIC_RUN);

		/*
		 * During DRC, the picture decoding remains pending, so just leave the job
		 * active until this decode operation completes.
		 */
		wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);

		/*
		 * The sequence must be analyzed first to calculate the proper
		 * size of the auxiliary buffers.
		 */
		ret = wave5_prepare_fb(inst);
		if (ret) {
			dev_warn(inst->dev->dev, "Framebuffer preparation, fail: %d\n", ret);
			switch_state(inst, VPU_INST_STATE_STOP);
			break;
		}

		/* q_status was sampled before the frame buffers were prepared */
		if (q_status.instance_queue_count) {
			dev_dbg(inst->dev->dev, "%s: leave with active job", __func__);
			return;
		}

		fallthrough;
	case VPU_INST_STATE_PIC_RUN:
		ret = start_decode(inst, &fail_res);
		if (ret) {
			dev_err(inst->dev->dev,
				"Frame decoding on m2m context (%p), fail: %d (result: %d)\n",
				m2m_ctx, ret, fail_res);
			break;
		}
		/* Return so that we leave this job active */
		dev_dbg(inst->dev->dev, "%s: leave with active job", __func__);
		return;
	default:
		WARN(1, "Execution of a job in state %s illegal.\n", state_to_str(inst->state));
		break;
	}

finish_job_and_return:
	dev_dbg(inst->dev->dev, "%s: leave and finish job", __func__);
	pm_runtime_put_autosuspend(inst->dev->dev);
	v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
}
1665 
1666 static void wave5_vpu_dec_job_abort(void *priv)
1667 {
1668 	struct vpu_instance *inst = priv;
1669 	int ret;
1670 
1671 	ret = switch_state(inst, VPU_INST_STATE_STOP);
1672 	if (ret)
1673 		return;
1674 
1675 	ret = wave5_vpu_dec_set_eos_on_firmware(inst);
1676 	if (ret)
1677 		dev_warn(inst->dev->dev,
1678 			 "Setting EOS for the bitstream, fail: %d\n", ret);
1679 }
1680 
/*
 * m2m job_ready callback: decide, under inst->state_spinlock, whether a job
 * may be run for this instance.
 *
 * NONE, STOP:        never ready.
 * OPEN:              ready when draining/EOS, when the m2m context has not
 *                    stopped, or when at least one OUTPUT buffer is queued.
 * INIT_SEQ, PIC_RUN: ready when the CAPTURE queue is streaming, at least
 *                    fbc_buf_count - 1 CAPTURE buffers are ready, and there
 *                    is either bitstream data queued or a drain/EOS pending.
 *
 * Returns 1 if a job can be run, 0 otherwise.
 */
static int wave5_vpu_dec_job_ready(void *priv)
{
	struct vpu_instance *inst = priv;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&inst->state_spinlock, flags);

	switch (inst->state) {
	case VPU_INST_STATE_NONE:
		dev_dbg(inst->dev->dev, "Decoder must be open to start queueing M2M jobs!\n");
		break;
	case VPU_INST_STATE_OPEN:
		if (wave5_is_draining_or_eos(inst) || !v4l2_m2m_has_stopped(m2m_ctx) ||
		    v4l2_m2m_num_src_bufs_ready(m2m_ctx) > 0) {
			ret = 1;
			break;
		}

		dev_dbg(inst->dev->dev,
			"Decoder must be draining or >= 1 OUTPUT queue buffer must be queued!\n");
		break;
	case VPU_INST_STATE_INIT_SEQ:
	case VPU_INST_STATE_PIC_RUN:
		/* Each failed precondition leaves ret at 0 */
		if (!m2m_ctx->cap_q_ctx.q.streaming) {
			dev_dbg(inst->dev->dev, "CAPTURE queue must be streaming to queue jobs!\n");
			break;
		} else if (v4l2_m2m_num_dst_bufs_ready(m2m_ctx) < (inst->fbc_buf_count - 1)) {
			dev_dbg(inst->dev->dev,
				"No capture buffer ready to decode!\n");
			break;
		} else if (!wave5_is_draining_or_eos(inst) &&
			   !v4l2_m2m_num_src_bufs_ready(m2m_ctx)) {
			dev_dbg(inst->dev->dev,
				"No bitstream data to decode!\n");
			break;
		}
		ret = 1;
		break;
	case VPU_INST_STATE_STOP:
		dev_dbg(inst->dev->dev, "Decoder is stopped, not running.\n");
		break;
	}

	spin_unlock_irqrestore(&inst->state_spinlock, flags);

	return ret;
}
1730 
1731 static const struct v4l2_m2m_ops wave5_vpu_dec_m2m_ops = {
1732 	.device_run = wave5_vpu_dec_device_run,
1733 	.job_abort = wave5_vpu_dec_job_abort,
1734 	.job_ready = wave5_vpu_dec_job_ready,
1735 };
1736 
1737 static int wave5_vpu_open_dec(struct file *filp)
1738 {
1739 	struct video_device *vdev = video_devdata(filp);
1740 	struct vpu_device *dev = video_drvdata(filp);
1741 	struct vpu_instance *inst = NULL;
1742 	struct v4l2_m2m_ctx *m2m_ctx;
1743 	int ret = 0;
1744 
1745 	inst = kzalloc(sizeof(*inst), GFP_KERNEL);
1746 	if (!inst)
1747 		return -ENOMEM;
1748 
1749 	inst->dev = dev;
1750 	inst->type = VPU_INST_TYPE_DEC;
1751 	inst->ops = &wave5_vpu_dec_inst_ops;
1752 
1753 	spin_lock_init(&inst->state_spinlock);
1754 
1755 	inst->codec_info = kzalloc(sizeof(*inst->codec_info), GFP_KERNEL);
1756 	if (!inst->codec_info)
1757 		return -ENOMEM;
1758 
1759 	v4l2_fh_init(&inst->v4l2_fh, vdev);
1760 	v4l2_fh_add(&inst->v4l2_fh, filp);
1761 
1762 	INIT_LIST_HEAD(&inst->list);
1763 
1764 	inst->v4l2_m2m_dev = inst->dev->v4l2_m2m_dec_dev;
1765 	inst->v4l2_fh.m2m_ctx =
1766 		v4l2_m2m_ctx_init(inst->v4l2_m2m_dev, inst, wave5_vpu_dec_queue_init);
1767 	if (IS_ERR(inst->v4l2_fh.m2m_ctx)) {
1768 		ret = PTR_ERR(inst->v4l2_fh.m2m_ctx);
1769 		goto cleanup_inst;
1770 	}
1771 	m2m_ctx = inst->v4l2_fh.m2m_ctx;
1772 
1773 	v4l2_m2m_set_src_buffered(m2m_ctx, true);
1774 	v4l2_m2m_set_dst_buffered(m2m_ctx, true);
1775 	/*
1776 	 * We use the M2M job queue to ensure synchronization of steps where
1777 	 * needed, as IOCTLs can occur at anytime and we need to run commands on
1778 	 * the firmware in a specified order.
1779 	 * In order to initialize the sequence on the firmware within an M2M
1780 	 * job, the M2M framework needs to be able to queue jobs before
1781 	 * the CAPTURE queue has been started, because we need the results of the
1782 	 * initialization to properly prepare the CAPTURE queue with the correct
1783 	 * amount of buffers.
1784 	 * By setting ignore_cap_streaming to true the m2m framework will call
1785 	 * job_ready as soon as the OUTPUT queue is streaming, instead of
1786 	 * waiting until both the CAPTURE and OUTPUT queues are streaming.
1787 	 */
1788 	m2m_ctx->ignore_cap_streaming = true;
1789 
1790 	v4l2_ctrl_handler_init(&inst->v4l2_ctrl_hdl, 10);
1791 	v4l2_ctrl_new_std(&inst->v4l2_ctrl_hdl, NULL,
1792 			  V4L2_CID_MIN_BUFFERS_FOR_CAPTURE, 1, 32, 1, 1);
1793 
1794 	if (inst->v4l2_ctrl_hdl.error) {
1795 		ret = -ENODEV;
1796 		goto cleanup_inst;
1797 	}
1798 
1799 	inst->v4l2_fh.ctrl_handler = &inst->v4l2_ctrl_hdl;
1800 	v4l2_ctrl_handler_setup(&inst->v4l2_ctrl_hdl);
1801 
1802 	wave5_set_default_format(&inst->src_fmt, &inst->dst_fmt);
1803 	inst->colorspace = V4L2_COLORSPACE_REC709;
1804 	inst->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
1805 	inst->quantization = V4L2_QUANTIZATION_DEFAULT;
1806 	inst->xfer_func = V4L2_XFER_FUNC_DEFAULT;
1807 
1808 	init_completion(&inst->irq_done);
1809 
1810 	inst->id = ida_alloc(&inst->dev->inst_ida, GFP_KERNEL);
1811 	if (inst->id < 0) {
1812 		dev_warn(inst->dev->dev, "Allocating instance ID, fail: %d\n", inst->id);
1813 		ret = inst->id;
1814 		goto cleanup_inst;
1815 	}
1816 
1817 	/*
1818 	 * For Wave515 SRAM memory was already allocated
1819 	 * at wave5_vpu_dec_register_device()
1820 	 */
1821 	if (inst->dev->product_code != WAVE515_CODE)
1822 		wave5_vdi_allocate_sram(inst->dev);
1823 
1824 	ret = mutex_lock_interruptible(&dev->dev_lock);
1825 	if (ret)
1826 		goto cleanup_inst;
1827 
1828 	if (list_empty(&dev->instances))
1829 		pm_runtime_use_autosuspend(inst->dev->dev);
1830 
1831 	list_add_tail(&inst->list, &dev->instances);
1832 
1833 	mutex_unlock(&dev->dev_lock);
1834 
1835 	return 0;
1836 
1837 cleanup_inst:
1838 	wave5_cleanup_instance(inst, filp);
1839 	return ret;
1840 }
1841 
/*
 * File release handler of the decoder video device: delegates to the shared
 * wave5_vpu_release_device() with the decoder close function and returns its
 * result.
 */
static int wave5_vpu_dec_release(struct file *filp)
{
	return wave5_vpu_release_device(filp, wave5_vpu_dec_close, "decoder");
}
1846 
/*
 * File operations of the decoder video device: open/release are driver
 * specific, ioctl, poll and mmap use the generic V4L2 / m2m helpers.
 */
static const struct v4l2_file_operations wave5_vpu_dec_fops = {
	.owner = THIS_MODULE,
	.open = wave5_vpu_open_dec,
	.release = wave5_vpu_dec_release,
	.unlocked_ioctl = video_ioctl2,
	.poll = v4l2_m2m_fop_poll,
	.mmap = v4l2_m2m_fop_mmap,
};
1855 
1856 int wave5_vpu_dec_register_device(struct vpu_device *dev)
1857 {
1858 	struct video_device *vdev_dec;
1859 	int ret;
1860 
1861 	/*
1862 	 * Secondary AXI setup for Wave515 is done by INIT_VPU command,
1863 	 * i.e. wave5_vpu_init(), that's why we allocate SRAM memory early.
1864 	 */
1865 	if (dev->product_code == WAVE515_CODE)
1866 		wave5_vdi_allocate_sram(dev);
1867 
1868 	vdev_dec = devm_kzalloc(dev->v4l2_dev.dev, sizeof(*vdev_dec), GFP_KERNEL);
1869 	if (!vdev_dec)
1870 		return -ENOMEM;
1871 
1872 	dev->v4l2_m2m_dec_dev = v4l2_m2m_init(&wave5_vpu_dec_m2m_ops);
1873 	if (IS_ERR(dev->v4l2_m2m_dec_dev)) {
1874 		ret = PTR_ERR(dev->v4l2_m2m_dec_dev);
1875 		dev_err(dev->dev, "v4l2_m2m_init, fail: %d\n", ret);
1876 		return -EINVAL;
1877 	}
1878 
1879 	dev->video_dev_dec = vdev_dec;
1880 
1881 	strscpy(vdev_dec->name, VPU_DEC_DEV_NAME, sizeof(vdev_dec->name));
1882 	vdev_dec->fops = &wave5_vpu_dec_fops;
1883 	vdev_dec->ioctl_ops = &wave5_vpu_dec_ioctl_ops;
1884 	vdev_dec->release = video_device_release_empty;
1885 	vdev_dec->v4l2_dev = &dev->v4l2_dev;
1886 	vdev_dec->vfl_dir = VFL_DIR_M2M;
1887 	vdev_dec->device_caps = V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_STREAMING;
1888 	vdev_dec->lock = &dev->dev_lock;
1889 
1890 	ret = video_register_device(vdev_dec, VFL_TYPE_VIDEO, -1);
1891 	if (ret)
1892 		return ret;
1893 
1894 	video_set_drvdata(vdev_dec, dev);
1895 
1896 	return 0;
1897 }
1898 
/*
 * Counterpart of wave5_vpu_dec_register_device(): frees the Wave515 SRAM,
 * unregisters the decoder video node and releases the decoder m2m device if
 * one is set.
 */
void wave5_vpu_dec_unregister_device(struct vpu_device *dev)
{
	/*
	 * Here is a freeing pair for Wave515 SRAM memory allocation
	 * happened at wave5_vpu_dec_register_device().
	 */
	if (dev->product_code == WAVE515_CODE)
		wave5_vdi_free_sram(dev);

	video_unregister_device(dev->video_dev_dec);
	if (dev->v4l2_m2m_dec_dev)
		v4l2_m2m_release(dev->v4l2_m2m_dec_dev);
}
1912