xref: /linux/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c (revision cb8bdd3ffca280d014311ab395651d33f58a8708)
1 // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
2 /*
3  * Wave5 series multi-standard codec IP - decoder interface
4  *
5  * Copyright (C) 2021-2023 CHIPS&MEDIA INC
6  */
7 
8 #include <linux/pm_runtime.h>
9 #include "wave5-helper.h"
10 
11 #define VPU_DEC_DEV_NAME "C&M Wave5 VPU decoder"
12 #define VPU_DEC_DRV_NAME "wave5-dec"
13 
/* Frame size limits for HEVC bitstreams (8-pixel-aligned minimum size). */
static const struct v4l2_frmsize_stepwise dec_hevc_frmsize = {
	.min_width = W5_MIN_DEC_PIC_8_WIDTH,
	.max_width = W5_MAX_DEC_PIC_WIDTH,
	.step_width = W5_DEC_CODEC_STEP_WIDTH,
	.min_height = W5_MIN_DEC_PIC_8_HEIGHT,
	.max_height = W5_MAX_DEC_PIC_HEIGHT,
	.step_height = W5_DEC_CODEC_STEP_HEIGHT,
};
22 
/* Frame size limits for H.264 bitstreams (32-pixel-aligned minimum size). */
static const struct v4l2_frmsize_stepwise dec_h264_frmsize = {
	.min_width = W5_MIN_DEC_PIC_32_WIDTH,
	.max_width = W5_MAX_DEC_PIC_WIDTH,
	.step_width = W5_DEC_CODEC_STEP_WIDTH,
	.min_height = W5_MIN_DEC_PIC_32_HEIGHT,
	.max_height = W5_MAX_DEC_PIC_HEIGHT,
	.step_height = W5_DEC_CODEC_STEP_HEIGHT,
};
31 
/* Frame size limits for raw (decoded) frames; note the raw-specific steps. */
static const struct v4l2_frmsize_stepwise dec_raw_frmsize = {
	.min_width = W5_MIN_DEC_PIC_8_WIDTH,
	.max_width = W5_MAX_DEC_PIC_WIDTH,
	.step_width = W5_DEC_RAW_STEP_WIDTH,
	.min_height = W5_MIN_DEC_PIC_8_HEIGHT,
	.max_height = W5_MAX_DEC_PIC_HEIGHT,
	.step_height = W5_DEC_RAW_STEP_HEIGHT,
};
40 
/*
 * Formats supported by the decoder, indexed by queue type: compressed
 * bitstream formats for the OUTPUT queue (VPU_FMT_TYPE_CODEC) and raw
 * pixel formats for the CAPTURE queue (VPU_FMT_TYPE_RAW). Each entry
 * carries the frame size constraints that apply to that format.
 */
static const struct vpu_format dec_fmt_list[FMT_TYPES][MAX_FMTS] = {
	[VPU_FMT_TYPE_CODEC] = {
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_HEVC,
			.v4l2_frmsize = &dec_hevc_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_H264,
			.v4l2_frmsize = &dec_h264_frmsize,
		},
	},
	[VPU_FMT_TYPE_RAW] = {
		/* Single-planar variants first, then their multi-planar twins. */
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV420,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV12,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV21,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV422P,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV16,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV61,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV420M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV12M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV21M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV422M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV16M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
		{
			.v4l2_pix_fmt = V4L2_PIX_FMT_NV61M,
			.v4l2_frmsize = &dec_raw_frmsize,
		},
	}
};
103 
/*
 * Make sure that the state switch is allowed and add logging for debugging
 * purposes.
 *
 * Valid transitions: NONE -> OPEN, OPEN/STOP -> INIT_SEQ,
 * INIT_SEQ -> PIC_RUN, and any state -> STOP. Returns 0 on a valid
 * switch (and updates inst->state), -EINVAL (with a WARN) otherwise.
 * Callers must hold inst->state_spinlock (see set_instance_state()).
 */
static int switch_state(struct vpu_instance *inst, enum vpu_instance_state state)
{
	switch (state) {
	case VPU_INST_STATE_NONE:
		/*
		 * NONE is never a valid target state: the break drops out of
		 * the switch straight into the invalid_state_switch path.
		 */
		break;
	case VPU_INST_STATE_OPEN:
		if (inst->state != VPU_INST_STATE_NONE)
			goto invalid_state_switch;
		goto valid_state_switch;
	case VPU_INST_STATE_INIT_SEQ:
		if (inst->state != VPU_INST_STATE_OPEN && inst->state != VPU_INST_STATE_STOP)
			goto invalid_state_switch;
		goto valid_state_switch;
	case VPU_INST_STATE_PIC_RUN:
		if (inst->state != VPU_INST_STATE_INIT_SEQ)
			goto invalid_state_switch;
		goto valid_state_switch;
	case VPU_INST_STATE_STOP:
		goto valid_state_switch;
	}
invalid_state_switch:
	WARN(1, "Invalid state switch from %s to %s.\n",
	     state_to_str(inst->state), state_to_str(state));
	return -EINVAL;
valid_state_switch:
	dev_dbg(inst->dev->dev, "Switch state from %s to %s.\n",
		state_to_str(inst->state), state_to_str(state));
	inst->state = state;
	return 0;
}
138 
/*
 * Locked wrapper around switch_state(): performs the state transition
 * while holding the instance state spinlock.
 */
static int set_instance_state(struct vpu_instance *inst, enum vpu_instance_state state)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&inst->state_spinlock, flags);
	ret = switch_state(inst, state);
	spin_unlock_irqrestore(&inst->state_spinlock, flags);

	return ret;
}
150 
151 static int wave5_vpu_dec_set_eos_on_firmware(struct vpu_instance *inst)
152 {
153 	int ret;
154 
155 	ret = wave5_vpu_dec_update_bitstream_buffer(inst, 0);
156 	if (ret) {
157 		/*
158 		 * To set the EOS flag, a command is sent to the firmware.
159 		 * That command may never return (timeout) or may report an error.
160 		 */
161 		dev_err(inst->dev->dev,
162 			"Setting EOS for the bitstream, fail: %d\n", ret);
163 		return ret;
164 	}
165 	return 0;
166 }
167 
168 static bool wave5_last_src_buffer_consumed(struct v4l2_m2m_ctx *m2m_ctx)
169 {
170 	struct vpu_src_buffer *vpu_buf;
171 
172 	if (!m2m_ctx->last_src_buf)
173 		return false;
174 
175 	vpu_buf = wave5_to_vpu_src_buf(m2m_ctx->last_src_buf);
176 	return vpu_buf->consumed;
177 }
178 
/*
 * Return source (bitstream) buffers to userspace once the firmware read
 * pointer indicates their payload has been consumed from the ring buffer.
 * @rd_ptr is the current firmware read position inside bitstream_vbuf.
 */
static void wave5_handle_src_buffer(struct vpu_instance *inst, dma_addr_t rd_ptr)
{
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct v4l2_m2m_buffer *buf, *n;
	size_t consumed_bytes = 0;

	if (rd_ptr >= inst->last_rd_ptr) {
		consumed_bytes = rd_ptr - inst->last_rd_ptr;
	} else {
		/* The read pointer wrapped around the end of the ring buffer. */
		size_t rd_offs = rd_ptr - inst->bitstream_vbuf.daddr;
		size_t last_rd_offs = inst->last_rd_ptr - inst->bitstream_vbuf.daddr;

		consumed_bytes = rd_offs + (inst->bitstream_vbuf.size - last_rd_offs);
	}

	inst->last_rd_ptr = rd_ptr;
	/* Carry over bytes that did not complete a buffer last time. */
	consumed_bytes += inst->remaining_consumed_bytes;

	dev_dbg(inst->dev->dev, "%s: %zu bytes of bitstream was consumed", __func__,
		consumed_bytes);

	/* Complete every source buffer that is now fully consumed. */
	v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buf, n) {
		struct vb2_v4l2_buffer *src_buf = &buf->vb;
		size_t src_size = vb2_get_plane_payload(&src_buf->vb2_buf, 0);

		if (src_size > consumed_bytes)
			break;

		dev_dbg(inst->dev->dev, "%s: removing src buffer %i",
			__func__, src_buf->vb2_buf.index);
		src_buf = v4l2_m2m_src_buf_remove(m2m_ctx);
		/* Remember the timestamp for the next decoded capture buffer. */
		inst->timestamp = src_buf->vb2_buf.timestamp;
		v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
		consumed_bytes -= src_size;

		/* Handle the case the last bitstream buffer has been picked */
		if (src_buf == m2m_ctx->last_src_buf) {
			int ret;

			m2m_ctx->last_src_buf = NULL;
			ret = wave5_vpu_dec_set_eos_on_firmware(inst);
			if (ret)
				dev_warn(inst->dev->dev,
					 "Setting EOS for the bitstream, fail: %d\n", ret);
			break;
		}
	}

	inst->remaining_consumed_bytes = consumed_bytes;
}
229 
230 static int start_decode(struct vpu_instance *inst, u32 *fail_res)
231 {
232 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
233 	int ret = 0;
234 
235 	ret = wave5_vpu_dec_start_one_frame(inst, fail_res);
236 	if (ret) {
237 		struct vb2_v4l2_buffer *src_buf;
238 
239 		src_buf = v4l2_m2m_src_buf_remove(m2m_ctx);
240 		if (src_buf)
241 			v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR);
242 		set_instance_state(inst, VPU_INST_STATE_STOP);
243 
244 		dev_dbg(inst->dev->dev, "%s: pic run failed / finish job", __func__);
245 		v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
246 	}
247 
248 	return ret;
249 }
250 
251 static void flag_last_buffer_done(struct vpu_instance *inst)
252 {
253 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
254 	struct vb2_v4l2_buffer *vb;
255 	int i;
256 
257 	lockdep_assert_held(&inst->state_spinlock);
258 
259 	vb = v4l2_m2m_dst_buf_remove(m2m_ctx);
260 	if (!vb) {
261 		m2m_ctx->is_draining = true;
262 		m2m_ctx->next_buf_last = true;
263 		return;
264 	}
265 
266 	for (i = 0; i < vb->vb2_buf.num_planes; i++)
267 		vb2_set_plane_payload(&vb->vb2_buf, i, 0);
268 	vb->field = V4L2_FIELD_NONE;
269 
270 	v4l2_m2m_last_buffer_done(m2m_ctx, vb);
271 }
272 
273 static void send_eos_event(struct vpu_instance *inst)
274 {
275 	static const struct v4l2_event vpu_event_eos = {
276 		.type = V4L2_EVENT_EOS
277 	};
278 
279 	lockdep_assert_held(&inst->state_spinlock);
280 
281 	v4l2_event_queue_fh(&inst->v4l2_fh, &vpu_event_eos);
282 	inst->eos = false;
283 	inst->sent_eos = true;
284 }
285 
/*
 * React to a mid-stream resolution change reported by the firmware:
 * update the buffer-count control, recompute the conformance window and
 * both queue formats from the new sequence parameters, and queue a
 * V4L2_EVENT_SOURCE_CHANGE towards userspace.
 * Caller must hold inst->state_spinlock. Returns 0 or -EINVAL when the
 * currently configured pixel format is not in the format tables.
 */
static int handle_dynamic_resolution_change(struct vpu_instance *inst)
{
	struct v4l2_fh *fh = &inst->v4l2_fh;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;

	static const struct v4l2_event vpu_event_src_ch = {
		.type = V4L2_EVENT_SOURCE_CHANGE,
		.u.src_change.changes = V4L2_EVENT_SRC_CH_RESOLUTION,
	};
	struct dec_info *p_dec_info = &inst->codec_info->dec_info;
	struct dec_initial_info *initial_info = &inst->codec_info->dec_info.initial_info;

	lockdep_assert_held(&inst->state_spinlock);

	dev_dbg(inst->dev->dev, "%s: rd_ptr %pad", __func__, &initial_info->rd_ptr);

	dev_dbg(inst->dev->dev, "%s: width: %u height: %u profile: %u | minbuffer: %u\n",
		__func__, initial_info->pic_width, initial_info->pic_height,
		initial_info->profile, initial_info->min_frame_buffer_count);

	inst->needs_reallocation = true;
	/* One spare buffer on top of the firmware-reported minimum. */
	inst->fbc_buf_count = initial_info->min_frame_buffer_count + 1;
	if (inst->fbc_buf_count != v4l2_m2m_num_dst_bufs_ready(m2m_ctx)) {
		struct v4l2_ctrl *ctrl;

		ctrl = v4l2_ctrl_find(&inst->v4l2_ctrl_hdl,
				      V4L2_CID_MIN_BUFFERS_FOR_CAPTURE);
		if (ctrl)
			v4l2_ctrl_s_ctrl(ctrl, inst->fbc_buf_count);
	}

	if (p_dec_info->initial_info_obtained) {
		const struct vpu_format *vpu_fmt;

		/* Visible area = coded size minus the crop rectangle. */
		inst->conf_win.left = initial_info->pic_crop_rect.left;
		inst->conf_win.top = initial_info->pic_crop_rect.top;
		inst->conf_win.width = initial_info->pic_width -
			initial_info->pic_crop_rect.left - initial_info->pic_crop_rect.right;
		inst->conf_win.height = initial_info->pic_height -
			initial_info->pic_crop_rect.top - initial_info->pic_crop_rect.bottom;

		vpu_fmt = wave5_find_vpu_fmt(inst->src_fmt.pixelformat,
					     dec_fmt_list[VPU_FMT_TYPE_CODEC]);
		if (!vpu_fmt)
			return -EINVAL;

		/* Propagate the new coded size to the OUTPUT format... */
		wave5_update_pix_fmt(&inst->src_fmt,
				     VPU_FMT_TYPE_CODEC,
				     initial_info->pic_width,
				     initial_info->pic_height,
				     vpu_fmt->v4l2_frmsize);

		vpu_fmt = wave5_find_vpu_fmt(inst->dst_fmt.pixelformat,
					     dec_fmt_list[VPU_FMT_TYPE_RAW]);
		if (!vpu_fmt)
			return -EINVAL;

		/* ...and to the CAPTURE format. */
		wave5_update_pix_fmt(&inst->dst_fmt,
				     VPU_FMT_TYPE_RAW,
				     initial_info->pic_width,
				     initial_info->pic_height,
				     vpu_fmt->v4l2_frmsize);
	}

	v4l2_event_queue_fh(fh, &vpu_event_src_ch);

	return 0;
}
354 
/*
 * Completion handler for one firmware decode command: fetch the output
 * info, recycle consumed bitstream buffers, hand the displayable frame (if
 * any) to userspace, and handle end-of-sequence / dynamic resolution
 * change. Runs once per finished PIC_RUN (from the interrupt handling
 * path — presumably; confirm against the caller).
 */
static void wave5_vpu_dec_finish_decode(struct vpu_instance *inst)
{
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct dec_output_info dec_info;
	int ret;
	struct vb2_v4l2_buffer *dec_buf = NULL;
	struct vb2_v4l2_buffer *disp_buf = NULL;
	struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);

	dev_dbg(inst->dev->dev, "%s: Fetch output info from firmware.", __func__);

	ret = wave5_vpu_dec_get_output_info(inst, &dec_info);
	if (ret) {
		dev_dbg(inst->dev->dev, "%s: could not get output info.", __func__);
		v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
		return;
	}

	dev_dbg(inst->dev->dev, "%s: rd_ptr %pad wr_ptr %pad", __func__, &dec_info.rd_ptr,
		&dec_info.wr_ptr);
	/* Recycle all source buffers consumed up to the new read pointer. */
	wave5_handle_src_buffer(inst, dec_info.rd_ptr);

	dev_dbg(inst->dev->dev, "%s: dec_info dec_idx %i disp_idx %i", __func__,
		dec_info.index_frame_decoded, dec_info.index_frame_display);

	if (!vb2_is_streaming(dst_vq)) {
		dev_dbg(inst->dev->dev, "%s: capture is not streaming..", __func__);
		v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
		return;
	}

	/* Remove decoded buffer from the ready queue now that it has been
	 * decoded.
	 */
	if (dec_info.index_frame_decoded >= 0) {
		struct vb2_buffer *vb = vb2_get_buffer(dst_vq,
						       dec_info.index_frame_decoded);
		if (vb) {
			dec_buf = to_vb2_v4l2_buffer(vb);
			/* Timestamp recorded from the consumed source buffer. */
			dec_buf->vb2_buf.timestamp = inst->timestamp;
		} else {
			dev_warn(inst->dev->dev, "%s: invalid decoded frame index %i",
				 __func__, dec_info.index_frame_decoded);
		}
	}

	/*
	 * Decode and display indices can differ (e.g. frame reordering), so
	 * the displayable buffer is looked up separately.
	 */
	if (dec_info.index_frame_display >= 0) {
		disp_buf = v4l2_m2m_dst_buf_remove_by_idx(m2m_ctx, dec_info.index_frame_display);
		if (!disp_buf)
			dev_warn(inst->dev->dev, "%s: invalid display frame index %i",
				 __func__, dec_info.index_frame_display);
	}

	/* If there is anything to display, do that now */
	if (disp_buf) {
		struct vpu_dst_buffer *dst_vpu_buf = wave5_to_vpu_dst_buf(disp_buf);

		/* Payload per plane comes from the negotiated capture format. */
		if (inst->dst_fmt.num_planes == 1) {
			vb2_set_plane_payload(&disp_buf->vb2_buf, 0,
					      inst->dst_fmt.plane_fmt[0].sizeimage);
		} else if (inst->dst_fmt.num_planes == 2) {
			vb2_set_plane_payload(&disp_buf->vb2_buf, 0,
					      inst->dst_fmt.plane_fmt[0].sizeimage);
			vb2_set_plane_payload(&disp_buf->vb2_buf, 1,
					      inst->dst_fmt.plane_fmt[1].sizeimage);
		} else if (inst->dst_fmt.num_planes == 3) {
			vb2_set_plane_payload(&disp_buf->vb2_buf, 0,
					      inst->dst_fmt.plane_fmt[0].sizeimage);
			vb2_set_plane_payload(&disp_buf->vb2_buf, 1,
					      inst->dst_fmt.plane_fmt[1].sizeimage);
			vb2_set_plane_payload(&disp_buf->vb2_buf, 2,
					      inst->dst_fmt.plane_fmt[2].sizeimage);
		}

		/* TODO implement interlace support */
		disp_buf->field = V4L2_FIELD_NONE;
		dst_vpu_buf->display = true;
		v4l2_m2m_buf_done(disp_buf, VB2_BUF_STATE_DONE);

		dev_dbg(inst->dev->dev, "%s: frame_cycle %8u (payload %lu)\n",
			__func__, dec_info.frame_cycle,
			vb2_get_plane_payload(&disp_buf->vb2_buf, 0));
	}

	/* Sequence end or mid-stream parameter change: enter STOP state. */
	if ((dec_info.index_frame_display == DISPLAY_IDX_FLAG_SEQ_END ||
	     dec_info.sequence_changed)) {
		unsigned long flags;

		spin_lock_irqsave(&inst->state_spinlock, flags);
		if (!v4l2_m2m_has_stopped(m2m_ctx)) {
			switch_state(inst, VPU_INST_STATE_STOP);

			if (dec_info.sequence_changed)
				handle_dynamic_resolution_change(inst);
			else
				send_eos_event(inst);

			flag_last_buffer_done(inst);
		}
		spin_unlock_irqrestore(&inst->state_spinlock, flags);
	}

	/*
	 * After EOS was signalled, finish the job once the firmware queues
	 * have fully drained.
	 */
	if (inst->sent_eos &&
	    v4l2_m2m_get_curr_priv(inst->v4l2_m2m_dev)) {
		struct queue_status_info q_status;

		wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);
		if (q_status.report_queue_count == 0 &&
		    q_status.instance_queue_count == 0)
			v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
	}

	inst->queuing_fail = false;
}
469 
/* VIDIOC_QUERYCAP: fill in the driver and card identification strings. */
static int wave5_vpu_dec_querycap(struct file *file, void *fh, struct v4l2_capability *cap)
{
	strscpy(cap->driver, VPU_DEC_DRV_NAME, sizeof(cap->driver));
	strscpy(cap->card, VPU_DEC_DRV_NAME, sizeof(cap->card));

	return 0;
}
477 
478 static int wave5_vpu_dec_enum_framesizes(struct file *f, void *fh, struct v4l2_frmsizeenum *fsize)
479 {
480 	const struct vpu_format *vpu_fmt;
481 
482 	if (fsize->index)
483 		return -EINVAL;
484 
485 	vpu_fmt = wave5_find_vpu_fmt(fsize->pixel_format, dec_fmt_list[VPU_FMT_TYPE_CODEC]);
486 	if (!vpu_fmt) {
487 		vpu_fmt = wave5_find_vpu_fmt(fsize->pixel_format, dec_fmt_list[VPU_FMT_TYPE_RAW]);
488 		if (!vpu_fmt)
489 			return -EINVAL;
490 	}
491 
492 	fsize->type = V4L2_FRMSIZE_TYPE_CONTINUOUS;
493 	fsize->stepwise.min_width = vpu_fmt->v4l2_frmsize->min_width;
494 	fsize->stepwise.max_width = vpu_fmt->v4l2_frmsize->max_width;
495 	fsize->stepwise.step_width = W5_DEC_CODEC_STEP_WIDTH;
496 	fsize->stepwise.min_height = vpu_fmt->v4l2_frmsize->min_height;
497 	fsize->stepwise.max_height = vpu_fmt->v4l2_frmsize->max_height;
498 	fsize->stepwise.step_height = W5_DEC_CODEC_STEP_HEIGHT;
499 
500 	return 0;
501 }
502 
503 static int wave5_vpu_dec_enum_fmt_cap(struct file *file, void *fh, struct v4l2_fmtdesc *f)
504 {
505 	const struct vpu_format *vpu_fmt;
506 
507 	vpu_fmt = wave5_find_vpu_fmt_by_idx(f->index, dec_fmt_list[VPU_FMT_TYPE_RAW]);
508 	if (!vpu_fmt)
509 		return -EINVAL;
510 
511 	f->pixelformat = vpu_fmt->v4l2_pix_fmt;
512 	f->flags = 0;
513 
514 	return 0;
515 }
516 
/*
 * VIDIOC_TRY_FMT (CAPTURE): adjust the requested raw format to what the
 * hardware supports, without applying it. Unknown pixel formats fall back
 * to the currently configured one; once the sequence header has been
 * parsed, the frame size is fixed by the bitstream.
 */
static int wave5_vpu_dec_try_fmt_cap(struct file *file, void *fh, struct v4l2_format *f)
{
	struct vpu_instance *inst = file_to_vpu_inst(file);
	struct dec_info *p_dec_info = &inst->codec_info->dec_info;
	const struct v4l2_frmsize_stepwise *frmsize;
	const struct vpu_format *vpu_fmt;
	int width, height;

	dev_dbg(inst->dev->dev,
		"%s: fourcc: %u width: %u height: %u nm planes: %u colorspace: %u field: %u\n",
		__func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
		f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);

	vpu_fmt = wave5_find_vpu_fmt(f->fmt.pix_mp.pixelformat, dec_fmt_list[VPU_FMT_TYPE_RAW]);
	if (!vpu_fmt) {
		/* Unsupported format: keep the currently configured one. */
		width = inst->dst_fmt.width;
		height = inst->dst_fmt.height;
		f->fmt.pix_mp.pixelformat = inst->dst_fmt.pixelformat;
		frmsize = &dec_raw_frmsize;
	} else {
		width = f->fmt.pix_mp.width;
		height = f->fmt.pix_mp.height;
		f->fmt.pix_mp.pixelformat = vpu_fmt->v4l2_pix_fmt;
		frmsize = vpu_fmt->v4l2_frmsize;
	}

	/* The stream dictates the size once the header has been parsed. */
	if (p_dec_info->initial_info_obtained) {
		width = inst->dst_fmt.width;
		height = inst->dst_fmt.height;
	}

	wave5_update_pix_fmt(&f->fmt.pix_mp, VPU_FMT_TYPE_RAW,
			     width, height, frmsize);
	/* Colorimetry always mirrors what was set on the OUTPUT queue. */
	f->fmt.pix_mp.colorspace = inst->colorspace;
	f->fmt.pix_mp.ycbcr_enc = inst->ycbcr_enc;
	f->fmt.pix_mp.quantization = inst->quantization;
	f->fmt.pix_mp.xfer_func = inst->xfer_func;

	return 0;
}
557 
558 static int wave5_vpu_dec_s_fmt_cap(struct file *file, void *fh, struct v4l2_format *f)
559 {
560 	struct vpu_instance *inst = file_to_vpu_inst(file);
561 	int i, ret;
562 
563 	dev_dbg(inst->dev->dev,
564 		"%s: fourcc: %u width: %u height: %u num_planes: %u colorspace: %u field: %u\n",
565 		__func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
566 		f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);
567 
568 	ret = wave5_vpu_dec_try_fmt_cap(file, fh, f);
569 	if (ret)
570 		return ret;
571 
572 	inst->dst_fmt.width = f->fmt.pix_mp.width;
573 	inst->dst_fmt.height = f->fmt.pix_mp.height;
574 	inst->dst_fmt.pixelformat = f->fmt.pix_mp.pixelformat;
575 	inst->dst_fmt.field = f->fmt.pix_mp.field;
576 	inst->dst_fmt.flags = f->fmt.pix_mp.flags;
577 	inst->dst_fmt.num_planes = f->fmt.pix_mp.num_planes;
578 	for (i = 0; i < inst->dst_fmt.num_planes; i++) {
579 		inst->dst_fmt.plane_fmt[i].bytesperline = f->fmt.pix_mp.plane_fmt[i].bytesperline;
580 		inst->dst_fmt.plane_fmt[i].sizeimage = f->fmt.pix_mp.plane_fmt[i].sizeimage;
581 	}
582 
583 	if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV12 ||
584 	    inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV12M) {
585 		inst->cbcr_interleave = true;
586 		inst->nv21 = false;
587 		inst->output_format = FORMAT_420;
588 	} else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV21 ||
589 		   inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV21M) {
590 		inst->cbcr_interleave = true;
591 		inst->nv21 = true;
592 		inst->output_format = FORMAT_420;
593 	} else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV16 ||
594 		   inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV16M) {
595 		inst->cbcr_interleave = true;
596 		inst->nv21 = false;
597 		inst->output_format = FORMAT_422;
598 	} else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV61 ||
599 		   inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV61M) {
600 		inst->cbcr_interleave = true;
601 		inst->nv21 = true;
602 		inst->output_format = FORMAT_422;
603 	} else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_YUV422P ||
604 		   inst->dst_fmt.pixelformat == V4L2_PIX_FMT_YUV422M) {
605 		inst->cbcr_interleave = false;
606 		inst->nv21 = false;
607 		inst->output_format = FORMAT_422;
608 	} else {
609 		inst->cbcr_interleave = false;
610 		inst->nv21 = false;
611 		inst->output_format = FORMAT_420;
612 	}
613 
614 	return 0;
615 }
616 
617 static int wave5_vpu_dec_g_fmt_cap(struct file *file, void *fh, struct v4l2_format *f)
618 {
619 	struct vpu_instance *inst = file_to_vpu_inst(file);
620 	int i;
621 
622 	f->fmt.pix_mp.width = inst->dst_fmt.width;
623 	f->fmt.pix_mp.height = inst->dst_fmt.height;
624 	f->fmt.pix_mp.pixelformat = inst->dst_fmt.pixelformat;
625 	f->fmt.pix_mp.field = inst->dst_fmt.field;
626 	f->fmt.pix_mp.flags = inst->dst_fmt.flags;
627 	f->fmt.pix_mp.num_planes = inst->dst_fmt.num_planes;
628 	for (i = 0; i < f->fmt.pix_mp.num_planes; i++) {
629 		f->fmt.pix_mp.plane_fmt[i].bytesperline = inst->dst_fmt.plane_fmt[i].bytesperline;
630 		f->fmt.pix_mp.plane_fmt[i].sizeimage = inst->dst_fmt.plane_fmt[i].sizeimage;
631 	}
632 
633 	f->fmt.pix_mp.colorspace = inst->colorspace;
634 	f->fmt.pix_mp.ycbcr_enc = inst->ycbcr_enc;
635 	f->fmt.pix_mp.quantization = inst->quantization;
636 	f->fmt.pix_mp.xfer_func = inst->xfer_func;
637 
638 	return 0;
639 }
640 
641 static int wave5_vpu_dec_enum_fmt_out(struct file *file, void *fh, struct v4l2_fmtdesc *f)
642 {
643 	struct vpu_instance *inst = file_to_vpu_inst(file);
644 	const struct vpu_format *vpu_fmt;
645 
646 	dev_dbg(inst->dev->dev, "%s: index: %u\n", __func__, f->index);
647 
648 	vpu_fmt = wave5_find_vpu_fmt_by_idx(f->index, dec_fmt_list[VPU_FMT_TYPE_CODEC]);
649 	if (!vpu_fmt)
650 		return -EINVAL;
651 
652 	f->pixelformat = vpu_fmt->v4l2_pix_fmt;
653 	f->flags = V4L2_FMT_FLAG_DYN_RESOLUTION | V4L2_FMT_FLAG_COMPRESSED;
654 
655 	return 0;
656 }
657 
/*
 * VIDIOC_TRY_FMT (OUTPUT): adjust the requested compressed format to the
 * hardware limits without applying it. Unknown pixel formats fall back to
 * the currently configured one.
 */
static int wave5_vpu_dec_try_fmt_out(struct file *file, void *fh, struct v4l2_format *f)
{
	struct vpu_instance *inst = file_to_vpu_inst(file);
	const struct v4l2_frmsize_stepwise *frmsize;
	const struct vpu_format *vpu_fmt;
	int width, height;

	dev_dbg(inst->dev->dev,
		"%s: fourcc: %u width: %u height: %u num_planes: %u colorspace: %u field: %u\n",
		__func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
		f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);

	vpu_fmt = wave5_find_vpu_fmt(f->fmt.pix_mp.pixelformat, dec_fmt_list[VPU_FMT_TYPE_CODEC]);
	if (!vpu_fmt) {
		/* Unsupported format: keep the currently configured one. */
		width = inst->src_fmt.width;
		height = inst->src_fmt.height;
		f->fmt.pix_mp.pixelformat = inst->src_fmt.pixelformat;
		frmsize = &dec_hevc_frmsize;
	} else {
		width = f->fmt.pix_mp.width;
		height = f->fmt.pix_mp.height;
		f->fmt.pix_mp.pixelformat = vpu_fmt->v4l2_pix_fmt;
		frmsize = vpu_fmt->v4l2_frmsize;
	}

	wave5_update_pix_fmt(&f->fmt.pix_mp, VPU_FMT_TYPE_CODEC,
			     width, height, frmsize);

	return 0;
}
688 
/*
 * VIDIOC_S_FMT (OUTPUT): validate via try_fmt, select the codec standard
 * from the fourcc, store the source format and colorimetry, and keep the
 * capture format in sync with the new coded size.
 */
static int wave5_vpu_dec_s_fmt_out(struct file *file, void *fh, struct v4l2_format *f)
{
	struct vpu_instance *inst = file_to_vpu_inst(file);
	const struct vpu_format *vpu_fmt;
	int i, ret;

	dev_dbg(inst->dev->dev,
		"%s: fourcc: %u width: %u height: %u num_planes: %u field: %u\n",
		__func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
		f->fmt.pix_mp.num_planes, f->fmt.pix_mp.field);

	ret = wave5_vpu_dec_try_fmt_out(file, fh, f);
	if (ret)
		return ret;

	/* Map the fourcc to the firmware codec standard (HEVC/H.264). */
	inst->std = wave5_to_vpu_std(f->fmt.pix_mp.pixelformat, inst->type);
	if (inst->std == STD_UNKNOWN) {
		dev_warn(inst->dev->dev, "unsupported pixelformat: %.4s\n",
			 (char *)&f->fmt.pix_mp.pixelformat);
		return -EINVAL;
	}

	inst->src_fmt.width = f->fmt.pix_mp.width;
	inst->src_fmt.height = f->fmt.pix_mp.height;
	inst->src_fmt.pixelformat = f->fmt.pix_mp.pixelformat;
	inst->src_fmt.field = f->fmt.pix_mp.field;
	inst->src_fmt.flags = f->fmt.pix_mp.flags;
	inst->src_fmt.num_planes = f->fmt.pix_mp.num_planes;
	for (i = 0; i < inst->src_fmt.num_planes; i++) {
		inst->src_fmt.plane_fmt[i].bytesperline = f->fmt.pix_mp.plane_fmt[i].bytesperline;
		inst->src_fmt.plane_fmt[i].sizeimage = f->fmt.pix_mp.plane_fmt[i].sizeimage;
	}

	/* Colorimetry set here is reported back on the CAPTURE queue. */
	inst->colorspace = f->fmt.pix_mp.colorspace;
	inst->ycbcr_enc = f->fmt.pix_mp.ycbcr_enc;
	inst->quantization = f->fmt.pix_mp.quantization;
	inst->xfer_func = f->fmt.pix_mp.xfer_func;

	vpu_fmt = wave5_find_vpu_fmt(inst->dst_fmt.pixelformat, dec_fmt_list[VPU_FMT_TYPE_RAW]);
	if (!vpu_fmt)
		return -EINVAL;

	/* Propagate the new coded size to the capture (raw) format. */
	wave5_update_pix_fmt(&inst->dst_fmt, VPU_FMT_TYPE_RAW,
			     f->fmt.pix_mp.width, f->fmt.pix_mp.height,
			     vpu_fmt->v4l2_frmsize);

	return 0;
}
737 
738 static int wave5_vpu_dec_g_selection(struct file *file, void *fh, struct v4l2_selection *s)
739 {
740 	struct vpu_instance *inst = file_to_vpu_inst(file);
741 
742 	dev_dbg(inst->dev->dev, "%s: type: %u | target: %u\n", __func__, s->type, s->target);
743 
744 	if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
745 		return -EINVAL;
746 	switch (s->target) {
747 	case V4L2_SEL_TGT_COMPOSE_BOUNDS:
748 	case V4L2_SEL_TGT_COMPOSE_PADDED:
749 		s->r.left = 0;
750 		s->r.top = 0;
751 		s->r.width = inst->dst_fmt.width;
752 		s->r.height = inst->dst_fmt.height;
753 		break;
754 	case V4L2_SEL_TGT_COMPOSE:
755 	case V4L2_SEL_TGT_COMPOSE_DEFAULT:
756 		s->r.left = 0;
757 		s->r.top = 0;
758 		if (inst->state > VPU_INST_STATE_OPEN) {
759 			s->r = inst->conf_win;
760 		} else {
761 			s->r.width = inst->src_fmt.width;
762 			s->r.height = inst->src_fmt.height;
763 		}
764 		break;
765 	default:
766 		return -EINVAL;
767 	}
768 
769 	return 0;
770 }
771 
772 static int wave5_vpu_dec_s_selection(struct file *file, void *fh, struct v4l2_selection *s)
773 {
774 	struct vpu_instance *inst = file_to_vpu_inst(file);
775 
776 	if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
777 		return -EINVAL;
778 
779 	if (s->target != V4L2_SEL_TGT_COMPOSE)
780 		return -EINVAL;
781 
782 	dev_dbg(inst->dev->dev, "V4L2_SEL_TGT_COMPOSE w: %u h: %u\n",
783 		s->r.width, s->r.height);
784 
785 	s->r.left = 0;
786 	s->r.top = 0;
787 	s->r.width = inst->dst_fmt.width;
788 	s->r.height = inst->dst_fmt.height;
789 
790 	return 0;
791 }
792 
/*
 * V4L2_DEC_CMD_STOP: begin the drain sequence. Marks the last queued
 * source buffer, signals EOS to the firmware (immediately, or deferred
 * until that buffer is consumed), and — if nothing is pending at all —
 * flags the last capture buffer right away.
 * Returns 0, -EBUSY if a drain is already in progress, or the error from
 * signalling EOS to the firmware.
 */
static int wave5_vpu_dec_stop(struct vpu_instance *inst)
{
	int ret = 0;
	unsigned long flags;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;

	spin_lock_irqsave(&inst->state_spinlock, flags);

	if (m2m_ctx->is_draining) {
		ret = -EBUSY;
		goto unlock_and_return;
	}

	if (inst->state != VPU_INST_STATE_NONE) {
		/*
		 * Temporarily release the state_spinlock so that subsequent
		 * calls do not block on a mutex while inside this spinlock.
		 */
		spin_unlock_irqrestore(&inst->state_spinlock, flags);
		ret = wave5_vpu_dec_set_eos_on_firmware(inst);
		if (ret)
			return ret;

		spin_lock_irqsave(&inst->state_spinlock, flags);
		/*
		 * TODO eliminate this check by using a separate check for
		 * draining triggered by a resolution change.
		 */
		if (m2m_ctx->is_draining) {
			ret = -EBUSY;
			goto unlock_and_return;
		}
	}

	/*
	 * Used to remember the EOS state after the streamoff/on transition on
	 * the capture queue.
	 */
	inst->eos = true;

	if (m2m_ctx->has_stopped)
		goto unlock_and_return;

	m2m_ctx->last_src_buf = v4l2_m2m_last_src_buf(m2m_ctx);
	m2m_ctx->is_draining = true;

	/*
	 * Deferred to device run in case it wasn't in the ring buffer
	 * yet. In other case, we have to send the EOS signal to the
	 * firmware so that any pending PIC_RUN ends without new
	 * bitstream buffer.
	 */
	if (m2m_ctx->last_src_buf)
		goto unlock_and_return;

	/* Nothing was ever queued: complete the drain immediately. */
	if (inst->state == VPU_INST_STATE_NONE) {
		send_eos_event(inst);
		flag_last_buffer_done(inst);
	}

unlock_and_return:
	spin_unlock_irqrestore(&inst->state_spinlock, flags);
	return ret;
}
857 
858 static int wave5_vpu_dec_start(struct vpu_instance *inst)
859 {
860 	int ret = 0;
861 	unsigned long flags;
862 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
863 	struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
864 
865 	spin_lock_irqsave(&inst->state_spinlock, flags);
866 
867 	if (m2m_ctx->is_draining) {
868 		ret = -EBUSY;
869 		goto unlock_and_return;
870 	}
871 
872 	if (m2m_ctx->has_stopped)
873 		m2m_ctx->has_stopped = false;
874 
875 	vb2_clear_last_buffer_dequeued(dst_vq);
876 	inst->eos = false;
877 
878 unlock_and_return:
879 	spin_unlock_irqrestore(&inst->state_spinlock, flags);
880 	return ret;
881 }
882 
883 static int wave5_vpu_dec_decoder_cmd(struct file *file, void *fh, struct v4l2_decoder_cmd *dc)
884 {
885 	struct vpu_instance *inst = file_to_vpu_inst(file);
886 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
887 	int ret;
888 
889 	dev_dbg(inst->dev->dev, "decoder command: %u\n", dc->cmd);
890 
891 	ret = v4l2_m2m_ioctl_try_decoder_cmd(file, fh, dc);
892 	if (ret)
893 		return ret;
894 
895 	switch (dc->cmd) {
896 	case V4L2_DEC_CMD_STOP:
897 		ret = wave5_vpu_dec_stop(inst);
898 		/* Just in case we don't have anything to decode anymore */
899 		v4l2_m2m_try_schedule(m2m_ctx);
900 		break;
901 	case V4L2_DEC_CMD_START:
902 		ret = wave5_vpu_dec_start(inst);
903 		break;
904 	default:
905 		ret = -EINVAL;
906 	}
907 
908 	return ret;
909 }
910 
/* V4L2 ioctl dispatch table for the decoder video device. */
static const struct v4l2_ioctl_ops wave5_vpu_dec_ioctl_ops = {
	.vidioc_querycap = wave5_vpu_dec_querycap,
	.vidioc_enum_framesizes = wave5_vpu_dec_enum_framesizes,

	/* CAPTURE queue: raw (decoded) frames. */
	.vidioc_enum_fmt_vid_cap	= wave5_vpu_dec_enum_fmt_cap,
	.vidioc_s_fmt_vid_cap_mplane = wave5_vpu_dec_s_fmt_cap,
	.vidioc_g_fmt_vid_cap_mplane = wave5_vpu_dec_g_fmt_cap,
	.vidioc_try_fmt_vid_cap_mplane = wave5_vpu_dec_try_fmt_cap,

	/* OUTPUT queue: compressed bitstream. */
	.vidioc_enum_fmt_vid_out	= wave5_vpu_dec_enum_fmt_out,
	.vidioc_s_fmt_vid_out_mplane = wave5_vpu_dec_s_fmt_out,
	.vidioc_g_fmt_vid_out_mplane = wave5_vpu_g_fmt_out,
	.vidioc_try_fmt_vid_out_mplane = wave5_vpu_dec_try_fmt_out,

	.vidioc_g_selection = wave5_vpu_dec_g_selection,
	.vidioc_s_selection = wave5_vpu_dec_s_selection,

	.vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs,
	/*
	 * Firmware does not support CREATE_BUFS for CAPTURE queue. Since
	 * there is no immediate use-case for supporting CREATE_BUFS on
	 * just the OUTPUT queue, disable CREATE_BUFS altogether.
	 */
	.vidioc_querybuf = v4l2_m2m_ioctl_querybuf,
	.vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf,
	.vidioc_qbuf = v4l2_m2m_ioctl_qbuf,
	.vidioc_expbuf = v4l2_m2m_ioctl_expbuf,
	.vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf,
	.vidioc_streamon = v4l2_m2m_ioctl_streamon,
	.vidioc_streamoff = v4l2_m2m_ioctl_streamoff,

	.vidioc_try_decoder_cmd = v4l2_m2m_ioctl_try_decoder_cmd,
	.vidioc_decoder_cmd = wave5_vpu_dec_decoder_cmd,

	.vidioc_subscribe_event = wave5_vpu_subscribe_event,
	.vidioc_unsubscribe_event = v4l2_event_unsubscribe,
};
948 
949 static int wave5_vpu_dec_queue_setup(struct vb2_queue *q, unsigned int *num_buffers,
950 				     unsigned int *num_planes, unsigned int sizes[],
951 				     struct device *alloc_devs[])
952 {
953 	struct vpu_instance *inst = vb2_get_drv_priv(q);
954 	struct v4l2_pix_format_mplane inst_format =
955 		(q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) ? inst->src_fmt : inst->dst_fmt;
956 	unsigned int i;
957 
958 	dev_dbg(inst->dev->dev, "%s: num_buffers: %u | num_planes: %u | type: %u\n", __func__,
959 		*num_buffers, *num_planes, q->type);
960 
961 	*num_planes = inst_format.num_planes;
962 
963 	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
964 		sizes[0] = inst_format.plane_fmt[0].sizeimage;
965 		dev_dbg(inst->dev->dev, "%s: size[0]: %u\n", __func__, sizes[0]);
966 	} else if (q->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
967 		if (*num_buffers < inst->fbc_buf_count)
968 			*num_buffers = inst->fbc_buf_count;
969 
970 		for (i = 0; i < *num_planes; i++) {
971 			sizes[i] = inst_format.plane_fmt[i].sizeimage;
972 			dev_dbg(inst->dev->dev, "%s: size[%u]: %u\n", __func__, i, sizes[i]);
973 		}
974 	}
975 
976 	return 0;
977 }
978 
979 static int wave5_prepare_fb(struct vpu_instance *inst)
980 {
981 	int linear_num;
982 	int non_linear_num;
983 	int fb_stride = 0, fb_height = 0;
984 	int luma_size, chroma_size;
985 	int ret, i;
986 	struct v4l2_m2m_buffer *buf, *n;
987 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
988 	u32 bitdepth = inst->codec_info->dec_info.initial_info.luma_bitdepth;
989 
990 	switch (bitdepth) {
991 	case 8:
992 		break;
993 	case 10:
994 		if (inst->std == W_HEVC_DEC &&
995 		    inst->dev->attr.support_hevc10bit_dec)
996 			break;
997 
998 		fallthrough;
999 	default:
1000 		dev_err(inst->dev->dev, "no support for %d bit depth\n", bitdepth);
1001 
1002 		return -EINVAL;
1003 	}
1004 
1005 	linear_num = v4l2_m2m_num_dst_bufs_ready(m2m_ctx);
1006 	non_linear_num = inst->fbc_buf_count;
1007 
1008 	for (i = 0; i < non_linear_num; i++) {
1009 		struct frame_buffer *frame = &inst->frame_buf[i];
1010 		struct vpu_buf *vframe = &inst->frame_vbuf[i];
1011 
1012 		fb_stride = ALIGN(inst->dst_fmt.width * bitdepth / 8, 32);
1013 		fb_height = ALIGN(inst->dst_fmt.height, 32);
1014 		luma_size = fb_stride * fb_height;
1015 
1016 		chroma_size = ALIGN(fb_stride / 2, 16) * fb_height;
1017 
1018 		if (vframe->size == (luma_size + chroma_size))
1019 			continue;
1020 
1021 		if (vframe->size)
1022 			wave5_vpu_dec_reset_framebuffer(inst, i);
1023 
1024 		vframe->size = luma_size + chroma_size;
1025 		ret = wave5_vdi_allocate_dma_memory(inst->dev, vframe);
1026 		if (ret) {
1027 			dev_dbg(inst->dev->dev,
1028 				"%s: Allocating FBC buf of size %zu, fail: %d\n",
1029 				__func__, vframe->size, ret);
1030 			return ret;
1031 		}
1032 
1033 		frame->buf_y = vframe->daddr;
1034 		frame->buf_cb = vframe->daddr + luma_size;
1035 		frame->buf_cr = (dma_addr_t)-1;
1036 		frame->size = vframe->size;
1037 		frame->width = inst->src_fmt.width;
1038 		frame->stride = fb_stride;
1039 		frame->map_type = COMPRESSED_FRAME_MAP;
1040 		frame->update_fb_info = true;
1041 	}
1042 	/* In case the count has reduced, clean up leftover framebuffer memory */
1043 	for (i = non_linear_num; i < MAX_REG_FRAME; i++) {
1044 		ret = wave5_vpu_dec_reset_framebuffer(inst, i);
1045 		if (ret)
1046 			break;
1047 	}
1048 
1049 	for (i = 0; i < linear_num; i++) {
1050 		struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1051 		struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
1052 		struct vb2_buffer *vb = vb2_get_buffer(dst_vq, i);
1053 		struct frame_buffer *frame = &inst->frame_buf[non_linear_num + i];
1054 		dma_addr_t buf_addr_y = 0, buf_addr_cb = 0, buf_addr_cr = 0;
1055 		u32 buf_size = 0;
1056 		u32 fb_stride = inst->dst_fmt.width;
1057 		u32 luma_size = fb_stride * inst->dst_fmt.height;
1058 		u32 chroma_size;
1059 
1060 		if (inst->output_format == FORMAT_422)
1061 			chroma_size = fb_stride * inst->dst_fmt.height / 2;
1062 		else
1063 			chroma_size = fb_stride * inst->dst_fmt.height / 4;
1064 
1065 		if (inst->dst_fmt.num_planes == 1) {
1066 			buf_size = vb2_plane_size(vb, 0);
1067 			buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0);
1068 			buf_addr_cb = buf_addr_y + luma_size;
1069 			buf_addr_cr = buf_addr_cb + chroma_size;
1070 		} else if (inst->dst_fmt.num_planes == 2) {
1071 			buf_size = vb2_plane_size(vb, 0) +
1072 				vb2_plane_size(vb, 1);
1073 			buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0);
1074 			buf_addr_cb = vb2_dma_contig_plane_dma_addr(vb, 1);
1075 			buf_addr_cr = buf_addr_cb + chroma_size;
1076 		} else if (inst->dst_fmt.num_planes == 3) {
1077 			buf_size = vb2_plane_size(vb, 0) +
1078 				vb2_plane_size(vb, 1) +
1079 				vb2_plane_size(vb, 2);
1080 			buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0);
1081 			buf_addr_cb = vb2_dma_contig_plane_dma_addr(vb, 1);
1082 			buf_addr_cr = vb2_dma_contig_plane_dma_addr(vb, 2);
1083 		}
1084 
1085 		frame->buf_y = buf_addr_y;
1086 		frame->buf_cb = buf_addr_cb;
1087 		frame->buf_cr = buf_addr_cr;
1088 		frame->size = buf_size;
1089 		frame->width = inst->src_fmt.width;
1090 		frame->stride = fb_stride;
1091 		frame->map_type = LINEAR_FRAME_MAP;
1092 		frame->update_fb_info = true;
1093 	}
1094 
1095 	ret = wave5_vpu_dec_register_frame_buffer_ex(inst, non_linear_num, linear_num,
1096 						     fb_stride, inst->dst_fmt.height);
1097 	if (ret) {
1098 		dev_dbg(inst->dev->dev, "%s: vpu_dec_register_frame_buffer_ex fail: %d",
1099 			__func__, ret);
1100 		return ret;
1101 	}
1102 
1103 	/*
1104 	 * Mark all frame buffers as out of display, to avoid using them before
1105 	 * the application have them queued.
1106 	 */
1107 	for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) {
1108 		ret = wave5_vpu_dec_set_disp_flag(inst, i);
1109 		if (ret) {
1110 			dev_dbg(inst->dev->dev,
1111 				"%s: Setting display flag of buf index: %u, fail: %d\n",
1112 				__func__, i, ret);
1113 		}
1114 	}
1115 
1116 	v4l2_m2m_for_each_dst_buf_safe(m2m_ctx, buf, n) {
1117 		struct vb2_v4l2_buffer *vbuf = &buf->vb;
1118 
1119 		ret = wave5_vpu_dec_clr_disp_flag(inst, vbuf->vb2_buf.index);
1120 		if (ret)
1121 			dev_dbg(inst->dev->dev,
1122 				"%s: Clearing display flag of buf index: %u, fail: %d\n",
1123 				__func__, i, ret);
1124 	}
1125 
1126 	return 0;
1127 }
1128 
1129 static int write_to_ringbuffer(struct vpu_instance *inst, void *buffer, size_t buffer_size,
1130 			       struct vpu_buf *ring_buffer, dma_addr_t wr_ptr)
1131 {
1132 	size_t size;
1133 	size_t offset = wr_ptr - ring_buffer->daddr;
1134 	int ret;
1135 
1136 	if (wr_ptr + buffer_size > ring_buffer->daddr + ring_buffer->size) {
1137 		size = ring_buffer->daddr + ring_buffer->size - wr_ptr;
1138 		ret = wave5_vdi_write_memory(inst->dev, ring_buffer, offset, (u8 *)buffer, size);
1139 		if (ret < 0)
1140 			return ret;
1141 
1142 		ret = wave5_vdi_write_memory(inst->dev, ring_buffer, 0, (u8 *)buffer + size,
1143 					     buffer_size - size);
1144 		if (ret < 0)
1145 			return ret;
1146 	} else {
1147 		ret = wave5_vdi_write_memory(inst->dev, ring_buffer, offset, (u8 *)buffer,
1148 					     buffer_size);
1149 		if (ret < 0)
1150 			return ret;
1151 	}
1152 
1153 	return 0;
1154 }
1155 
1156 static struct vpu_src_buffer *inst_src_buf_remove(struct vpu_instance *inst)
1157 {
1158 	struct vpu_src_buffer *b;
1159 	int ret;
1160 
1161 	ret = mutex_lock_interruptible(&inst->feed_lock);
1162 	if (ret)
1163 		return NULL;
1164 
1165 	if (list_empty(&inst->avail_src_bufs)) {
1166 		mutex_unlock(&inst->feed_lock);
1167 		return NULL;
1168 	}
1169 	b = list_first_entry(&inst->avail_src_bufs, struct vpu_src_buffer, list);
1170 	list_del_init(&b->list);
1171 	mutex_unlock(&inst->feed_lock);
1172 	return b;
1173 }
1174 
1175 static int fill_ringbuffer(struct vpu_instance *inst)
1176 {
1177 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1178 	struct vpu_src_buffer *vpu_buf;
1179 	int ret = 0;
1180 
1181 	if (m2m_ctx->last_src_buf)  {
1182 		struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(m2m_ctx->last_src_buf);
1183 
1184 		if (vpu_buf->consumed) {
1185 			dev_dbg(inst->dev->dev, "last src buffer already written\n");
1186 			return 0;
1187 		}
1188 	}
1189 
1190 	while ((vpu_buf = inst_src_buf_remove(inst)) != NULL) {
1191 		struct vb2_v4l2_buffer *vbuf = &vpu_buf->v4l2_m2m_buf.vb;
1192 		struct vpu_buf *ring_buffer = &inst->bitstream_vbuf;
1193 		size_t src_size = vb2_get_plane_payload(&vbuf->vb2_buf, 0);
1194 		void *src_buf = vb2_plane_vaddr(&vbuf->vb2_buf, 0);
1195 		dma_addr_t rd_ptr = 0;
1196 		dma_addr_t wr_ptr = 0;
1197 		size_t remain_size = 0;
1198 
1199 		if (vpu_buf->consumed) {
1200 			dev_dbg(inst->dev->dev, "already copied src buf (%u) to the ring buffer\n",
1201 				vbuf->vb2_buf.index);
1202 			continue;
1203 		}
1204 
1205 		if (!src_buf) {
1206 			dev_dbg(inst->dev->dev,
1207 				"%s: Acquiring kernel pointer to src buf (%u), fail\n",
1208 				__func__, vbuf->vb2_buf.index);
1209 			break;
1210 		}
1211 
1212 		ret = wave5_vpu_dec_get_bitstream_buffer(inst, &rd_ptr, &wr_ptr, &remain_size);
1213 		if (ret) {
1214 			/* Unable to acquire the mutex */
1215 			dev_err(inst->dev->dev, "Getting the bitstream buffer, fail: %d\n",
1216 				ret);
1217 			return ret;
1218 		}
1219 
1220 		dev_dbg(inst->dev->dev, "%s: rd_ptr %pad wr_ptr %pad", __func__, &rd_ptr, &wr_ptr);
1221 
1222 		if (remain_size < src_size) {
1223 			dev_dbg(inst->dev->dev,
1224 				"%s: remaining size: %zu < source size: %zu for src buf (%u)\n",
1225 				__func__, remain_size, src_size, vbuf->vb2_buf.index);
1226 			break;
1227 		}
1228 
1229 		ret = write_to_ringbuffer(inst, src_buf, src_size, ring_buffer, wr_ptr);
1230 		if (ret) {
1231 			dev_err(inst->dev->dev, "Write src buf (%u) to ring buffer, fail: %d\n",
1232 				vbuf->vb2_buf.index, ret);
1233 			return ret;
1234 		}
1235 
1236 		ret = wave5_vpu_dec_update_bitstream_buffer(inst, src_size);
1237 		if (ret) {
1238 			dev_dbg(inst->dev->dev,
1239 				"update_bitstream_buffer fail: %d for src buf (%u)\n",
1240 				ret, vbuf->vb2_buf.index);
1241 			break;
1242 		}
1243 
1244 		vpu_buf->consumed = true;
1245 
1246 		/* Don't write buffers passed the last one while draining. */
1247 		if (v4l2_m2m_is_last_draining_src_buf(m2m_ctx, vbuf)) {
1248 			dev_dbg(inst->dev->dev, "last src buffer written to the ring buffer\n");
1249 			break;
1250 		}
1251 
1252 		inst->queuing_num++;
1253 		break;
1254 	}
1255 
1256 	return ret;
1257 }
1258 
/*
 * Queue an OUTPUT (bitstream) buffer.
 *
 * The buffer is put on the instance's avail_src_bufs list (from which
 * fill_ringbuffer() later copies it into the ring buffer) and then handed
 * to the m2m framework.
 */
static void wave5_vpu_dec_buf_queue_src(struct vb2_buffer *vb)
{
	struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
	struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(vbuf);
	int ret;

	/* Not yet copied into the bitstream ring buffer. */
	vpu_buf->consumed = false;
	vbuf->sequence = inst->queued_src_buf_num++;
	ret = mutex_lock_interruptible(&inst->feed_lock);
	if (ret)
		/*
		 * NOTE(review): on an interrupted lock the buffer is neither
		 * listed nor queued to the m2m framework — confirm this is
		 * the intended behavior.
		 */
		return;
	INIT_LIST_HEAD(&vpu_buf->list);
	list_add_tail(&vpu_buf->list, &inst->avail_src_bufs);
	mutex_unlock(&inst->feed_lock);
	v4l2_m2m_buf_queue(m2m_ctx, vbuf);
}
1277 
/*
 * Queue a CAPTURE (decoded frame) buffer.
 *
 * While decoding is running (PIC_RUN), re-queued buffers only need their
 * display flag cleared so the firmware may reuse them. When the m2m
 * framework marked the next destination buffer as the last one, the buffer
 * is returned immediately as an empty "last buffer" and an EOS event is
 * sent instead of queueing it.
 */
static void wave5_vpu_dec_buf_queue_dst(struct vb2_buffer *vb)
{
	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
	struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;

	pm_runtime_resume_and_get(inst->dev->dev);
	vbuf->sequence = inst->queued_dst_buf_num++;

	if (inst->state == VPU_INST_STATE_PIC_RUN) {
		struct vpu_dst_buffer *vpu_buf = wave5_to_vpu_dst_buf(vbuf);
		int ret;

		/*
		 * The buffer is already registered just clear the display flag
		 * to let the firmware know it can be used.
		 */
		vpu_buf->display = false;
		ret = wave5_vpu_dec_clr_disp_flag(inst, vb->index);
		if (ret) {
			dev_dbg(inst->dev->dev,
				"%s: Clearing the display flag of buffer index: %u, fail: %d\n",
				__func__, vb->index, ret);
		}
	}

	if (vb2_is_streaming(vb->vb2_queue) && v4l2_m2m_dst_buf_is_last(m2m_ctx)) {
		unsigned int i;
		unsigned long flags;

		/* Return it as an empty buffer carrying only the LAST flag. */
		for (i = 0; i < vb->num_planes; i++)
			vb2_set_plane_payload(vb, i, 0);

		vbuf->field = V4L2_FIELD_NONE;

		spin_lock_irqsave(&inst->state_spinlock, flags);
		send_eos_event(inst);
		spin_unlock_irqrestore(&inst->state_spinlock, flags);

		v4l2_m2m_last_buffer_done(m2m_ctx, vbuf);
	} else {
		v4l2_m2m_buf_queue(m2m_ctx, vbuf);
	}
	pm_runtime_put_autosuspend(inst->dev->dev);
}
1323 
1324 static void wave5_vpu_dec_buf_queue(struct vb2_buffer *vb)
1325 {
1326 	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
1327 	struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
1328 
1329 	dev_dbg(inst->dev->dev, "%s: type: %4u index: %4u size: ([0]=%4lu, [1]=%4lu, [2]=%4lu)\n",
1330 		__func__, vb->type, vb->index, vb2_plane_size(&vbuf->vb2_buf, 0),
1331 		vb2_plane_size(&vbuf->vb2_buf, 1), vb2_plane_size(&vbuf->vb2_buf, 2));
1332 
1333 	if (vb->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
1334 		wave5_vpu_dec_buf_queue_src(vb);
1335 		if (inst->empty_queue)
1336 			inst->empty_queue = false;
1337 	} else if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
1338 		wave5_vpu_dec_buf_queue_dst(vb);
1339 	}
1340 }
1341 
1342 static int wave5_vpu_dec_allocate_ring_buffer(struct vpu_instance *inst)
1343 {
1344 	int ret;
1345 	struct vpu_buf *ring_buffer = &inst->bitstream_vbuf;
1346 
1347 	ring_buffer->size = ALIGN(inst->src_fmt.plane_fmt[0].sizeimage, 1024) * 4;
1348 	ret = wave5_vdi_allocate_dma_memory(inst->dev, ring_buffer);
1349 	if (ret) {
1350 		dev_dbg(inst->dev->dev, "%s: allocate ring buffer of size %zu fail: %d\n",
1351 			__func__, ring_buffer->size, ret);
1352 		return ret;
1353 	}
1354 
1355 	inst->last_rd_ptr = ring_buffer->daddr;
1356 
1357 	return 0;
1358 }
1359 
/*
 * vb2 .start_streaming for both queues.
 *
 * First OUTPUT streamon (instance still NONE): allocate the bitstream ring
 * buffer, open a decoder instance on the firmware and move to OPEN.
 * CAPTURE streamon: leave STOP for INIT_SEQ and, on WAVE521C, reject
 * streams that are not 8-bit. On error all queued buffers are returned in
 * the QUEUED state.
 */
static int wave5_vpu_dec_start_streaming(struct vb2_queue *q, unsigned int count)
{
	struct vpu_instance *inst = vb2_get_drv_priv(q);
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	int ret = 0;

	dev_dbg(inst->dev->dev, "%s: type: %u\n", __func__, q->type);
	pm_runtime_resume_and_get(inst->dev->dev);

	v4l2_m2m_update_start_streaming_state(m2m_ctx, q);

	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE && inst->state == VPU_INST_STATE_NONE) {
		struct dec_open_param open_param;

		memset(&open_param, 0, sizeof(struct dec_open_param));

		ret = wave5_vpu_dec_allocate_ring_buffer(inst);
		if (ret)
			goto return_buffers;

		open_param.bitstream_buffer = inst->bitstream_vbuf.daddr;
		open_param.bitstream_buffer_size = inst->bitstream_vbuf.size;

		ret = wave5_vpu_dec_open(inst, &open_param);
		if (ret) {
			dev_dbg(inst->dev->dev, "%s: decoder opening, fail: %d\n",
				__func__, ret);
			goto free_bitstream_vbuf;
		}

		ret = switch_state(inst, VPU_INST_STATE_OPEN);
		if (ret)
			goto free_bitstream_vbuf;
	} else if (q->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
		struct dec_initial_info *initial_info =
			&inst->codec_info->dec_info.initial_info;

		/* Restarting CAPTURE after a stop resumes sequence init. */
		if (inst->state == VPU_INST_STATE_STOP)
			ret = switch_state(inst, VPU_INST_STATE_INIT_SEQ);
		if (ret)
			goto return_buffers;

		/* WAVE521C only decodes 8-bit streams. */
		if (inst->state == VPU_INST_STATE_INIT_SEQ &&
		    inst->dev->product_code == WAVE521C_CODE) {
			if (initial_info->luma_bitdepth != 8) {
				dev_info(inst->dev->dev, "%s: no support for %d bit depth",
					 __func__, initial_info->luma_bitdepth);
				ret = -EINVAL;
				goto return_buffers;
			}
		}

	}
	pm_runtime_put_autosuspend(inst->dev->dev);
	return ret;

free_bitstream_vbuf:
	wave5_vdi_free_dma_memory(inst->dev, &inst->bitstream_vbuf);
return_buffers:
	wave5_return_bufs(q, VB2_BUF_STATE_QUEUED);
	pm_runtime_put_autosuspend(inst->dev->dev);
	return ret;
}
1423 
/*
 * Handle streamoff on the OUTPUT (bitstream) queue.
 *
 * Drops all pending source buffers, hands every framebuffer back to the
 * firmware, flushes the instance and resets the ring-buffer pointers.
 * Streamoff on OUTPUT also cancels any in-progress drain.
 */
static int streamoff_output(struct vb2_queue *q)
{
	struct vpu_instance *inst = vb2_get_drv_priv(q);
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct vb2_v4l2_buffer *buf;
	int ret;
	dma_addr_t new_rd_ptr;
	struct dec_output_info dec_info;
	unsigned int i;
	struct vpu_src_buffer *vpu_buf;

	inst->retry = false;
	inst->queuing_num = 0;
	/* Drain the driver's list of not-yet-fed source buffers. */
	while ((vpu_buf = inst_src_buf_remove(inst)) != NULL)
		;

	/* Mark all framebuffers as displayable by the firmware again. */
	for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) {
		ret = wave5_vpu_dec_set_disp_flag(inst, i);
		if (ret)
			dev_dbg(inst->dev->dev,
				"%s: Setting display flag of buf index: %u, fail: %d\n",
				__func__, i, ret);
	}

	/* Return all queued source buffers as errored. */
	while ((buf = v4l2_m2m_src_buf_remove(m2m_ctx))) {
		dev_dbg(inst->dev->dev, "%s: (Multiplanar) buf type %4u | index %4u\n",
			__func__, buf->vb2_buf.type, buf->vb2_buf.index);
		v4l2_m2m_buf_done(buf, VB2_BUF_STATE_ERROR);
	}

	/* Consume any outstanding decode results before flushing. */
	while (wave5_vpu_dec_get_output_info(inst, &dec_info) == 0) {
		if (dec_info.index_frame_display >= 0)
			wave5_vpu_dec_set_disp_flag(inst, dec_info.index_frame_display);
	}

	ret = wave5_vpu_flush_instance(inst);
	if (ret)
		return ret;

	/* Reset the ring buffer information */
	new_rd_ptr = wave5_vpu_dec_get_rd_ptr(inst);
	inst->last_rd_ptr = new_rd_ptr;
	inst->codec_info->dec_info.stream_rd_ptr = new_rd_ptr;
	inst->codec_info->dec_info.stream_wr_ptr = new_rd_ptr;

	if (v4l2_m2m_has_stopped(m2m_ctx)) {
		unsigned long flags;

		spin_lock_irqsave(&inst->state_spinlock, flags);
		send_eos_event(inst);
		spin_unlock_irqrestore(&inst->state_spinlock, flags);
	}

	/* streamoff on output cancels any draining operation */
	inst->eos = false;

	return 0;
}
1482 
1483 static int streamoff_capture(struct vb2_queue *q)
1484 {
1485 	struct vpu_instance *inst = vb2_get_drv_priv(q);
1486 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1487 	struct vb2_v4l2_buffer *buf;
1488 	unsigned int i;
1489 	int ret = 0;
1490 
1491 	for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) {
1492 		ret = wave5_vpu_dec_set_disp_flag(inst, i);
1493 		if (ret)
1494 			dev_dbg(inst->dev->dev,
1495 				"%s: Setting display flag of buf index: %u, fail: %d\n",
1496 				__func__, i, ret);
1497 	}
1498 
1499 	while ((buf = v4l2_m2m_dst_buf_remove(m2m_ctx))) {
1500 		u32 plane;
1501 
1502 		dev_dbg(inst->dev->dev, "%s: buf type %4u | index %4u\n",
1503 			__func__, buf->vb2_buf.type, buf->vb2_buf.index);
1504 
1505 		for (plane = 0; plane < inst->dst_fmt.num_planes; plane++)
1506 			vb2_set_plane_payload(&buf->vb2_buf, plane, 0);
1507 
1508 		v4l2_m2m_buf_done(buf, VB2_BUF_STATE_ERROR);
1509 	}
1510 
1511 	if (inst->needs_reallocation) {
1512 		wave5_vpu_dec_give_command(inst, DEC_RESET_FRAMEBUF_INFO, NULL);
1513 		inst->needs_reallocation = false;
1514 	}
1515 
1516 	if (v4l2_m2m_has_stopped(m2m_ctx)) {
1517 		ret = switch_state(inst, VPU_INST_STATE_INIT_SEQ);
1518 		if (ret)
1519 			return ret;
1520 	}
1521 
1522 	return 0;
1523 }
1524 
1525 static void wave5_vpu_dec_stop_streaming(struct vb2_queue *q)
1526 {
1527 	struct vpu_instance *inst = vb2_get_drv_priv(q);
1528 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1529 
1530 	bool check_cmd = TRUE;
1531 
1532 	dev_dbg(inst->dev->dev, "%s: type: %u\n", __func__, q->type);
1533 	pm_runtime_resume_and_get(inst->dev->dev);
1534 	inst->empty_queue = true;
1535 	while (check_cmd) {
1536 		struct queue_status_info q_status;
1537 		struct dec_output_info dec_output_info;
1538 
1539 		wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);
1540 		if ((inst->state == VPU_INST_STATE_STOP ||
1541 		     inst->state == VPU_INST_STATE_INIT_SEQ ||
1542 		     q_status.instance_queue_count == 0) &&
1543 			q_status.report_queue_count == 0)
1544 			break;
1545 
1546 		if (wave5_vpu_dec_get_output_info(inst, &dec_output_info))
1547 			dev_dbg(inst->dev->dev, "there is no output info\n");
1548 	}
1549 
1550 	v4l2_m2m_update_stop_streaming_state(m2m_ctx, q);
1551 
1552 	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
1553 		streamoff_output(q);
1554 	else
1555 		streamoff_capture(q);
1556 
1557 	inst->empty_queue = false;
1558 	inst->sent_eos = false;
1559 	pm_runtime_put_autosuspend(inst->dev->dev);
1560 }
1561 
/* vb2 queue operations shared by the OUTPUT and CAPTURE queues. */
static const struct vb2_ops wave5_vpu_dec_vb2_ops = {
	.queue_setup = wave5_vpu_dec_queue_setup,
	.buf_queue = wave5_vpu_dec_buf_queue,
	.start_streaming = wave5_vpu_dec_start_streaming,
	.stop_streaming = wave5_vpu_dec_stop_streaming,
};
1568 
1569 static void wave5_set_default_format(struct v4l2_pix_format_mplane *src_fmt,
1570 				     struct v4l2_pix_format_mplane *dst_fmt)
1571 {
1572 	src_fmt->pixelformat = dec_fmt_list[VPU_FMT_TYPE_CODEC][0].v4l2_pix_fmt;
1573 	wave5_update_pix_fmt(src_fmt, VPU_FMT_TYPE_CODEC,
1574 			     W5_DEF_DEC_PIC_WIDTH, W5_DEF_DEC_PIC_HEIGHT,
1575 			     &dec_hevc_frmsize);
1576 
1577 	dst_fmt->pixelformat = dec_fmt_list[VPU_FMT_TYPE_RAW][0].v4l2_pix_fmt;
1578 	wave5_update_pix_fmt(dst_fmt, VPU_FMT_TYPE_RAW,
1579 			     W5_DEF_DEC_PIC_WIDTH, W5_DEF_DEC_PIC_HEIGHT,
1580 			     &dec_raw_frmsize);
1581 }
1582 
/* m2m queue-init callback: wire both queues to the decoder vb2 ops. */
static int wave5_vpu_dec_queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)
{
	return wave5_vpu_queue_init(priv, src_vq, dst_vq, &wave5_vpu_dec_vb2_ops);
}
1587 
/* Instance callbacks invoked by the common interrupt/worker code. */
static const struct vpu_instance_ops wave5_vpu_dec_inst_ops = {
	.finish_process = wave5_vpu_dec_finish_decode,
};
1591 
/*
 * Run sequence initialization on the firmware: issue SEQ_INIT, wait for
 * the completion interrupt, collect the initial stream info and apply any
 * resolution change under the state spinlock.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int initialize_sequence(struct vpu_instance *inst)
{
	struct dec_initial_info initial_info;
	int ret = 0;
	unsigned long flags;

	memset(&initial_info, 0, sizeof(struct dec_initial_info));

	ret = wave5_vpu_dec_issue_seq_init(inst);
	if (ret) {
		dev_dbg(inst->dev->dev, "%s: wave5_vpu_dec_issue_seq_init, fail: %d\n",
			__func__, ret);
		return ret;
	}

	/* A timeout here is not fatal; completion below reports the result. */
	if (wave5_vpu_wait_interrupt(inst, VPU_DEC_TIMEOUT) < 0)
		dev_dbg(inst->dev->dev, "%s: failed to call vpu_wait_interrupt()\n", __func__);

	ret = wave5_vpu_dec_complete_seq_init(inst, &initial_info);
	if (ret) {
		dev_dbg(inst->dev->dev, "%s: vpu_dec_complete_seq_init, fail: %d, reason: %u\n",
			__func__, ret, initial_info.seq_init_err_reason);
		/* Return the consumed part of the bitstream to userspace. */
		wave5_handle_src_buffer(inst, initial_info.rd_ptr);
		return ret;
	}

	spin_lock_irqsave(&inst->state_spinlock, flags);
	handle_dynamic_resolution_change(inst);
	spin_unlock_irqrestore(&inst->state_spinlock, flags);

	return 0;
}
1624 
1625 static bool wave5_is_draining_or_eos(struct vpu_instance *inst)
1626 {
1627 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1628 
1629 	lockdep_assert_held(&inst->state_spinlock);
1630 	return m2m_ctx->is_draining || inst->eos;
1631 }
1632 
/*
 * m2m .device_run: execute one decoder job.
 *
 * Feeds the ring buffer (unless retrying a previously failed queueing),
 * then advances the instance state machine: OPEN -> sequence init,
 * INIT_SEQ -> framebuffer preparation and first decode, PIC_RUN -> next
 * decode. The job is finished at the end unless an EOS was already sent,
 * in which case completion is deferred to the interrupt path.
 */
static void wave5_vpu_dec_device_run(void *priv)
{
	struct vpu_instance *inst = priv;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	struct queue_status_info q_status;
	u32 fail_res = 0;
	int ret = 0;

	dev_dbg(inst->dev->dev, "%s: Fill the ring buffer with new bitstream data", __func__);
	pm_runtime_resume_and_get(inst->dev->dev);
	if (!inst->retry) {
		ret = fill_ringbuffer(inst);
		if (ret < 0) {
			dev_warn(inst->dev->dev, "Filling ring buffer failed\n");
			goto finish_job_and_return;
		} else if (!inst->eos &&
				inst->queuing_num == 0 &&
				inst->state == VPU_INST_STATE_PIC_RUN) {
			/* Nothing to decode yet; wait for more bitstream data. */
			dev_dbg(inst->dev->dev, "%s: no bitstream for feeding, so skip ", __func__);
			inst->empty_queue = true;
			goto finish_job_and_return;
		}
	}

	switch (inst->state) {
	case VPU_INST_STATE_OPEN:
		ret = initialize_sequence(inst);
		if (ret) {
			unsigned long flags;

			/*
			 * Sequence init failed: if we were draining and the
			 * last buffer is consumed, finish the stream cleanly.
			 */
			spin_lock_irqsave(&inst->state_spinlock, flags);
			if (wave5_is_draining_or_eos(inst) &&
			    wave5_last_src_buffer_consumed(m2m_ctx)) {
				struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);

				switch_state(inst, VPU_INST_STATE_STOP);

				if (vb2_is_streaming(dst_vq))
					send_eos_event(inst);
				else
					handle_dynamic_resolution_change(inst);

				flag_last_buffer_done(inst);
			}
			spin_unlock_irqrestore(&inst->state_spinlock, flags);
		} else {
			set_instance_state(inst, VPU_INST_STATE_INIT_SEQ);
		}

		break;

	case VPU_INST_STATE_INIT_SEQ:
		/*
		 * Do this early, preparing the fb can trigger an IRQ before
		 * we had a chance to switch, which leads to an invalid state
		 * change.
		 */
		set_instance_state(inst, VPU_INST_STATE_PIC_RUN);
		/*
		 * During DRC, the picture decoding remains pending, so just leave the job
		 * active until this decode operation completes.
		 */
		wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);

		/*
		 * The sequence must be analyzed first to calculate the proper
		 * size of the auxiliary buffers.
		 */
		ret = wave5_prepare_fb(inst);
		if (ret) {
			dev_warn(inst->dev->dev, "Framebuffer preparation, fail: %d\n", ret);
			set_instance_state(inst, VPU_INST_STATE_STOP);
			break;
		}

		if (q_status.instance_queue_count)
			goto finish_job_and_return;

		fallthrough;
	case VPU_INST_STATE_PIC_RUN:
		ret = start_decode(inst, &fail_res);
		if (ret) {
			dev_err(inst->dev->dev,
				"Frame decoding on m2m context (%p), fail: %d (result: %d)\n",
				m2m_ctx, ret, fail_res);
			goto finish_job_and_return;
		}

		/* A queueing failure is retried on the next device_run. */
		if (fail_res == WAVE5_SYSERR_QUEUEING_FAIL) {
			inst->retry = true;
			inst->queuing_fail = true;
		} else {
			inst->retry = false;
			if (!inst->eos)
				inst->queuing_num--;
		}
		break;
	default:
		dev_dbg(inst->dev->dev, "Execution of a job in state %s illegal.\n",
			state_to_str(inst->state));
	}

finish_job_and_return:
	dev_dbg(inst->dev->dev, "%s: leave and finish job", __func__);
	pm_runtime_put_autosuspend(inst->dev->dev);
	/*
	 * After receiving CMD_STOP, there is no input, but we have to run device_run
	 * to send DEC_PIC command until display index == -1, so job_finish was always
	 * called in the device_run to archive it, the logic was very wasteful
	 * in power and CPU time.
	 * If EOS is passed, device_run will not call job_finish no more, it is called
	 * only if HW is idle status in order to reduce overhead.
	 */
	if (!inst->sent_eos)
		v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
}
1749 
1750 static void wave5_vpu_dec_job_abort(void *priv)
1751 {
1752 	struct vpu_instance *inst = priv;
1753 	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1754 	int ret;
1755 
1756 	ret = set_instance_state(inst, VPU_INST_STATE_STOP);
1757 	if (ret)
1758 		return;
1759 
1760 	ret = wave5_vpu_dec_set_eos_on_firmware(inst);
1761 	if (ret)
1762 		dev_warn(inst->dev->dev,
1763 			 "Setting EOS for the bitstream, fail: %d\n", ret);
1764 
1765 	v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
1766 }
1767 
/*
 * m2m .job_ready: decide, under the state spinlock, whether a decode job
 * may be scheduled in the current instance state.
 *
 * Return: 1 when a job can run, 0 otherwise.
 */
static int wave5_vpu_dec_job_ready(void *priv)
{
	struct vpu_instance *inst = priv;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&inst->state_spinlock, flags);

	switch (inst->state) {
	case VPU_INST_STATE_NONE:
		dev_dbg(inst->dev->dev, "Decoder must be open to start queueing M2M jobs!\n");
		break;
	case VPU_INST_STATE_OPEN:
		/* Sequence init needs bitstream data or an active drain. */
		if (wave5_is_draining_or_eos(inst) || !v4l2_m2m_has_stopped(m2m_ctx) ||
		    v4l2_m2m_num_src_bufs_ready(m2m_ctx) > 0) {
			ret = 1;
			break;
		}

		dev_dbg(inst->dev->dev,
			"Decoder must be draining or >= 1 OUTPUT queue buffer must be queued!\n");
		break;
	case VPU_INST_STATE_INIT_SEQ:
	case VPU_INST_STATE_PIC_RUN:
		/* Decoding needs a streaming CAPTURE queue with enough
		 * buffers, plus bitstream data (unless draining) and no
		 * pending queueing failure.
		 */
		if (!m2m_ctx->cap_q_ctx.q.streaming) {
			dev_dbg(inst->dev->dev, "CAPTURE queue must be streaming to queue jobs!\n");
			break;
		} else if (v4l2_m2m_num_dst_bufs_ready(m2m_ctx) < (inst->fbc_buf_count - 1)) {
			dev_dbg(inst->dev->dev,
				"No capture buffer ready to decode!\n");
			break;
		} else if (!wave5_is_draining_or_eos(inst) &&
			   (!v4l2_m2m_num_src_bufs_ready(m2m_ctx) ||
			    inst->empty_queue)) {
			dev_dbg(inst->dev->dev,
				"No bitstream data to decode!\n");
			break;
		} else if (inst->state == VPU_INST_STATE_PIC_RUN &&
			   !wave5_is_draining_or_eos(inst) &&
			   inst->queuing_fail) {
			break;
		}
		ret = 1;
		break;
	case VPU_INST_STATE_STOP:
		dev_dbg(inst->dev->dev, "Decoder is stopped, not running.\n");
		break;
	}

	spin_unlock_irqrestore(&inst->state_spinlock, flags);

	return ret;
}
1822 
/* mem2mem framework callbacks for the decoder. */
static const struct v4l2_m2m_ops wave5_vpu_dec_m2m_ops = {
	.device_run = wave5_vpu_dec_device_run,
	.job_abort = wave5_vpu_dec_job_abort,
	.job_ready = wave5_vpu_dec_job_ready,
};
1828 
/*
 * File-open handler for the decoder video device.
 *
 * Allocates and initializes a new vpu_instance: locks/lists, the m2m
 * context (with ignore_cap_streaming, see comment below), the control
 * handler, default formats, the IRQ fifo and an instance ID, then links
 * the instance into the device's instance list. On any failure,
 * wave5_cleanup_instance() undoes all partial setup.
 */
static int wave5_vpu_open_dec(struct file *filp)
{
	struct video_device *vdev = video_devdata(filp);
	struct vpu_device *dev = video_drvdata(filp);
	struct vpu_instance *inst = NULL;
	struct v4l2_m2m_ctx *m2m_ctx;
	int ret = 0;

	inst = kzalloc_obj(*inst);
	if (!inst)
		return -ENOMEM;

	inst->dev = dev;
	inst->type = VPU_INST_TYPE_DEC;
	inst->ops = &wave5_vpu_dec_inst_ops;

	spin_lock_init(&inst->state_spinlock);
	mutex_init(&inst->feed_lock);
	INIT_LIST_HEAD(&inst->avail_src_bufs);

	inst->codec_info = kzalloc_obj(*inst->codec_info);
	if (!inst->codec_info) {
		kfree(inst);
		return -ENOMEM;
	}

	v4l2_fh_init(&inst->v4l2_fh, vdev);
	v4l2_fh_add(&inst->v4l2_fh, filp);

	INIT_LIST_HEAD(&inst->list);

	inst->v4l2_m2m_dev = inst->dev->v4l2_m2m_dec_dev;
	inst->v4l2_fh.m2m_ctx =
		v4l2_m2m_ctx_init(inst->v4l2_m2m_dev, inst, wave5_vpu_dec_queue_init);
	if (IS_ERR(inst->v4l2_fh.m2m_ctx)) {
		ret = PTR_ERR(inst->v4l2_fh.m2m_ctx);
		goto cleanup_inst;
	}
	m2m_ctx = inst->v4l2_fh.m2m_ctx;

	v4l2_m2m_set_src_buffered(m2m_ctx, true);
	v4l2_m2m_set_dst_buffered(m2m_ctx, true);
	/*
	 * We use the M2M job queue to ensure synchronization of steps where
	 * needed, as IOCTLs can occur at anytime and we need to run commands on
	 * the firmware in a specified order.
	 * In order to initialize the sequence on the firmware within an M2M
	 * job, the M2M framework needs to be able to queue jobs before
	 * the CAPTURE queue has been started, because we need the results of the
	 * initialization to properly prepare the CAPTURE queue with the correct
	 * amount of buffers.
	 * By setting ignore_cap_streaming to true the m2m framework will call
	 * job_ready as soon as the OUTPUT queue is streaming, instead of
	 * waiting until both the CAPTURE and OUTPUT queues are streaming.
	 */
	m2m_ctx->ignore_cap_streaming = true;

	v4l2_ctrl_handler_init(&inst->v4l2_ctrl_hdl, 10);
	v4l2_ctrl_new_std(&inst->v4l2_ctrl_hdl, NULL,
			  V4L2_CID_MIN_BUFFERS_FOR_CAPTURE, 1, 32, 1, 1);

	if (inst->v4l2_ctrl_hdl.error) {
		ret = -ENODEV;
		goto cleanup_inst;
	}

	inst->v4l2_fh.ctrl_handler = &inst->v4l2_ctrl_hdl;
	v4l2_ctrl_handler_setup(&inst->v4l2_ctrl_hdl);

	wave5_set_default_format(&inst->src_fmt, &inst->dst_fmt);
	inst->colorspace = V4L2_COLORSPACE_REC709;
	inst->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
	inst->quantization = V4L2_QUANTIZATION_DEFAULT;
	inst->xfer_func = V4L2_XFER_FUNC_DEFAULT;

	init_completion(&inst->irq_done);
	ret = wave5_kfifo_alloc(inst);
	if (ret) {
		dev_err(inst->dev->dev, "failed to allocate fifo\n");
		goto cleanup_inst;
	}

	inst->id = ida_alloc(&inst->dev->inst_ida, GFP_KERNEL);
	if (inst->id < 0) {
		dev_warn(inst->dev->dev, "Allocating instance ID, fail: %d\n", inst->id);
		ret = inst->id;
		goto cleanup_inst;
	}

	/*
	 * For Wave515 SRAM memory was already allocated
	 * at wave5_vpu_dec_register_device()
	 */
	if (inst->dev->product_code != WAVE515_CODE)
		wave5_vdi_allocate_sram(inst->dev);

	ret = mutex_lock_interruptible(&dev->dev_lock);
	if (ret)
		goto cleanup_inst;

	list_add_tail(&inst->list, &dev->instances);

	mutex_unlock(&dev->dev_lock);

	return 0;

cleanup_inst:
	wave5_cleanup_instance(inst, filp);
	return ret;
}
1939 
/*
 * Close a decoder file handle. All teardown (instance cleanup, firmware
 * shutdown refcounting) is shared with the encoder and lives in
 * wave5_vpu_release_device(); only the per-codec close callback and the
 * log tag differ.
 */
static int wave5_vpu_dec_release(struct file *filp)
{
	return wave5_vpu_release_device(filp, wave5_vpu_dec_close, "decoder");
}
1944 
/*
 * File operations for the decoder video device node. open/release are
 * driver-specific; poll and mmap are delegated to the generic V4L2
 * mem2mem helpers, which operate on the m2m context stored in the
 * v4l2_fh at open time.
 */
static const struct v4l2_file_operations wave5_vpu_dec_fops = {
	.owner = THIS_MODULE,
	.open = wave5_vpu_open_dec,
	.release = wave5_vpu_dec_release,
	.unlocked_ioctl = video_ioctl2,
	.poll = v4l2_m2m_fop_poll,
	.mmap = v4l2_m2m_fop_mmap,
};
1953 
1954 int wave5_vpu_dec_register_device(struct vpu_device *dev)
1955 {
1956 	struct video_device *vdev_dec;
1957 	int ret;
1958 
1959 	/*
1960 	 * Secondary AXI setup for Wave515 is done by INIT_VPU command,
1961 	 * i.e. wave5_vpu_init(), that's why we allocate SRAM memory early.
1962 	 */
1963 	if (dev->product_code == WAVE515_CODE)
1964 		wave5_vdi_allocate_sram(dev);
1965 
1966 	vdev_dec = devm_kzalloc(dev->v4l2_dev.dev, sizeof(*vdev_dec), GFP_KERNEL);
1967 	if (!vdev_dec)
1968 		return -ENOMEM;
1969 
1970 	dev->v4l2_m2m_dec_dev = v4l2_m2m_init(&wave5_vpu_dec_m2m_ops);
1971 	if (IS_ERR(dev->v4l2_m2m_dec_dev)) {
1972 		ret = PTR_ERR(dev->v4l2_m2m_dec_dev);
1973 		dev_err(dev->dev, "v4l2_m2m_init, fail: %d\n", ret);
1974 		return -EINVAL;
1975 	}
1976 
1977 	dev->video_dev_dec = vdev_dec;
1978 
1979 	strscpy(vdev_dec->name, VPU_DEC_DEV_NAME, sizeof(vdev_dec->name));
1980 	vdev_dec->fops = &wave5_vpu_dec_fops;
1981 	vdev_dec->ioctl_ops = &wave5_vpu_dec_ioctl_ops;
1982 	vdev_dec->release = video_device_release_empty;
1983 	vdev_dec->v4l2_dev = &dev->v4l2_dev;
1984 	vdev_dec->vfl_dir = VFL_DIR_M2M;
1985 	vdev_dec->device_caps = V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_STREAMING;
1986 	vdev_dec->lock = &dev->dev_lock;
1987 
1988 	ret = video_register_device(vdev_dec, VFL_TYPE_VIDEO, -1);
1989 	if (ret)
1990 		return ret;
1991 
1992 	video_set_drvdata(vdev_dec, dev);
1993 
1994 	return 0;
1995 }
1996 
1997 void wave5_vpu_dec_unregister_device(struct vpu_device *dev)
1998 {
1999 	/*
2000 	 * Here is a freeing pair for Wave515 SRAM memory allocation
2001 	 * happened at wave5_vpu_dec_register_device().
2002 	 */
2003 	if (dev->product_code == WAVE515_CODE)
2004 		wave5_vdi_free_sram(dev);
2005 
2006 	video_unregister_device(dev->video_dev_dec);
2007 	if (dev->v4l2_m2m_dec_dev)
2008 		v4l2_m2m_release(dev->v4l2_m2m_dec_dev);
2009 }
2010