xref: /linux/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c (revision 3fd6c59042dbba50391e30862beac979491145fe)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2023, Collabora
4  *
5  * Author: Benjamin Gaignard <benjamin.gaignard@collabora.com>
6  */
7 
8 #include <media/v4l2-mem2mem.h>
9 #include "hantro.h"
10 #include "hantro_v4l2.h"
11 #include "rockchip_vpu981_regs.h"
12 
/* Value written to the hardware decode-mode register to select AV1. */
#define AV1_DEC_MODE		17
/* One global motion model per reference slot (LAST .. ALTREF). */
#define GM_GLOBAL_MODELS_PER_FRAME	7
/* Per model: six 32-bit warp parameters + four 16-bit shear parameters. */
#define GLOBAL_MODEL_TOTAL_SIZE	(6 * 4 + 4 * 2)
#define GLOBAL_MODEL_SIZE	ALIGN(GM_GLOBAL_MODELS_PER_FRAME * GLOBAL_MODEL_TOTAL_SIZE, 2048)
#define AV1_MAX_TILES		128
/* 16 bytes of descriptor (size in SBs, start/end offsets) per tile. */
#define AV1_TILE_INFO_SIZE	(AV1_MAX_TILES * 16)
#define AV1DEC_MAX_PIC_BUFFERS	24
/* Reference scaling ratios are expressed in Q14 fixed point. */
#define AV1_REF_SCALE_SHIFT	14
#define AV1_INVALID_IDX		-1
#define MAX_FRAME_DISTANCE	31
/* primary_ref_frame value meaning "no primary reference" (AV1 spec). */
#define AV1_PRIMARY_REF_NONE	7
#define AV1_TILE_SIZE		ALIGN(32 * 128, 4096)
/*
 * These 3 values aren't defined enum v4l2_av1_segment_feature because
 * they are not part of the specification
 */
#define V4L2_AV1_SEG_LVL_ALT_LF_Y_H	2
#define V4L2_AV1_SEG_LVL_ALT_LF_U	3
#define V4L2_AV1_SEG_LVL_ALT_LF_V	4

/* Superres scaling constants (names match the AV1 specification). */
#define SUPERRES_SCALE_BITS 3
#define SCALE_NUMERATOR 8
#define SUPERRES_SCALE_DENOMINATOR_MIN (SCALE_NUMERATOR + 1)

/* Fixed-point precision constants used by the superres upscale step. */
#define RS_SUBPEL_BITS 6
#define RS_SUBPEL_MASK ((1 << RS_SUBPEL_BITS) - 1)
#define RS_SCALE_SUBPEL_BITS 14
#define RS_SCALE_SUBPEL_MASK ((1 << RS_SCALE_SUBPEL_BITS) - 1)
#define RS_SCALE_EXTRA_BITS (RS_SCALE_SUBPEL_BITS - RS_SUBPEL_BITS)
#define RS_SCALE_EXTRA_OFF (1 << (RS_SCALE_EXTRA_BITS - 1))

/* True for frame types that use no inter prediction. */
#define IS_INTRA(type) ((type == V4L2_AV1_KEY_FRAME) || (type == V4L2_AV1_INTRA_ONLY_FRAME))

/* Zero-based buffer indices for the seven reference slots. */
#define LST_BUF_IDX (V4L2_AV1_REF_LAST_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define LST2_BUF_IDX (V4L2_AV1_REF_LAST2_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define LST3_BUF_IDX (V4L2_AV1_REF_LAST3_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define GLD_BUF_IDX (V4L2_AV1_REF_GOLDEN_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define BWD_BUF_IDX (V4L2_AV1_REF_BWDREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define ALT2_BUF_IDX (V4L2_AV1_REF_ALTREF2_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define ALT_BUF_IDX (V4L2_AV1_REF_ALTREF_FRAME - V4L2_AV1_REF_LAST_FRAME)

/* Constants for the div_lut reciprocal table and warp-model precision. */
#define DIV_LUT_PREC_BITS 14
#define DIV_LUT_BITS 8
#define DIV_LUT_NUM BIT(DIV_LUT_BITS)
#define WARP_PARAM_REDUCE_BITS 6
#define WARPEDMODEL_PREC_BITS 16

/* Rounding right shift: (value + 2^(n-1)) >> n. Unsigned/positive input. */
#define AV1_DIV_ROUND_UP_POW2(value, n)			\
({							\
	typeof(n) _n  = n;				\
	typeof(value) _value = value;			\
	(_value + (BIT(_n) >> 1)) >> _n;		\
})

/* Signed variant: rounds the magnitude, preserving the sign of value. */
#define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n)				\
({									\
	typeof(n) _n_  = n;						\
	typeof(value) _value_ = value;					\
	(((_value_) < 0) ? -AV1_DIV_ROUND_UP_POW2(-(_value_), (_n_))	\
		: AV1_DIV_ROUND_UP_POW2((_value_), (_n_)));		\
})
74 
/*
 * Layout of the film grain buffer shared with the hardware (allocated
 * in rockchip_vpu981_av1_dec_init()): per-plane scaling lookup tables
 * followed by the cropped luma/chroma grain sample blocks.
 */
struct rockchip_av1_film_grain {
	u8 scaling_lut_y[256];
	u8 scaling_lut_cb[256];
	u8 scaling_lut_cr[256];
	s16 cropped_luma_grain_block[4096];
	s16 cropped_chroma_grain_block[1024 * 2];
};
82 
/*
 * Precomputed reciprocal table from the AV1 specification's division
 * process: div_lut[f] ~= (1 << (DIV_LUT_PREC_BITS + DIV_LUT_BITS)) /
 * (DIV_LUT_NUM + f), i.e. 2^22 / (256 + f), for f in [0, DIV_LUT_NUM].
 * Used by rockchip_vpu981_av1_dec_resolve_divisor_32().
 */
static const short div_lut[DIV_LUT_NUM + 1] = {
	16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
	15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
	15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
	14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
	13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
	13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
	13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
	12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
	12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
	11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
	11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
	11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
	10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
	10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
	10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
	9963,  9939,  9916,  9892,  9869,  9846,  9823,  9800,  9777,  9754,  9732,
	9709,  9687,  9664,  9642,  9620,  9598,  9576,  9554,  9533,  9511,  9489,
	9468,  9447,  9425,  9404,  9383,  9362,  9341,  9321,  9300,  9279,  9259,
	9239,  9218,  9198,  9178,  9158,  9138,  9118,  9098,  9079,  9059,  9039,
	9020,  9001,  8981,  8962,  8943,  8924,  8905,  8886,  8867,  8849,  8830,
	8812,  8793,  8775,  8756,  8738,  8720,  8702,  8684,  8666,  8648,  8630,
	8613,  8595,  8577,  8560,  8542,  8525,  8508,  8490,  8473,  8456,  8439,
	8422,  8405,  8389,  8372,  8355,  8339,  8322,  8306,  8289,  8273,  8257,
	8240,  8224,  8208,  8192,
};
109 
rockchip_vpu981_get_frame_index(struct hantro_ctx * ctx,int ref)110 static int rockchip_vpu981_get_frame_index(struct hantro_ctx *ctx, int ref)
111 {
112 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
113 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
114 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
115 	u64 timestamp;
116 	int i, idx = frame->ref_frame_idx[ref];
117 
118 	if (idx >= V4L2_AV1_TOTAL_REFS_PER_FRAME || idx < 0)
119 		return AV1_INVALID_IDX;
120 
121 	timestamp = frame->reference_frame_ts[idx];
122 	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
123 		if (!av1_dec->frame_refs[i].used)
124 			continue;
125 		if (av1_dec->frame_refs[i].timestamp == timestamp)
126 			return i;
127 	}
128 
129 	return AV1_INVALID_IDX;
130 }
131 
rockchip_vpu981_get_order_hint(struct hantro_ctx * ctx,int ref)132 static int rockchip_vpu981_get_order_hint(struct hantro_ctx *ctx, int ref)
133 {
134 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
135 	int idx = rockchip_vpu981_get_frame_index(ctx, ref);
136 
137 	if (idx != AV1_INVALID_IDX)
138 		return av1_dec->frame_refs[idx].order_hint;
139 
140 	return 0;
141 }
142 
/*
 * Register the current destination buffer (identified by @timestamp) in
 * the first free frame_refs[] slot, capturing the metadata the decoder
 * needs to use it as a reference later.  Returns the slot index, or
 * AV1_INVALID_IDX when no slot is free.
 */
static int rockchip_vpu981_av1_dec_frame_ref(struct hantro_ctx *ctx,
					     u64 timestamp)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	unsigned int width = frame->frame_width_minus_1 + 1;
	unsigned int height = frame->frame_height_minus_1 + 1;
	int slot;

	for (slot = 0; slot < AV1_MAX_FRAME_BUF_COUNT; slot++) {
		int i;

		if (av1_dec->frame_refs[slot].used)
			continue;

		av1_dec->frame_refs[slot].width = width;
		av1_dec->frame_refs[slot].height = height;
		/* mi units are 8x8 pixels. */
		av1_dec->frame_refs[slot].mi_cols = DIV_ROUND_UP(width, 8);
		av1_dec->frame_refs[slot].mi_rows = DIV_ROUND_UP(height, 8);
		av1_dec->frame_refs[slot].timestamp = timestamp;
		av1_dec->frame_refs[slot].frame_type = frame->frame_type;
		av1_dec->frame_refs[slot].order_hint = frame->order_hint;
		av1_dec->frame_refs[slot].vb2_ref = hantro_get_dst_buf(ctx);

		for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++)
			av1_dec->frame_refs[slot].order_hints[i] = frame->order_hints[i];
		av1_dec->frame_refs[slot].used = true;
		av1_dec->current_frame_index = slot;

		return slot;
	}

	return AV1_INVALID_IDX;
}
176 
rockchip_vpu981_av1_dec_frame_unref(struct hantro_ctx * ctx,int idx)177 static void rockchip_vpu981_av1_dec_frame_unref(struct hantro_ctx *ctx, int idx)
178 {
179 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
180 
181 	if (idx >= 0)
182 		av1_dec->frame_refs[idx].used = false;
183 }
184 
rockchip_vpu981_av1_dec_clean_refs(struct hantro_ctx * ctx)185 static void rockchip_vpu981_av1_dec_clean_refs(struct hantro_ctx *ctx)
186 {
187 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
188 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
189 
190 	int ref, idx;
191 
192 	for (idx = 0; idx < AV1_MAX_FRAME_BUF_COUNT; idx++) {
193 		u64 timestamp = av1_dec->frame_refs[idx].timestamp;
194 		bool used = false;
195 
196 		if (!av1_dec->frame_refs[idx].used)
197 			continue;
198 
199 		for (ref = 0; ref < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref++) {
200 			if (ctrls->frame->reference_frame_ts[ref] == timestamp)
201 				used = true;
202 		}
203 
204 		if (!used)
205 			rockchip_vpu981_av1_dec_frame_unref(ctx, idx);
206 	}
207 }
208 
rockchip_vpu981_av1_dec_luma_size(struct hantro_ctx * ctx)209 static size_t rockchip_vpu981_av1_dec_luma_size(struct hantro_ctx *ctx)
210 {
211 	return ctx->dst_fmt.width * ctx->dst_fmt.height * ctx->bit_depth / 8;
212 }
213 
rockchip_vpu981_av1_dec_chroma_size(struct hantro_ctx * ctx)214 static size_t rockchip_vpu981_av1_dec_chroma_size(struct hantro_ctx *ctx)
215 {
216 	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
217 
218 	return ALIGN((cr_offset * 3) / 2, 64);
219 }
220 
rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx * ctx)221 static void rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx *ctx)
222 {
223 	struct hantro_dev *vpu = ctx->dev;
224 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
225 
226 	if (av1_dec->db_data_col.cpu)
227 		dma_free_coherent(vpu->dev, av1_dec->db_data_col.size,
228 				  av1_dec->db_data_col.cpu,
229 				  av1_dec->db_data_col.dma);
230 	av1_dec->db_data_col.cpu = NULL;
231 
232 	if (av1_dec->db_ctrl_col.cpu)
233 		dma_free_coherent(vpu->dev, av1_dec->db_ctrl_col.size,
234 				  av1_dec->db_ctrl_col.cpu,
235 				  av1_dec->db_ctrl_col.dma);
236 	av1_dec->db_ctrl_col.cpu = NULL;
237 
238 	if (av1_dec->cdef_col.cpu)
239 		dma_free_coherent(vpu->dev, av1_dec->cdef_col.size,
240 				  av1_dec->cdef_col.cpu, av1_dec->cdef_col.dma);
241 	av1_dec->cdef_col.cpu = NULL;
242 
243 	if (av1_dec->sr_col.cpu)
244 		dma_free_coherent(vpu->dev, av1_dec->sr_col.size,
245 				  av1_dec->sr_col.cpu, av1_dec->sr_col.dma);
246 	av1_dec->sr_col.cpu = NULL;
247 
248 	if (av1_dec->lr_col.cpu)
249 		dma_free_coherent(vpu->dev, av1_dec->lr_col.size,
250 				  av1_dec->lr_col.cpu, av1_dec->lr_col.dma);
251 	av1_dec->lr_col.cpu = NULL;
252 }
253 
rockchip_vpu981_av1_dec_tiles_reallocate(struct hantro_ctx * ctx)254 static int rockchip_vpu981_av1_dec_tiles_reallocate(struct hantro_ctx *ctx)
255 {
256 	struct hantro_dev *vpu = ctx->dev;
257 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
258 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
259 	const struct v4l2_av1_tile_info *tile_info = &ctrls->frame->tile_info;
260 	unsigned int num_tile_cols = tile_info->tile_cols;
261 	unsigned int height = ALIGN(ctrls->frame->frame_height_minus_1 + 1, 64);
262 	unsigned int height_in_sb = height / 64;
263 	unsigned int stripe_num = ((height + 8) + 63) / 64;
264 	size_t size;
265 
266 	if (av1_dec->db_data_col.size >=
267 	    ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols)
268 		return 0;
269 
270 	rockchip_vpu981_av1_dec_tiles_free(ctx);
271 
272 	size = ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols;
273 	av1_dec->db_data_col.cpu = dma_alloc_coherent(vpu->dev, size,
274 						      &av1_dec->db_data_col.dma,
275 						      GFP_KERNEL);
276 	if (!av1_dec->db_data_col.cpu)
277 		goto buffer_allocation_error;
278 	av1_dec->db_data_col.size = size;
279 
280 	size = ALIGN(height * 2 * 16 / 4, 128) * num_tile_cols;
281 	av1_dec->db_ctrl_col.cpu = dma_alloc_coherent(vpu->dev, size,
282 						      &av1_dec->db_ctrl_col.dma,
283 						      GFP_KERNEL);
284 	if (!av1_dec->db_ctrl_col.cpu)
285 		goto buffer_allocation_error;
286 	av1_dec->db_ctrl_col.size = size;
287 
288 	size = ALIGN(height_in_sb * 44 * ctx->bit_depth * 16 / 8, 128) * num_tile_cols;
289 	av1_dec->cdef_col.cpu = dma_alloc_coherent(vpu->dev, size,
290 						   &av1_dec->cdef_col.dma,
291 						   GFP_KERNEL);
292 	if (!av1_dec->cdef_col.cpu)
293 		goto buffer_allocation_error;
294 	av1_dec->cdef_col.size = size;
295 
296 	size = ALIGN(height_in_sb * (3040 + 1280), 128) * num_tile_cols;
297 	av1_dec->sr_col.cpu = dma_alloc_coherent(vpu->dev, size,
298 						 &av1_dec->sr_col.dma,
299 						 GFP_KERNEL);
300 	if (!av1_dec->sr_col.cpu)
301 		goto buffer_allocation_error;
302 	av1_dec->sr_col.size = size;
303 
304 	size = ALIGN(stripe_num * 1536 * ctx->bit_depth / 8, 128) * num_tile_cols;
305 	av1_dec->lr_col.cpu = dma_alloc_coherent(vpu->dev, size,
306 						 &av1_dec->lr_col.dma,
307 						 GFP_KERNEL);
308 	if (!av1_dec->lr_col.cpu)
309 		goto buffer_allocation_error;
310 	av1_dec->lr_col.size = size;
311 
312 	av1_dec->num_tile_cols_allocated = num_tile_cols;
313 	return 0;
314 
315 buffer_allocation_error:
316 	rockchip_vpu981_av1_dec_tiles_free(ctx);
317 	return -ENOMEM;
318 }
319 
rockchip_vpu981_av1_dec_exit(struct hantro_ctx * ctx)320 void rockchip_vpu981_av1_dec_exit(struct hantro_ctx *ctx)
321 {
322 	struct hantro_dev *vpu = ctx->dev;
323 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
324 
325 	if (av1_dec->global_model.cpu)
326 		dma_free_coherent(vpu->dev, av1_dec->global_model.size,
327 				  av1_dec->global_model.cpu,
328 				  av1_dec->global_model.dma);
329 	av1_dec->global_model.cpu = NULL;
330 
331 	if (av1_dec->tile_info.cpu)
332 		dma_free_coherent(vpu->dev, av1_dec->tile_info.size,
333 				  av1_dec->tile_info.cpu,
334 				  av1_dec->tile_info.dma);
335 	av1_dec->tile_info.cpu = NULL;
336 
337 	if (av1_dec->film_grain.cpu)
338 		dma_free_coherent(vpu->dev, av1_dec->film_grain.size,
339 				  av1_dec->film_grain.cpu,
340 				  av1_dec->film_grain.dma);
341 	av1_dec->film_grain.cpu = NULL;
342 
343 	if (av1_dec->prob_tbl.cpu)
344 		dma_free_coherent(vpu->dev, av1_dec->prob_tbl.size,
345 				  av1_dec->prob_tbl.cpu, av1_dec->prob_tbl.dma);
346 	av1_dec->prob_tbl.cpu = NULL;
347 
348 	if (av1_dec->prob_tbl_out.cpu)
349 		dma_free_coherent(vpu->dev, av1_dec->prob_tbl_out.size,
350 				  av1_dec->prob_tbl_out.cpu,
351 				  av1_dec->prob_tbl_out.dma);
352 	av1_dec->prob_tbl_out.cpu = NULL;
353 
354 	if (av1_dec->tile_buf.cpu)
355 		dma_free_coherent(vpu->dev, av1_dec->tile_buf.size,
356 				  av1_dec->tile_buf.cpu, av1_dec->tile_buf.dma);
357 	av1_dec->tile_buf.cpu = NULL;
358 
359 	rockchip_vpu981_av1_dec_tiles_free(ctx);
360 }
361 
rockchip_vpu981_av1_dec_init(struct hantro_ctx * ctx)362 int rockchip_vpu981_av1_dec_init(struct hantro_ctx *ctx)
363 {
364 	struct hantro_dev *vpu = ctx->dev;
365 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
366 
367 	memset(av1_dec, 0, sizeof(*av1_dec));
368 
369 	av1_dec->global_model.cpu = dma_alloc_coherent(vpu->dev, GLOBAL_MODEL_SIZE,
370 						       &av1_dec->global_model.dma,
371 						       GFP_KERNEL);
372 	if (!av1_dec->global_model.cpu)
373 		return -ENOMEM;
374 	av1_dec->global_model.size = GLOBAL_MODEL_SIZE;
375 
376 	av1_dec->tile_info.cpu = dma_alloc_coherent(vpu->dev, AV1_MAX_TILES,
377 						    &av1_dec->tile_info.dma,
378 						    GFP_KERNEL);
379 	if (!av1_dec->tile_info.cpu)
380 		return -ENOMEM;
381 	av1_dec->tile_info.size = AV1_MAX_TILES;
382 
383 	av1_dec->film_grain.cpu = dma_alloc_coherent(vpu->dev,
384 						     ALIGN(sizeof(struct rockchip_av1_film_grain), 2048),
385 						     &av1_dec->film_grain.dma,
386 						     GFP_KERNEL);
387 	if (!av1_dec->film_grain.cpu)
388 		return -ENOMEM;
389 	av1_dec->film_grain.size = ALIGN(sizeof(struct rockchip_av1_film_grain), 2048);
390 
391 	av1_dec->prob_tbl.cpu = dma_alloc_coherent(vpu->dev,
392 						   ALIGN(sizeof(struct av1cdfs), 2048),
393 						   &av1_dec->prob_tbl.dma,
394 						   GFP_KERNEL);
395 	if (!av1_dec->prob_tbl.cpu)
396 		return -ENOMEM;
397 	av1_dec->prob_tbl.size = ALIGN(sizeof(struct av1cdfs), 2048);
398 
399 	av1_dec->prob_tbl_out.cpu = dma_alloc_coherent(vpu->dev,
400 						       ALIGN(sizeof(struct av1cdfs), 2048),
401 						       &av1_dec->prob_tbl_out.dma,
402 						       GFP_KERNEL);
403 	if (!av1_dec->prob_tbl_out.cpu)
404 		return -ENOMEM;
405 	av1_dec->prob_tbl_out.size = ALIGN(sizeof(struct av1cdfs), 2048);
406 	av1_dec->cdfs = &av1_dec->default_cdfs;
407 	av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
408 
409 	rockchip_av1_set_default_cdfs(av1_dec->cdfs, av1_dec->cdfs_ndvc);
410 
411 	av1_dec->tile_buf.cpu = dma_alloc_coherent(vpu->dev,
412 						   AV1_TILE_SIZE,
413 						   &av1_dec->tile_buf.dma,
414 						   GFP_KERNEL);
415 	if (!av1_dec->tile_buf.cpu)
416 		return -ENOMEM;
417 	av1_dec->tile_buf.size = AV1_TILE_SIZE;
418 
419 	return 0;
420 }
421 
rockchip_vpu981_av1_dec_prepare_run(struct hantro_ctx * ctx)422 static int rockchip_vpu981_av1_dec_prepare_run(struct hantro_ctx *ctx)
423 {
424 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
425 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
426 
427 	ctrls->sequence = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_SEQUENCE);
428 	if (WARN_ON(!ctrls->sequence))
429 		return -EINVAL;
430 
431 	ctrls->tile_group_entry =
432 	    hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY);
433 	if (WARN_ON(!ctrls->tile_group_entry))
434 		return -EINVAL;
435 
436 	ctrls->frame = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FRAME);
437 	if (WARN_ON(!ctrls->frame))
438 		return -EINVAL;
439 
440 	ctrls->film_grain =
441 	    hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FILM_GRAIN);
442 
443 	return rockchip_vpu981_av1_dec_tiles_reallocate(ctx);
444 }
445 
/* Index of the most significant set bit of @n; defined as 0 for n == 0. */
static inline int rockchip_vpu981_av1_dec_get_msb(u32 n)
{
	return n ? 31 - __builtin_clz(n) : 0;
}
452 
/*
 * Resolve a fixed-point reciprocal for divisor @d via the precomputed
 * div_lut table, following the AV1 specification's division process.
 * On return *shift holds the total right shift to apply after
 * multiplying by the returned factor (DIV_LUT_PREC_BITS plus the bit
 * position of d's MSB).  Returns -1 if the table index overflows.
 */
static short rockchip_vpu981_av1_dec_resolve_divisor_32(u32 d, short *shift)
{
	int f;
	u64 e;

	*shift = rockchip_vpu981_av1_dec_get_msb(d);
	/* e is obtained from D after resetting the most significant 1 bit. */
	e = d - ((u32)1 << *shift);
	/* Get the most significant DIV_LUT_BITS (8) bits of e into f */
	if (*shift > DIV_LUT_BITS)
		f = AV1_DIV_ROUND_UP_POW2(e, *shift - DIV_LUT_BITS);
	else
		f = e << (DIV_LUT_BITS - *shift);
	if (f > DIV_LUT_NUM)
		return -1;
	*shift += DIV_LUT_PREC_BITS;
	/* Use f as lookup into the precomputed table of multipliers */
	return div_lut[f];
}
472 
/*
 * Derive the shear parameters (alpha, beta, gamma, delta) used for
 * warped motion from the six global-motion warp parameters in @params.
 * Outputs are left untouched when mat[2] <= 0 (callers pre-initialize
 * them to 0), and each result is clamped to s16 range and reduced to
 * WARP_PARAM_REDUCE_BITS precision.
 *
 * NOTE(review): @params is const u32 * but is read through const int *;
 * the warp parameters are signed values, so this relies on the usual
 * two's-complement reinterpretation -- matches existing behavior.
 */
static void
rockchip_vpu981_av1_dec_get_shear_params(const u32 *params, s64 *alpha,
					 s64 *beta, s64 *gamma, s64 *delta)
{
	const int *mat = params;
	short shift;
	short y;
	long long gv, dv;

	/* Degenerate warp model: keep the callers' zero defaults. */
	if (mat[2] <= 0)
		return;

	*alpha = clamp_val(mat[2] - (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);
	*beta = clamp_val(mat[3], S16_MIN, S16_MAX);

	/* Fixed-point reciprocal of mat[2], carrying mat[2]'s sign in y. */
	y = rockchip_vpu981_av1_dec_resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);

	gv = ((long long)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;

	*gamma = clamp_val((int)AV1_DIV_ROUND_UP_POW2_SIGNED(gv, shift), S16_MIN, S16_MAX);

	dv = ((long long)mat[3] * mat[4]) * y;
	*delta = clamp_val(mat[5] -
		(int)AV1_DIV_ROUND_UP_POW2_SIGNED(dv, shift) - (1 << WARPEDMODEL_PREC_BITS),
		S16_MIN, S16_MAX);

	/* Round to WARP_PARAM_REDUCE_BITS granularity, keeping the scale. */
	*alpha = AV1_DIV_ROUND_UP_POW2_SIGNED(*alpha, WARP_PARAM_REDUCE_BITS)
		 * (1 << WARP_PARAM_REDUCE_BITS);
	*beta = AV1_DIV_ROUND_UP_POW2_SIGNED(*beta, WARP_PARAM_REDUCE_BITS)
		* (1 << WARP_PARAM_REDUCE_BITS);
	*gamma = AV1_DIV_ROUND_UP_POW2_SIGNED(*gamma, WARP_PARAM_REDUCE_BITS)
		 * (1 << WARP_PARAM_REDUCE_BITS);
	*delta = AV1_DIV_ROUND_UP_POW2_SIGNED(*delta, WARP_PARAM_REDUCE_BITS)
		* (1 << WARP_PARAM_REDUCE_BITS);
}
508 
rockchip_vpu981_av1_dec_set_global_model(struct hantro_ctx * ctx)509 static void rockchip_vpu981_av1_dec_set_global_model(struct hantro_ctx *ctx)
510 {
511 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
512 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
513 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
514 	const struct v4l2_av1_global_motion *gm = &frame->global_motion;
515 	u8 *dst = av1_dec->global_model.cpu;
516 	struct hantro_dev *vpu = ctx->dev;
517 	int ref_frame, i;
518 
519 	memset(dst, 0, GLOBAL_MODEL_SIZE);
520 	for (ref_frame = 0; ref_frame < V4L2_AV1_REFS_PER_FRAME; ++ref_frame) {
521 		s64 alpha = 0, beta = 0, gamma = 0, delta = 0;
522 
523 		for (i = 0; i < 6; ++i) {
524 			if (i == 2)
525 				*(s32 *)dst =
526 					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][3];
527 			else if (i == 3)
528 				*(s32 *)dst =
529 					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][2];
530 			else
531 				*(s32 *)dst =
532 					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][i];
533 			dst += 4;
534 		}
535 
536 		if (gm->type[V4L2_AV1_REF_LAST_FRAME + ref_frame] <= V4L2_AV1_WARP_MODEL_AFFINE)
537 			rockchip_vpu981_av1_dec_get_shear_params(&gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][0],
538 								 &alpha, &beta, &gamma, &delta);
539 
540 		*(s16 *)dst = alpha;
541 		dst += 2;
542 		*(s16 *)dst = beta;
543 		dst += 2;
544 		*(s16 *)dst = gamma;
545 		dst += 2;
546 		*(s16 *)dst = delta;
547 		dst += 2;
548 	}
549 
550 	hantro_write_addr(vpu, AV1_GLOBAL_MODEL, av1_dec->global_model.dma);
551 }
552 
/* Smallest k such that (1 << k) >= target (0 for target <= 1). */
static int rockchip_vpu981_av1_tile_log2(int target)
{
	int k = 0;

	while ((1 << k) < target)
		k++;

	return k;
}
565 
rockchip_vpu981_av1_dec_set_tile_info(struct hantro_ctx * ctx)566 static void rockchip_vpu981_av1_dec_set_tile_info(struct hantro_ctx *ctx)
567 {
568 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
569 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
570 	const struct v4l2_av1_tile_info *tile_info = &ctrls->frame->tile_info;
571 	const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
572 	    ctrls->tile_group_entry;
573 	int context_update_y =
574 	    tile_info->context_update_tile_id / tile_info->tile_cols;
575 	int context_update_x =
576 	    tile_info->context_update_tile_id % tile_info->tile_cols;
577 	int context_update_tile_id =
578 	    context_update_x * tile_info->tile_rows + context_update_y;
579 	u8 *dst = av1_dec->tile_info.cpu;
580 	struct hantro_dev *vpu = ctx->dev;
581 	int tile0, tile1;
582 
583 	memset(dst, 0, av1_dec->tile_info.size);
584 
585 	for (tile0 = 0; tile0 < tile_info->tile_cols; tile0++) {
586 		for (tile1 = 0; tile1 < tile_info->tile_rows; tile1++) {
587 			int tile_id = tile1 * tile_info->tile_cols + tile0;
588 			u32 start, end;
589 			u32 y0 =
590 			    tile_info->height_in_sbs_minus_1[tile1] + 1;
591 			u32 x0 = tile_info->width_in_sbs_minus_1[tile0] + 1;
592 
593 			/* tile size in SB units (width,height) */
594 			*dst++ = x0;
595 			*dst++ = 0;
596 			*dst++ = 0;
597 			*dst++ = 0;
598 			*dst++ = y0;
599 			*dst++ = 0;
600 			*dst++ = 0;
601 			*dst++ = 0;
602 
603 			/* tile start position */
604 			start = group_entry[tile_id].tile_offset - group_entry[0].tile_offset;
605 			*dst++ = start & 255;
606 			*dst++ = (start >> 8) & 255;
607 			*dst++ = (start >> 16) & 255;
608 			*dst++ = (start >> 24) & 255;
609 
610 			/* number of bytes in tile data */
611 			end = start + group_entry[tile_id].tile_size;
612 			*dst++ = end & 255;
613 			*dst++ = (end >> 8) & 255;
614 			*dst++ = (end >> 16) & 255;
615 			*dst++ = (end >> 24) & 255;
616 		}
617 	}
618 
619 	hantro_reg_write(vpu, &av1_multicore_expect_context_update, !!(context_update_x == 0));
620 	hantro_reg_write(vpu, &av1_tile_enable,
621 			 !!((tile_info->tile_cols > 1) || (tile_info->tile_rows > 1)));
622 	hantro_reg_write(vpu, &av1_num_tile_cols_8k, tile_info->tile_cols);
623 	hantro_reg_write(vpu, &av1_num_tile_rows_8k, tile_info->tile_rows);
624 	hantro_reg_write(vpu, &av1_context_update_tile_id, context_update_tile_id);
625 	hantro_reg_write(vpu, &av1_tile_transpose, 1);
626 	if (rockchip_vpu981_av1_tile_log2(tile_info->tile_cols) ||
627 	    rockchip_vpu981_av1_tile_log2(tile_info->tile_rows))
628 		hantro_reg_write(vpu, &av1_dec_tile_size_mag, tile_info->tile_size_bytes - 1);
629 	else
630 		hantro_reg_write(vpu, &av1_dec_tile_size_mag, 3);
631 
632 	hantro_write_addr(vpu, AV1_TILE_BASE, av1_dec->tile_info.dma);
633 }
634 
rockchip_vpu981_av1_dec_get_dist(struct hantro_ctx * ctx,int a,int b)635 static int rockchip_vpu981_av1_dec_get_dist(struct hantro_ctx *ctx,
636 					    int a, int b)
637 {
638 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
639 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
640 	int bits = ctrls->sequence->order_hint_bits - 1;
641 	int diff, m;
642 
643 	if (!ctrls->sequence->order_hint_bits)
644 		return 0;
645 
646 	diff = a - b;
647 	m = 1 << bits;
648 	diff = (diff & (m - 1)) - (diff & m);
649 
650 	return diff;
651 }
652 
rockchip_vpu981_av1_dec_set_frame_sign_bias(struct hantro_ctx * ctx)653 static void rockchip_vpu981_av1_dec_set_frame_sign_bias(struct hantro_ctx *ctx)
654 {
655 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
656 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
657 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
658 	const struct v4l2_ctrl_av1_sequence *sequence = ctrls->sequence;
659 	int i;
660 
661 	if (!sequence->order_hint_bits || IS_INTRA(frame->frame_type)) {
662 		for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++)
663 			av1_dec->ref_frame_sign_bias[i] = 0;
664 
665 		return;
666 	}
667 	// Identify the nearest forward and backward references.
668 	for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; i++) {
669 		if (rockchip_vpu981_get_frame_index(ctx, i) >= 0) {
670 			int rel_off =
671 			    rockchip_vpu981_av1_dec_get_dist(ctx,
672 							     rockchip_vpu981_get_order_hint(ctx, i),
673 							     frame->order_hint);
674 			av1_dec->ref_frame_sign_bias[i + 1] = (rel_off <= 0) ? 0 : 1;
675 		}
676 	}
677 }
678 
/*
 * Program reference slot @ref (0..6, LAST..ALTREF) with its frame
 * dimensions, Q14 scaling ratios relative to the current frame, and the
 * luma/chroma/MV addresses of the decoded buffer in frame_refs[@idx].
 * Returns true when the reference is scaled (either ratio != 1:1).
 */
static bool
rockchip_vpu981_av1_dec_set_ref(struct hantro_ctx *ctx, int ref, int idx,
				int width, int height)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_decoded_buffer *dst;
	dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
	size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
	int cur_width = frame->frame_width_minus_1 + 1;
	int cur_height = frame->frame_height_minus_1 + 1;
	/* Rounded Q14 ratios of reference size over current frame size. */
	int scale_width =
	    ((width << AV1_REF_SCALE_SHIFT) + cur_width / 2) / cur_width;
	int scale_height =
	    ((height << AV1_REF_SCALE_SHIFT) + cur_height / 2) / cur_height;

	/*
	 * NOTE(review): scale_width is written to the *_ver_scale registers
	 * and scale_height to *_hor_scale below; this mirrors the existing
	 * driver behavior -- confirm against the hardware documentation.
	 */
	switch (ref) {
	case 0:
		hantro_reg_write(vpu, &av1_ref0_height, height);
		hantro_reg_write(vpu, &av1_ref0_width, width);
		hantro_reg_write(vpu, &av1_ref0_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref0_hor_scale, scale_height);
		break;
	case 1:
		hantro_reg_write(vpu, &av1_ref1_height, height);
		hantro_reg_write(vpu, &av1_ref1_width, width);
		hantro_reg_write(vpu, &av1_ref1_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref1_hor_scale, scale_height);
		break;
	case 2:
		hantro_reg_write(vpu, &av1_ref2_height, height);
		hantro_reg_write(vpu, &av1_ref2_width, width);
		hantro_reg_write(vpu, &av1_ref2_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref2_hor_scale, scale_height);
		break;
	case 3:
		hantro_reg_write(vpu, &av1_ref3_height, height);
		hantro_reg_write(vpu, &av1_ref3_width, width);
		hantro_reg_write(vpu, &av1_ref3_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref3_hor_scale, scale_height);
		break;
	case 4:
		hantro_reg_write(vpu, &av1_ref4_height, height);
		hantro_reg_write(vpu, &av1_ref4_width, width);
		hantro_reg_write(vpu, &av1_ref4_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref4_hor_scale, scale_height);
		break;
	case 5:
		hantro_reg_write(vpu, &av1_ref5_height, height);
		hantro_reg_write(vpu, &av1_ref5_width, width);
		hantro_reg_write(vpu, &av1_ref5_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref5_hor_scale, scale_height);
		break;
	case 6:
		hantro_reg_write(vpu, &av1_ref6_height, height);
		hantro_reg_write(vpu, &av1_ref6_width, width);
		hantro_reg_write(vpu, &av1_ref6_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref6_hor_scale, scale_height);
		break;
	default:
		pr_warn("AV1 invalid reference frame index\n");
	}

	/* Planes live in one buffer: luma, then chroma, then MV data. */
	dst = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
	chroma_addr = luma_addr + cr_offset;
	mv_addr = luma_addr + mv_offset;

	hantro_write_addr(vpu, AV1_REFERENCE_Y(ref), luma_addr);
	hantro_write_addr(vpu, AV1_REFERENCE_CB(ref), chroma_addr);
	hantro_write_addr(vpu, AV1_REFERENCE_MV(ref), mv_addr);

	return (scale_width != (1 << AV1_REF_SCALE_SHIFT)) ||
		(scale_height != (1 << AV1_REF_SCALE_SHIFT));
}
757 
rockchip_vpu981_av1_dec_set_sign_bias(struct hantro_ctx * ctx,int ref,int val)758 static void rockchip_vpu981_av1_dec_set_sign_bias(struct hantro_ctx *ctx,
759 						  int ref, int val)
760 {
761 	struct hantro_dev *vpu = ctx->dev;
762 
763 	switch (ref) {
764 	case 0:
765 		hantro_reg_write(vpu, &av1_ref0_sign_bias, val);
766 		break;
767 	case 1:
768 		hantro_reg_write(vpu, &av1_ref1_sign_bias, val);
769 		break;
770 	case 2:
771 		hantro_reg_write(vpu, &av1_ref2_sign_bias, val);
772 		break;
773 	case 3:
774 		hantro_reg_write(vpu, &av1_ref3_sign_bias, val);
775 		break;
776 	case 4:
777 		hantro_reg_write(vpu, &av1_ref4_sign_bias, val);
778 		break;
779 	case 5:
780 		hantro_reg_write(vpu, &av1_ref5_sign_bias, val);
781 		break;
782 	case 6:
783 		hantro_reg_write(vpu, &av1_ref6_sign_bias, val);
784 		break;
785 	default:
786 		pr_warn("AV1 invalid sign bias index\n");
787 		break;
788 	}
789 }
790 
rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx * ctx)791 static void rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx *ctx)
792 {
793 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
794 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
795 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
796 	const struct v4l2_av1_segmentation *seg = &frame->segmentation;
797 	u32 segval[V4L2_AV1_MAX_SEGMENTS][V4L2_AV1_SEG_LVL_MAX] = { 0 };
798 	struct hantro_dev *vpu = ctx->dev;
799 	u8 segsign = 0, preskip_segid = 0, last_active_seg = 0, i, j;
800 
801 	if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED) &&
802 	    frame->primary_ref_frame < V4L2_AV1_REFS_PER_FRAME) {
803 		int idx = rockchip_vpu981_get_frame_index(ctx, frame->primary_ref_frame);
804 
805 		if (idx >= 0) {
806 			dma_addr_t luma_addr, mv_addr = 0;
807 			struct hantro_decoded_buffer *seg;
808 			size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
809 
810 			seg = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
811 			luma_addr = hantro_get_dec_buf_addr(ctx, &seg->base.vb.vb2_buf);
812 			mv_addr = luma_addr + mv_offset;
813 
814 			hantro_write_addr(vpu, AV1_SEGMENTATION, mv_addr);
815 			hantro_reg_write(vpu, &av1_use_temporal3_mvs, 1);
816 		}
817 	}
818 
819 	hantro_reg_write(vpu, &av1_segment_temp_upd_e,
820 			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_TEMPORAL_UPDATE));
821 	hantro_reg_write(vpu, &av1_segment_upd_e,
822 			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_UPDATE_MAP));
823 	hantro_reg_write(vpu, &av1_segment_e,
824 			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED));
825 
826 	hantro_reg_write(vpu, &av1_error_resilient,
827 			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE));
828 
829 	if (IS_INTRA(frame->frame_type) ||
830 	    !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE)) {
831 		hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
832 	}
833 
834 	if (seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED) {
835 		int s;
836 
837 		for (s = 0; s < V4L2_AV1_MAX_SEGMENTS; s++) {
838 			if (seg->feature_enabled[s] &
839 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
840 				segval[s][V4L2_AV1_SEG_LVL_ALT_Q] =
841 				    clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q]),
842 					  0, 255);
843 				segsign |=
844 					(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q] < 0) << s;
845 			}
846 
847 			if (seg->feature_enabled[s] &
848 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_V))
849 				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V] =
850 					clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]),
851 					      -63, 63);
852 
853 			if (seg->feature_enabled[s] &
854 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_H))
855 				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H] =
856 				    clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]),
857 					  -63, 63);
858 
859 			if (seg->feature_enabled[s] &
860 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_U))
861 				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_U] =
862 				    clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_U]),
863 					  -63, 63);
864 
865 			if (seg->feature_enabled[s] &
866 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_V))
867 				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_V] =
868 				    clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_V]),
869 					  -63, 63);
870 
871 			if (frame->frame_type && seg->feature_enabled[s] &
872 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_FRAME))
873 				segval[s][V4L2_AV1_SEG_LVL_REF_FRAME]++;
874 
875 			if (seg->feature_enabled[s] &
876 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_SKIP))
877 				segval[s][V4L2_AV1_SEG_LVL_REF_SKIP] = 1;
878 
879 			if (seg->feature_enabled[s] &
880 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_GLOBALMV))
881 				segval[s][V4L2_AV1_SEG_LVL_REF_GLOBALMV] = 1;
882 		}
883 	}
884 
885 	for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
886 		for (j = 0; j < V4L2_AV1_SEG_LVL_MAX; j++) {
887 			if (seg->feature_enabled[i]
888 			    & V4L2_AV1_SEGMENT_FEATURE_ENABLED(j)) {
889 				preskip_segid |= (j >= V4L2_AV1_SEG_LVL_REF_FRAME);
890 				last_active_seg = max(i, last_active_seg);
891 			}
892 		}
893 	}
894 
895 	hantro_reg_write(vpu, &av1_last_active_seg, last_active_seg);
896 	hantro_reg_write(vpu, &av1_preskip_segid, preskip_segid);
897 
898 	hantro_reg_write(vpu, &av1_seg_quant_sign, segsign);
899 
900 	/* Write QP, filter level, ref frame and skip for every segment */
901 	hantro_reg_write(vpu, &av1_quant_seg0,
902 			 segval[0][V4L2_AV1_SEG_LVL_ALT_Q]);
903 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg0,
904 			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
905 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg0,
906 			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
907 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg0,
908 			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_U]);
909 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg0,
910 			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_V]);
911 	hantro_reg_write(vpu, &av1_refpic_seg0,
912 			 segval[0][V4L2_AV1_SEG_LVL_REF_FRAME]);
913 	hantro_reg_write(vpu, &av1_skip_seg0,
914 			 segval[0][V4L2_AV1_SEG_LVL_REF_SKIP]);
915 	hantro_reg_write(vpu, &av1_global_mv_seg0,
916 			 segval[0][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
917 
918 	hantro_reg_write(vpu, &av1_quant_seg1,
919 			 segval[1][V4L2_AV1_SEG_LVL_ALT_Q]);
920 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg1,
921 			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
922 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg1,
923 			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
924 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg1,
925 			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_U]);
926 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg1,
927 			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_V]);
928 	hantro_reg_write(vpu, &av1_refpic_seg1,
929 			 segval[1][V4L2_AV1_SEG_LVL_REF_FRAME]);
930 	hantro_reg_write(vpu, &av1_skip_seg1,
931 			 segval[1][V4L2_AV1_SEG_LVL_REF_SKIP]);
932 	hantro_reg_write(vpu, &av1_global_mv_seg1,
933 			 segval[1][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
934 
935 	hantro_reg_write(vpu, &av1_quant_seg2,
936 			 segval[2][V4L2_AV1_SEG_LVL_ALT_Q]);
937 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg2,
938 			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
939 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg2,
940 			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
941 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg2,
942 			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_U]);
943 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg2,
944 			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_V]);
945 	hantro_reg_write(vpu, &av1_refpic_seg2,
946 			 segval[2][V4L2_AV1_SEG_LVL_REF_FRAME]);
947 	hantro_reg_write(vpu, &av1_skip_seg2,
948 			 segval[2][V4L2_AV1_SEG_LVL_REF_SKIP]);
949 	hantro_reg_write(vpu, &av1_global_mv_seg2,
950 			 segval[2][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
951 
952 	hantro_reg_write(vpu, &av1_quant_seg3,
953 			 segval[3][V4L2_AV1_SEG_LVL_ALT_Q]);
954 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg3,
955 			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
956 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg3,
957 			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
958 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg3,
959 			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_U]);
960 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg3,
961 			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_V]);
962 	hantro_reg_write(vpu, &av1_refpic_seg3,
963 			 segval[3][V4L2_AV1_SEG_LVL_REF_FRAME]);
964 	hantro_reg_write(vpu, &av1_skip_seg3,
965 			 segval[3][V4L2_AV1_SEG_LVL_REF_SKIP]);
966 	hantro_reg_write(vpu, &av1_global_mv_seg3,
967 			 segval[3][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
968 
969 	hantro_reg_write(vpu, &av1_quant_seg4,
970 			 segval[4][V4L2_AV1_SEG_LVL_ALT_Q]);
971 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg4,
972 			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
973 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg4,
974 			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
975 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg4,
976 			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_U]);
977 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg4,
978 			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_V]);
979 	hantro_reg_write(vpu, &av1_refpic_seg4,
980 			 segval[4][V4L2_AV1_SEG_LVL_REF_FRAME]);
981 	hantro_reg_write(vpu, &av1_skip_seg4,
982 			 segval[4][V4L2_AV1_SEG_LVL_REF_SKIP]);
983 	hantro_reg_write(vpu, &av1_global_mv_seg4,
984 			 segval[4][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
985 
986 	hantro_reg_write(vpu, &av1_quant_seg5,
987 			 segval[5][V4L2_AV1_SEG_LVL_ALT_Q]);
988 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg5,
989 			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
990 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg5,
991 			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
992 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg5,
993 			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_U]);
994 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg5,
995 			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_V]);
996 	hantro_reg_write(vpu, &av1_refpic_seg5,
997 			 segval[5][V4L2_AV1_SEG_LVL_REF_FRAME]);
998 	hantro_reg_write(vpu, &av1_skip_seg5,
999 			 segval[5][V4L2_AV1_SEG_LVL_REF_SKIP]);
1000 	hantro_reg_write(vpu, &av1_global_mv_seg5,
1001 			 segval[5][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1002 
1003 	hantro_reg_write(vpu, &av1_quant_seg6,
1004 			 segval[6][V4L2_AV1_SEG_LVL_ALT_Q]);
1005 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg6,
1006 			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
1007 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg6,
1008 			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
1009 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg6,
1010 			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_U]);
1011 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg6,
1012 			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_V]);
1013 	hantro_reg_write(vpu, &av1_refpic_seg6,
1014 			 segval[6][V4L2_AV1_SEG_LVL_REF_FRAME]);
1015 	hantro_reg_write(vpu, &av1_skip_seg6,
1016 			 segval[6][V4L2_AV1_SEG_LVL_REF_SKIP]);
1017 	hantro_reg_write(vpu, &av1_global_mv_seg6,
1018 			 segval[6][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1019 
1020 	hantro_reg_write(vpu, &av1_quant_seg7,
1021 			 segval[7][V4L2_AV1_SEG_LVL_ALT_Q]);
1022 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg7,
1023 			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
1024 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg7,
1025 			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
1026 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg7,
1027 			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_U]);
1028 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg7,
1029 			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_V]);
1030 	hantro_reg_write(vpu, &av1_refpic_seg7,
1031 			 segval[7][V4L2_AV1_SEG_LVL_REF_FRAME]);
1032 	hantro_reg_write(vpu, &av1_skip_seg7,
1033 			 segval[7][V4L2_AV1_SEG_LVL_REF_SKIP]);
1034 	hantro_reg_write(vpu, &av1_global_mv_seg7,
1035 			 segval[7][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1036 }
1037 
rockchip_vpu981_av1_dec_is_lossless(struct hantro_ctx * ctx)1038 static bool rockchip_vpu981_av1_dec_is_lossless(struct hantro_ctx *ctx)
1039 {
1040 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1041 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1042 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1043 	const struct v4l2_av1_segmentation *segmentation = &frame->segmentation;
1044 	const struct v4l2_av1_quantization *quantization = &frame->quantization;
1045 	int i;
1046 
1047 	for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
1048 		int qindex = quantization->base_q_idx;
1049 
1050 		if (segmentation->feature_enabled[i] &
1051 		    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
1052 			qindex += segmentation->feature_data[i][V4L2_AV1_SEG_LVL_ALT_Q];
1053 		}
1054 		qindex = clamp(qindex, 0, 255);
1055 
1056 		if (qindex ||
1057 		    quantization->delta_q_y_dc ||
1058 		    quantization->delta_q_u_dc ||
1059 		    quantization->delta_q_u_ac ||
1060 		    quantization->delta_q_v_dc ||
1061 		    quantization->delta_q_v_ac)
1062 			return false;
1063 	}
1064 	return true;
1065 }
1066 
rockchip_vpu981_av1_dec_set_loopfilter(struct hantro_ctx * ctx)1067 static void rockchip_vpu981_av1_dec_set_loopfilter(struct hantro_ctx *ctx)
1068 {
1069 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1070 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1071 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1072 	const struct v4l2_av1_loop_filter *loop_filter = &frame->loop_filter;
1073 	bool filtering_dis = (loop_filter->level[0] == 0) && (loop_filter->level[1] == 0);
1074 	struct hantro_dev *vpu = ctx->dev;
1075 
1076 	hantro_reg_write(vpu, &av1_filtering_dis, filtering_dis);
1077 	hantro_reg_write(vpu, &av1_filt_level_base_gt32, loop_filter->level[0] > 32);
1078 	hantro_reg_write(vpu, &av1_filt_sharpness, loop_filter->sharpness);
1079 
1080 	hantro_reg_write(vpu, &av1_filt_level0, loop_filter->level[0]);
1081 	hantro_reg_write(vpu, &av1_filt_level1, loop_filter->level[1]);
1082 	hantro_reg_write(vpu, &av1_filt_level2, loop_filter->level[2]);
1083 	hantro_reg_write(vpu, &av1_filt_level3, loop_filter->level[3]);
1084 
1085 	if (loop_filter->flags & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED &&
1086 	    !rockchip_vpu981_av1_dec_is_lossless(ctx) &&
1087 	    !(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC)) {
1088 		hantro_reg_write(vpu, &av1_filt_ref_adj_0,
1089 				 loop_filter->ref_deltas[0]);
1090 		hantro_reg_write(vpu, &av1_filt_ref_adj_1,
1091 				 loop_filter->ref_deltas[1]);
1092 		hantro_reg_write(vpu, &av1_filt_ref_adj_2,
1093 				 loop_filter->ref_deltas[2]);
1094 		hantro_reg_write(vpu, &av1_filt_ref_adj_3,
1095 				 loop_filter->ref_deltas[3]);
1096 		hantro_reg_write(vpu, &av1_filt_ref_adj_4,
1097 				 loop_filter->ref_deltas[4]);
1098 		hantro_reg_write(vpu, &av1_filt_ref_adj_5,
1099 				 loop_filter->ref_deltas[5]);
1100 		hantro_reg_write(vpu, &av1_filt_ref_adj_6,
1101 				 loop_filter->ref_deltas[6]);
1102 		hantro_reg_write(vpu, &av1_filt_ref_adj_7,
1103 				 loop_filter->ref_deltas[7]);
1104 		hantro_reg_write(vpu, &av1_filt_mb_adj_0,
1105 				 loop_filter->mode_deltas[0]);
1106 		hantro_reg_write(vpu, &av1_filt_mb_adj_1,
1107 				 loop_filter->mode_deltas[1]);
1108 	} else {
1109 		hantro_reg_write(vpu, &av1_filt_ref_adj_0, 0);
1110 		hantro_reg_write(vpu, &av1_filt_ref_adj_1, 0);
1111 		hantro_reg_write(vpu, &av1_filt_ref_adj_2, 0);
1112 		hantro_reg_write(vpu, &av1_filt_ref_adj_3, 0);
1113 		hantro_reg_write(vpu, &av1_filt_ref_adj_4, 0);
1114 		hantro_reg_write(vpu, &av1_filt_ref_adj_5, 0);
1115 		hantro_reg_write(vpu, &av1_filt_ref_adj_6, 0);
1116 		hantro_reg_write(vpu, &av1_filt_ref_adj_7, 0);
1117 		hantro_reg_write(vpu, &av1_filt_mb_adj_0, 0);
1118 		hantro_reg_write(vpu, &av1_filt_mb_adj_1, 0);
1119 	}
1120 
1121 	hantro_write_addr(vpu, AV1_DB_DATA_COL, av1_dec->db_data_col.dma);
1122 	hantro_write_addr(vpu, AV1_DB_CTRL_COL, av1_dec->db_ctrl_col.dma);
1123 }
1124 
rockchip_vpu981_av1_dec_update_prob(struct hantro_ctx * ctx)1125 static void rockchip_vpu981_av1_dec_update_prob(struct hantro_ctx *ctx)
1126 {
1127 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1128 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1129 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1130 	bool frame_is_intra = IS_INTRA(frame->frame_type);
1131 	struct av1cdfs *out_cdfs = (struct av1cdfs *)av1_dec->prob_tbl_out.cpu;
1132 	int i;
1133 
1134 	if (frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF)
1135 		return;
1136 
1137 	for (i = 0; i < NUM_REF_FRAMES; i++) {
1138 		if (frame->refresh_frame_flags & BIT(i)) {
1139 			struct mvcdfs stored_mv_cdf;
1140 
1141 			rockchip_av1_get_cdfs(ctx, i);
1142 			stored_mv_cdf = av1_dec->cdfs->mv_cdf;
1143 			*av1_dec->cdfs = *out_cdfs;
1144 			if (frame_is_intra) {
1145 				av1_dec->cdfs->mv_cdf = stored_mv_cdf;
1146 				*av1_dec->cdfs_ndvc = out_cdfs->mv_cdf;
1147 			}
1148 			rockchip_av1_store_cdfs(ctx,
1149 						frame->refresh_frame_flags);
1150 			break;
1151 		}
1152 	}
1153 }
1154 
/*
 * Post-decode hook: propagate the CDF probabilities the hardware updated
 * for the just-decoded frame into the reference CDF store.
 */
void rockchip_vpu981_av1_dec_done(struct hantro_ctx *ctx)
{
	rockchip_vpu981_av1_dec_update_prob(ctx);
}
1159 
rockchip_vpu981_av1_dec_set_prob(struct hantro_ctx * ctx)1160 static void rockchip_vpu981_av1_dec_set_prob(struct hantro_ctx *ctx)
1161 {
1162 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1163 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1164 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1165 	const struct v4l2_av1_quantization *quantization = &frame->quantization;
1166 	struct hantro_dev *vpu = ctx->dev;
1167 	bool error_resilient_mode =
1168 	    !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE);
1169 	bool frame_is_intra = IS_INTRA(frame->frame_type);
1170 
1171 	if (error_resilient_mode || frame_is_intra ||
1172 	    frame->primary_ref_frame == AV1_PRIMARY_REF_NONE) {
1173 		av1_dec->cdfs = &av1_dec->default_cdfs;
1174 		av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
1175 		rockchip_av1_default_coeff_probs(quantization->base_q_idx,
1176 						 av1_dec->cdfs);
1177 	} else {
1178 		rockchip_av1_get_cdfs(ctx, frame->ref_frame_idx[frame->primary_ref_frame]);
1179 	}
1180 	rockchip_av1_store_cdfs(ctx, frame->refresh_frame_flags);
1181 
1182 	memcpy(av1_dec->prob_tbl.cpu, av1_dec->cdfs, sizeof(struct av1cdfs));
1183 
1184 	if (frame_is_intra) {
1185 		int mv_offset = offsetof(struct av1cdfs, mv_cdf);
1186 		/* Overwrite MV context area with intrabc MV context */
1187 		memcpy(av1_dec->prob_tbl.cpu + mv_offset, av1_dec->cdfs_ndvc,
1188 		       sizeof(struct mvcdfs));
1189 	}
1190 
1191 	hantro_write_addr(vpu, AV1_PROP_TABLE_OUT, av1_dec->prob_tbl_out.dma);
1192 	hantro_write_addr(vpu, AV1_PROP_TABLE, av1_dec->prob_tbl.dma);
1193 }
1194 
/*
 * Build a 256-entry film-grain scaling LUT from (value, scaling) control
 * points by piecewise-linear interpolation in 16.16 fixed point.
 * With no points the LUT is zeroed; entries at or above the last point's
 * value saturate to the last point's scaling.  Entries below the first
 * point's value are left untouched.
 */
static void
rockchip_vpu981_av1_dec_init_scaling_function(const u8 *values, const u8 *scaling,
					      u8 num_points, u8 *scaling_lut)
{
	int segment, x, idx;

	if (num_points == 0) {
		memset(scaling_lut, 0, 256);
		return;
	}

	/* Interpolate between each pair of consecutive points. */
	for (segment = 0; segment + 1 < num_points; segment++) {
		s32 dy = scaling[segment + 1] - scaling[segment];
		s32 dx = values[segment + 1] - values[segment];
		/* Fixed-point slope, rounded; zero when the step is empty. */
		s64 slope = dx ? dy * ((65536 + (dx >> 1)) / dx) : 0;

		for (x = 0; x < dx; x++)
			scaling_lut[values[segment] + x] =
				scaling[segment] + (s32)((x * slope + 32768) >> 16);
	}

	/* Saturate the tail of the LUT to the last point's scaling. */
	for (idx = values[num_points - 1]; idx < 256; idx++)
		scaling_lut[idx] = scaling[num_points - 1];
}
1224 
/*
 * Program the film-grain synthesis parameters and pre-compute the grain
 * blocks the hardware applies at output time.
 *
 * When grain is disabled, every film-grain register is cleared and the
 * grain buffer address is zeroed.  Otherwise the AR coefficients are
 * rebased from their +128 bitstream encoding, luma/chroma grain blocks
 * are generated in software, cropped into the shared film-grain buffer
 * (fgmem) and handed to the hardware together with the scaling LUTs.
 */
static void rockchip_vpu981_av1_dec_set_fgs(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_film_grain *film_grain = ctrls->film_grain;
	struct rockchip_av1_film_grain *fgmem = av1_dec->film_grain.cpu;
	struct hantro_dev *vpu = ctx->dev;
	bool scaling_from_luma =
		!!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA);
	/* Scratch buffers for grain generation; too large for the stack. */
	s32 (*ar_coeffs_y)[24];
	s32 (*ar_coeffs_cb)[25];
	s32 (*ar_coeffs_cr)[25];
	s32 (*luma_grain_block)[73][82];
	s32 (*cb_grain_block)[38][44];
	s32 (*cr_grain_block)[38][44];
	s32 ar_coeff_lag, ar_coeff_shift;
	s32 grain_scale_shift, bitdepth;
	s32 grain_center, grain_min, grain_max;
	int i, j;

	hantro_reg_write(vpu, &av1_apply_grain, 0);

	/* Grain disabled: clear every film-grain register and bail out. */
	if (!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_APPLY_GRAIN)) {
		hantro_reg_write(vpu, &av1_num_y_points_b, 0);
		hantro_reg_write(vpu, &av1_num_cb_points_b, 0);
		hantro_reg_write(vpu, &av1_num_cr_points_b, 0);
		hantro_reg_write(vpu, &av1_scaling_shift, 0);
		hantro_reg_write(vpu, &av1_cb_mult, 0);
		hantro_reg_write(vpu, &av1_cb_luma_mult, 0);
		hantro_reg_write(vpu, &av1_cb_offset, 0);
		hantro_reg_write(vpu, &av1_cr_mult, 0);
		hantro_reg_write(vpu, &av1_cr_luma_mult, 0);
		hantro_reg_write(vpu, &av1_cr_offset, 0);
		hantro_reg_write(vpu, &av1_overlap_flag, 0);
		hantro_reg_write(vpu, &av1_clip_to_restricted_range, 0);
		hantro_reg_write(vpu, &av1_chroma_scaling_from_luma, 0);
		hantro_reg_write(vpu, &av1_random_seed, 0);
		hantro_write_addr(vpu, AV1_FILM_GRAIN, 0);
		return;
	}

	ar_coeffs_y = kzalloc(sizeof(int32_t) * 24, GFP_KERNEL);
	ar_coeffs_cb = kzalloc(sizeof(int32_t) * 25, GFP_KERNEL);
	ar_coeffs_cr = kzalloc(sizeof(int32_t) * 25, GFP_KERNEL);
	luma_grain_block = kzalloc(sizeof(int32_t) * 73 * 82, GFP_KERNEL);
	cb_grain_block = kzalloc(sizeof(int32_t) * 38 * 44, GFP_KERNEL);
	cr_grain_block = kzalloc(sizeof(int32_t) * 38 * 44, GFP_KERNEL);

	if (!ar_coeffs_y || !ar_coeffs_cb || !ar_coeffs_cr ||
	    !luma_grain_block || !cb_grain_block || !cr_grain_block) {
		pr_warn("Fail allocating memory for film grain parameters\n");
		goto alloc_fail;
	}

	hantro_reg_write(vpu, &av1_apply_grain, 1);

	hantro_reg_write(vpu, &av1_num_y_points_b,
			 film_grain->num_y_points > 0);
	hantro_reg_write(vpu, &av1_num_cb_points_b,
			 film_grain->num_cb_points > 0);
	hantro_reg_write(vpu, &av1_num_cr_points_b,
			 film_grain->num_cr_points > 0);
	hantro_reg_write(vpu, &av1_scaling_shift,
			 film_grain->grain_scaling_minus_8 + 8);

	/*
	 * Chroma blend parameters are rebased from their unsigned
	 * bitstream encoding; unused when chroma scales from luma.
	 */
	if (!scaling_from_luma) {
		hantro_reg_write(vpu, &av1_cb_mult, film_grain->cb_mult - 128);
		hantro_reg_write(vpu, &av1_cb_luma_mult, film_grain->cb_luma_mult - 128);
		hantro_reg_write(vpu, &av1_cb_offset, film_grain->cb_offset - 256);
		hantro_reg_write(vpu, &av1_cr_mult, film_grain->cr_mult - 128);
		hantro_reg_write(vpu, &av1_cr_luma_mult, film_grain->cr_luma_mult - 128);
		hantro_reg_write(vpu, &av1_cr_offset, film_grain->cr_offset - 256);
	} else {
		hantro_reg_write(vpu, &av1_cb_mult, 0);
		hantro_reg_write(vpu, &av1_cb_luma_mult, 0);
		hantro_reg_write(vpu, &av1_cb_offset, 0);
		hantro_reg_write(vpu, &av1_cr_mult, 0);
		hantro_reg_write(vpu, &av1_cr_luma_mult, 0);
		hantro_reg_write(vpu, &av1_cr_offset, 0);
	}

	hantro_reg_write(vpu, &av1_overlap_flag,
			 !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_OVERLAP));
	hantro_reg_write(vpu, &av1_clip_to_restricted_range,
			 !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_CLIP_TO_RESTRICTED_RANGE));
	hantro_reg_write(vpu, &av1_chroma_scaling_from_luma, scaling_from_luma);
	hantro_reg_write(vpu, &av1_random_seed, film_grain->grain_seed);

	/* Build the scaling LUTs the hardware uses to modulate the grain. */
	rockchip_vpu981_av1_dec_init_scaling_function(film_grain->point_y_value,
						      film_grain->point_y_scaling,
						      film_grain->num_y_points,
						      fgmem->scaling_lut_y);

	if (film_grain->flags &
	    V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA) {
		memcpy(fgmem->scaling_lut_cb, fgmem->scaling_lut_y,
		       sizeof(*fgmem->scaling_lut_y) * 256);
		memcpy(fgmem->scaling_lut_cr, fgmem->scaling_lut_y,
		       sizeof(*fgmem->scaling_lut_y) * 256);
	} else {
		rockchip_vpu981_av1_dec_init_scaling_function
		    (film_grain->point_cb_value, film_grain->point_cb_scaling,
		     film_grain->num_cb_points, fgmem->scaling_lut_cb);
		rockchip_vpu981_av1_dec_init_scaling_function
		    (film_grain->point_cr_value, film_grain->point_cr_scaling,
		     film_grain->num_cr_points, fgmem->scaling_lut_cr);
	}

	/* Rebase AR coefficients from +128 encoding; luma only has 24. */
	for (i = 0; i < V4L2_AV1_AR_COEFFS_SIZE; i++) {
		if (i < 24)
			(*ar_coeffs_y)[i] = film_grain->ar_coeffs_y_plus_128[i] - 128;
		(*ar_coeffs_cb)[i] = film_grain->ar_coeffs_cb_plus_128[i] - 128;
		(*ar_coeffs_cr)[i] = film_grain->ar_coeffs_cr_plus_128[i] - 128;
	}

	ar_coeff_lag = film_grain->ar_coeff_lag;
	ar_coeff_shift = film_grain->ar_coeff_shift_minus_6 + 6;
	grain_scale_shift = film_grain->grain_scale_shift;
	bitdepth = ctx->bit_depth;
	/* Grain sample range is centered around mid-gray for the bit depth. */
	grain_center = 128 << (bitdepth - 8);
	grain_min = 0 - grain_center;
	grain_max = (256 << (bitdepth - 8)) - 1 - grain_center;

	rockchip_av1_generate_luma_grain_block(luma_grain_block, bitdepth,
					       film_grain->num_y_points, grain_scale_shift,
					       ar_coeff_lag, ar_coeffs_y, ar_coeff_shift,
					       grain_min, grain_max, film_grain->grain_seed);

	rockchip_av1_generate_chroma_grain_block(luma_grain_block, cb_grain_block,
						 cr_grain_block, bitdepth,
						 film_grain->num_y_points,
						 film_grain->num_cb_points,
						 film_grain->num_cr_points,
						 grain_scale_shift, ar_coeff_lag, ar_coeffs_cb,
						 ar_coeffs_cr, ar_coeff_shift, grain_min,
						 grain_max,
						 scaling_from_luma,
						 film_grain->grain_seed);

	/*
	 * Crop the generated blocks into the layout fgmem expects:
	 * a 64x64 luma block (9-sample border dropped) and interleaved
	 * Cb/Cr 32x32 blocks (6-sample border dropped).
	 */
	for (i = 0; i < 64; i++) {
		for (j = 0; j < 64; j++)
			fgmem->cropped_luma_grain_block[i * 64 + j] =
				(*luma_grain_block)[i + 9][j + 9];
	}

	for (i = 0; i < 32; i++) {
		for (j = 0; j < 32; j++) {
			fgmem->cropped_chroma_grain_block[i * 64 + 2 * j] =
				(*cb_grain_block)[i + 6][j + 6];
			fgmem->cropped_chroma_grain_block[i * 64 + 2 * j + 1] =
				(*cr_grain_block)[i + 6][j + 6];
		}
	}

	hantro_write_addr(vpu, AV1_FILM_GRAIN, av1_dec->film_grain.dma);

alloc_fail:
	/* kfree(NULL) is a no-op, so partial allocation is handled too. */
	kfree(ar_coeffs_y);
	kfree(ar_coeffs_cb);
	kfree(ar_coeffs_cr);
	kfree(luma_grain_block);
	kfree(cb_grain_block);
	kfree(cr_grain_block);
}
1389 
rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx * ctx)1390 static void rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx *ctx)
1391 {
1392 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1393 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1394 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1395 	const struct v4l2_av1_cdef *cdef = &frame->cdef;
1396 	struct hantro_dev *vpu = ctx->dev;
1397 	u32 luma_pri_strength = 0;
1398 	u16 luma_sec_strength = 0;
1399 	u32 chroma_pri_strength = 0;
1400 	u16 chroma_sec_strength = 0;
1401 	int i;
1402 
1403 	hantro_reg_write(vpu, &av1_cdef_bits, cdef->bits);
1404 	hantro_reg_write(vpu, &av1_cdef_damping, cdef->damping_minus_3);
1405 
1406 	for (i = 0; i < BIT(cdef->bits); i++) {
1407 		luma_pri_strength |= cdef->y_pri_strength[i] << (i * 4);
1408 		if (cdef->y_sec_strength[i] == 4)
1409 			luma_sec_strength |= 3 << (i * 2);
1410 		else
1411 			luma_sec_strength |= cdef->y_sec_strength[i] << (i * 2);
1412 
1413 		chroma_pri_strength |= cdef->uv_pri_strength[i] << (i * 4);
1414 		if (cdef->uv_sec_strength[i] == 4)
1415 			chroma_sec_strength |= 3 << (i * 2);
1416 		else
1417 			chroma_sec_strength |= cdef->uv_sec_strength[i] << (i * 2);
1418 	}
1419 
1420 	hantro_reg_write(vpu, &av1_cdef_luma_primary_strength,
1421 			 luma_pri_strength);
1422 	hantro_reg_write(vpu, &av1_cdef_luma_secondary_strength,
1423 			 luma_sec_strength);
1424 	hantro_reg_write(vpu, &av1_cdef_chroma_primary_strength,
1425 			 chroma_pri_strength);
1426 	hantro_reg_write(vpu, &av1_cdef_chroma_secondary_strength,
1427 			 chroma_sec_strength);
1428 
1429 	hantro_write_addr(vpu, AV1_CDEF_COL, av1_dec->cdef_col.dma);
1430 }
1431 
rockchip_vpu981_av1_dec_set_lr(struct hantro_ctx * ctx)1432 static void rockchip_vpu981_av1_dec_set_lr(struct hantro_ctx *ctx)
1433 {
1434 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1435 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1436 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1437 	const struct v4l2_av1_loop_restoration *loop_restoration =
1438 	    &frame->loop_restoration;
1439 	struct hantro_dev *vpu = ctx->dev;
1440 	u16 lr_type = 0, lr_unit_size = 0;
1441 	u8 restoration_unit_size[V4L2_AV1_NUM_PLANES_MAX] = { 3, 3, 3 };
1442 	int i;
1443 
1444 	if (loop_restoration->flags & V4L2_AV1_LOOP_RESTORATION_FLAG_USES_LR) {
1445 		restoration_unit_size[0] = 1 + loop_restoration->lr_unit_shift;
1446 		restoration_unit_size[1] =
1447 		    1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
1448 		restoration_unit_size[2] =
1449 		    1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
1450 	}
1451 
1452 	for (i = 0; i < V4L2_AV1_NUM_PLANES_MAX; i++) {
1453 		lr_type |=
1454 		    loop_restoration->frame_restoration_type[i] << (i * 2);
1455 		lr_unit_size |= restoration_unit_size[i] << (i * 2);
1456 	}
1457 
1458 	hantro_reg_write(vpu, &av1_lr_type, lr_type);
1459 	hantro_reg_write(vpu, &av1_lr_unit_size, lr_unit_size);
1460 	hantro_write_addr(vpu, AV1_LR_COL, av1_dec->lr_col.dma);
1461 }
1462 
/*
 * Program the super-resolution upscaling parameters.
 *
 * Mirrors the AV1 superres computation: the downscaled (coded) width is
 * derived from upscaled_width and superres_denom, and if it differs from
 * the upscaled width, per-plane fixed-point horizontal resampling steps
 * (RS_SCALE_SUBPEL_BITS fractional bits) and initial subpel offsets are
 * computed for both the forward and inverse directions.
 */
static void rockchip_vpu981_av1_dec_set_superres_params(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	struct hantro_dev *vpu = ctx->dev;
	u8 superres_scale_denominator = SCALE_NUMERATOR;
	/*
	 * NOTE(review): the unscaled defaults below are RS_SCALE_SUBPEL_BITS
	 * (the bit count, 14) rather than 1 << RS_SCALE_SUBPEL_BITS;
	 * presumably the hardware ignores the step registers when
	 * av1_superres_is_scaled is 0 — confirm against the register spec.
	 */
	int superres_luma_step = RS_SCALE_SUBPEL_BITS;
	int superres_chroma_step = RS_SCALE_SUBPEL_BITS;
	int superres_luma_step_invra = RS_SCALE_SUBPEL_BITS;
	int superres_chroma_step_invra = RS_SCALE_SUBPEL_BITS;
	int superres_init_luma_subpel_x = 0;
	int superres_init_chroma_subpel_x = 0;
	int superres_is_scaled = 0;
	int min_w = min_t(uint32_t, 16, frame->upscaled_width);
	int upscaled_luma, downscaled_luma;
	int downscaled_chroma, upscaled_chroma;
	int step_luma, step_chroma;
	int err_luma, err_chroma;
	int initial_luma, initial_chroma;
	int width = 0;

	if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
		superres_scale_denominator = frame->superres_denom;

	/* denom <= 8 (SCALE_NUMERATOR) means no actual downscaling. */
	if (superres_scale_denominator <= SCALE_NUMERATOR)
		goto set_regs;

	/* Rounded downscaled width: upscaled * 8 / denom, floored at min_w. */
	width = (frame->upscaled_width * SCALE_NUMERATOR +
		(superres_scale_denominator / 2)) / superres_scale_denominator;

	if (width < min_w)
		width = min_w;

	if (width == frame->upscaled_width)
		goto set_regs;

	superres_is_scaled = 1;
	upscaled_luma = frame->upscaled_width;
	downscaled_luma = width;
	/* 4:2:0 chroma is half the luma width, rounded up. */
	downscaled_chroma = (downscaled_luma + 1) >> 1;
	upscaled_chroma = (upscaled_luma + 1) >> 1;
	/* Forward step: downscaled/upscaled in fixed point, rounded. */
	step_luma =
		((downscaled_luma << RS_SCALE_SUBPEL_BITS) +
		 (upscaled_luma / 2)) / upscaled_luma;
	step_chroma =
		((downscaled_chroma << RS_SCALE_SUBPEL_BITS) +
		 (upscaled_chroma / 2)) / upscaled_chroma;
	/* Accumulated rounding error over the whole row. */
	err_luma =
		(upscaled_luma * step_luma)
		- (downscaled_luma << RS_SCALE_SUBPEL_BITS);
	err_chroma =
		(upscaled_chroma * step_chroma)
		- (downscaled_chroma << RS_SCALE_SUBPEL_BITS);
	/* Initial subpel phase, centered and compensated for the error. */
	initial_luma =
		((-((upscaled_luma - downscaled_luma) << (RS_SCALE_SUBPEL_BITS - 1))
		  + upscaled_luma / 2)
		 / upscaled_luma + (1 << (RS_SCALE_EXTRA_BITS - 1)) - err_luma / 2)
		& RS_SCALE_SUBPEL_MASK;
	initial_chroma =
		((-((upscaled_chroma - downscaled_chroma) << (RS_SCALE_SUBPEL_BITS - 1))
		  + upscaled_chroma / 2)
		 / upscaled_chroma + (1 << (RS_SCALE_EXTRA_BITS - 1)) - err_chroma / 2)
		& RS_SCALE_SUBPEL_MASK;
	superres_luma_step = step_luma;
	superres_chroma_step = step_chroma;
	/* Inverse step: upscaled/downscaled in fixed point, rounded. */
	superres_luma_step_invra =
		((upscaled_luma << RS_SCALE_SUBPEL_BITS) + (downscaled_luma / 2))
		/ downscaled_luma;
	superres_chroma_step_invra =
		((upscaled_chroma << RS_SCALE_SUBPEL_BITS) + (downscaled_chroma / 2))
		/ downscaled_chroma;
	superres_init_luma_subpel_x = initial_luma;
	superres_init_chroma_subpel_x = initial_chroma;

set_regs:
	hantro_reg_write(vpu, &av1_superres_pic_width, frame->upscaled_width);

	/*
	 * NOTE(review): without USE_SUPERRES the raw superres_denom is
	 * written instead of denom - 9; presumably the field is don't-care
	 * when superres is off — verify against the register documentation.
	 */
	if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
		hantro_reg_write(vpu, &av1_scale_denom_minus9,
				 frame->superres_denom - SUPERRES_SCALE_DENOMINATOR_MIN);
	else
		hantro_reg_write(vpu, &av1_scale_denom_minus9, frame->superres_denom);

	hantro_reg_write(vpu, &av1_superres_luma_step, superres_luma_step);
	hantro_reg_write(vpu, &av1_superres_chroma_step, superres_chroma_step);
	hantro_reg_write(vpu, &av1_superres_luma_step_invra,
			 superres_luma_step_invra);
	hantro_reg_write(vpu, &av1_superres_chroma_step_invra,
			 superres_chroma_step_invra);
	hantro_reg_write(vpu, &av1_superres_init_luma_subpel_x,
			 superres_init_luma_subpel_x);
	hantro_reg_write(vpu, &av1_superres_init_chroma_subpel_x,
			 superres_init_chroma_subpel_x);
	hantro_reg_write(vpu, &av1_superres_is_scaled, superres_is_scaled);

	hantro_write_addr(vpu, AV1_SR_COL, av1_dec->sr_col.dma);
}
1561 
rockchip_vpu981_av1_dec_set_picture_dimensions(struct hantro_ctx * ctx)1562 static void rockchip_vpu981_av1_dec_set_picture_dimensions(struct hantro_ctx *ctx)
1563 {
1564 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1565 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1566 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1567 	struct hantro_dev *vpu = ctx->dev;
1568 	int pic_width_in_cbs = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
1569 	int pic_height_in_cbs = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
1570 	int pic_width_pad = ALIGN(frame->frame_width_minus_1 + 1, 8)
1571 			    - (frame->frame_width_minus_1 + 1);
1572 	int pic_height_pad = ALIGN(frame->frame_height_minus_1 + 1, 8)
1573 			     - (frame->frame_height_minus_1 + 1);
1574 
1575 	hantro_reg_write(vpu, &av1_pic_width_in_cbs, pic_width_in_cbs);
1576 	hantro_reg_write(vpu, &av1_pic_height_in_cbs, pic_height_in_cbs);
1577 	hantro_reg_write(vpu, &av1_pic_width_pad, pic_width_pad);
1578 	hantro_reg_write(vpu, &av1_pic_height_pad, pic_height_pad);
1579 
1580 	rockchip_vpu981_av1_dec_set_superres_params(ctx);
1581 }
1582 
/*
 * Select up to three previously decoded reference frames whose saved
 * motion vectors will seed temporal MV prediction for the current
 * frame, and program the order-hint distance registers the hardware
 * needs to project those vectors.
 *
 * A candidate is usable only when it has the same mi_cols/mi_rows
 * geometry as the current frame and is not an intra frame.
 */
static void rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	struct hantro_dev *vpu = ctx->dev;
	bool use_ref_frame_mvs =
	    !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS);
	int cur_frame_offset = frame->order_hint;
	int alt_frame_offset = 0;
	int gld_frame_offset = 0;
	int bwd_frame_offset = 0;
	int alt2_frame_offset = 0;
	/* internal buffer slot of each selected motion-field source */
	int refs_selected[3] = { 0, 0, 0 };
	int cur_mi_cols = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
	int cur_mi_rows = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
	/* signed order-hint distances: current->ref and ref->current */
	int cur_offset[V4L2_AV1_TOTAL_REFS_PER_FRAME - 1];
	int cur_roffset[V4L2_AV1_TOTAL_REFS_PER_FRAME - 1];
	/* V4L2 reference type of each selected source */
	int mf_types[3] = { 0, 0, 0 };
	/* remaining candidate budget; gates the ALTREF and LAST2 stanzas */
	int ref_stamp = 2;
	int ref_ind = 0;	/* number of sources selected so far */
	int rf, idx;

	alt_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT_BUF_IDX);
	gld_frame_offset = rockchip_vpu981_get_order_hint(ctx, GLD_BUF_IDX);
	bwd_frame_offset = rockchip_vpu981_get_order_hint(ctx, BWD_BUF_IDX);
	alt2_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT2_BUF_IDX);

	/*
	 * LAST frame: skipped when it looks like an overlay frame, i.e.
	 * its own ALTREF order hint matches the golden frame's hint.
	 */
	idx = rockchip_vpu981_get_frame_index(ctx, LST_BUF_IDX);
	if (idx >= 0) {
		int alt_frame_offset_in_lst =
			av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME];
		bool is_lst_overlay =
		    (alt_frame_offset_in_lst == gld_frame_offset);

		if (!is_lst_overlay) {
			int lst_mi_cols = av1_dec->frame_refs[idx].mi_cols;
			int lst_mi_rows = av1_dec->frame_refs[idx].mi_rows;
			bool lst_intra_only =
			    IS_INTRA(av1_dec->frame_refs[idx].frame_type);

			if (lst_mi_cols == cur_mi_cols &&
			    lst_mi_rows == cur_mi_rows && !lst_intra_only) {
				mf_types[ref_ind] = V4L2_AV1_REF_LAST_FRAME;
				refs_selected[ref_ind++] = LST_BUF_IDX;
			}
		}
		ref_stamp--;
	}

	/*
	 * BWDREF: only considered when it lies in the future (positive
	 * distance from the current frame).
	 * NOTE(review): unlike the LAST/LAST2 stanzas, idx is not
	 * checked for < 0 before indexing frame_refs[] here (nor in the
	 * ALTREF2/ALTREF stanzas below) — confirm the distance can only
	 * be positive when the index lookup succeeded.
	 */
	idx = rockchip_vpu981_get_frame_index(ctx, BWD_BUF_IDX);
	if (rockchip_vpu981_av1_dec_get_dist(ctx, bwd_frame_offset, cur_frame_offset) > 0) {
		int bwd_mi_cols = av1_dec->frame_refs[idx].mi_cols;
		int bwd_mi_rows = av1_dec->frame_refs[idx].mi_rows;
		bool bwd_intra_only =
		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);

		if (bwd_mi_cols == cur_mi_cols && bwd_mi_rows == cur_mi_rows &&
		    !bwd_intra_only) {
			mf_types[ref_ind] = V4L2_AV1_REF_BWDREF_FRAME;
			refs_selected[ref_ind++] = BWD_BUF_IDX;
			ref_stamp--;
		}
	}

	/* ALTREF2: same future-frame criterion as BWDREF. */
	idx = rockchip_vpu981_get_frame_index(ctx, ALT2_BUF_IDX);
	if (rockchip_vpu981_av1_dec_get_dist(ctx, alt2_frame_offset, cur_frame_offset) > 0) {
		int alt2_mi_cols = av1_dec->frame_refs[idx].mi_cols;
		int alt2_mi_rows = av1_dec->frame_refs[idx].mi_rows;
		bool alt2_intra_only =
		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);

		if (alt2_mi_cols == cur_mi_cols && alt2_mi_rows == cur_mi_rows &&
		    !alt2_intra_only) {
			mf_types[ref_ind] = V4L2_AV1_REF_ALTREF2_FRAME;
			refs_selected[ref_ind++] = ALT2_BUF_IDX;
			ref_stamp--;
		}
	}

	/* ALTREF: future frame, and only while budget remains. */
	idx = rockchip_vpu981_get_frame_index(ctx, ALT_BUF_IDX);
	if (rockchip_vpu981_av1_dec_get_dist(ctx, alt_frame_offset, cur_frame_offset) > 0 &&
	    ref_stamp >= 0) {
		int alt_mi_cols = av1_dec->frame_refs[idx].mi_cols;
		int alt_mi_rows = av1_dec->frame_refs[idx].mi_rows;
		bool alt_intra_only =
		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);

		if (alt_mi_cols == cur_mi_cols && alt_mi_rows == cur_mi_rows &&
		    !alt_intra_only) {
			mf_types[ref_ind] = V4L2_AV1_REF_ALTREF_FRAME;
			refs_selected[ref_ind++] = ALT_BUF_IDX;
			ref_stamp--;
		}
	}

	/* LAST2: final fallback candidate, only while budget remains. */
	idx = rockchip_vpu981_get_frame_index(ctx, LST2_BUF_IDX);
	if (idx >= 0 && ref_stamp >= 0) {
		int lst2_mi_cols = av1_dec->frame_refs[idx].mi_cols;
		int lst2_mi_rows = av1_dec->frame_refs[idx].mi_rows;
		bool lst2_intra_only =
		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);

		if (lst2_mi_cols == cur_mi_cols && lst2_mi_rows == cur_mi_rows &&
		    !lst2_intra_only) {
			mf_types[ref_ind] = V4L2_AV1_REF_LAST2_FRAME;
			refs_selected[ref_ind++] = LST2_BUF_IDX;
			ref_stamp--;
		}
	}

	/*
	 * Signed distances between the current frame and each of its
	 * seven references; zero for unused reference slots.
	 */
	for (rf = 0; rf < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; ++rf) {
		idx = rockchip_vpu981_get_frame_index(ctx, rf);
		if (idx >= 0) {
			int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, rf);

			cur_offset[rf] =
			    rockchip_vpu981_av1_dec_get_dist(ctx, cur_frame_offset, rf_order_hint);
			cur_roffset[rf] =
			    rockchip_vpu981_av1_dec_get_dist(ctx, rf_order_hint, cur_frame_offset);
		} else {
			cur_offset[rf] = 0;
			cur_roffset[rf] = 0;
		}
	}

	/* Default state: temporal MV prediction disabled everywhere. */
	hantro_reg_write(vpu, &av1_use_temporal0_mvs, 0);
	hantro_reg_write(vpu, &av1_use_temporal1_mvs, 0);
	hantro_reg_write(vpu, &av1_use_temporal2_mvs, 0);
	hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);

	hantro_reg_write(vpu, &av1_mf1_last_offset, 0);
	hantro_reg_write(vpu, &av1_mf1_last2_offset, 0);
	hantro_reg_write(vpu, &av1_mf1_last3_offset, 0);
	hantro_reg_write(vpu, &av1_mf1_golden_offset, 0);
	hantro_reg_write(vpu, &av1_mf1_bwdref_offset, 0);
	hantro_reg_write(vpu, &av1_mf1_altref2_offset, 0);
	hantro_reg_write(vpu, &av1_mf1_altref_offset, 0);

	/*
	 * Motion-field source #1: enabled only when ref_frame_mvs is in
	 * use and the source lies within MAX_FRAME_DISTANCE.  The mfN
	 * offsets are the distances from the source frame to each of
	 * its *own* references, taken from its saved order hints.
	 */
	if (use_ref_frame_mvs && ref_ind > 0 &&
	    cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
	    cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
		int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[0]);
		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[0]);
		u32 *oh = av1_dec->frame_refs[idx].order_hints;
		int val;

		hantro_reg_write(vpu, &av1_use_temporal0_mvs, 1);

		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
		hantro_reg_write(vpu, &av1_mf1_last_offset, val);

		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
		hantro_reg_write(vpu, &av1_mf1_last2_offset, val);

		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
		hantro_reg_write(vpu, &av1_mf1_last3_offset, val);

		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
		hantro_reg_write(vpu, &av1_mf1_golden_offset, val);

		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
		hantro_reg_write(vpu, &av1_mf1_bwdref_offset, val);

		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
		hantro_reg_write(vpu, &av1_mf1_altref2_offset, val);

		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
		hantro_reg_write(vpu, &av1_mf1_altref_offset, val);
	}

	hantro_reg_write(vpu, &av1_mf2_last_offset, 0);
	hantro_reg_write(vpu, &av1_mf2_last2_offset, 0);
	hantro_reg_write(vpu, &av1_mf2_last3_offset, 0);
	hantro_reg_write(vpu, &av1_mf2_golden_offset, 0);
	hantro_reg_write(vpu, &av1_mf2_bwdref_offset, 0);
	hantro_reg_write(vpu, &av1_mf2_altref2_offset, 0);
	hantro_reg_write(vpu, &av1_mf2_altref_offset, 0);

	/* Motion-field source #2, same conditions as source #1. */
	if (use_ref_frame_mvs && ref_ind > 1 &&
	    cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
	    cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
		int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[1]);
		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[1]);
		u32 *oh = av1_dec->frame_refs[idx].order_hints;
		int val;

		hantro_reg_write(vpu, &av1_use_temporal1_mvs, 1);

		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
		hantro_reg_write(vpu, &av1_mf2_last_offset, val);

		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
		hantro_reg_write(vpu, &av1_mf2_last2_offset, val);

		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
		hantro_reg_write(vpu, &av1_mf2_last3_offset, val);

		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
		hantro_reg_write(vpu, &av1_mf2_golden_offset, val);

		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
		hantro_reg_write(vpu, &av1_mf2_bwdref_offset, val);

		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
		hantro_reg_write(vpu, &av1_mf2_altref2_offset, val);

		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
		hantro_reg_write(vpu, &av1_mf2_altref_offset, val);
	}

	hantro_reg_write(vpu, &av1_mf3_last_offset, 0);
	hantro_reg_write(vpu, &av1_mf3_last2_offset, 0);
	hantro_reg_write(vpu, &av1_mf3_last3_offset, 0);
	hantro_reg_write(vpu, &av1_mf3_golden_offset, 0);
	hantro_reg_write(vpu, &av1_mf3_bwdref_offset, 0);
	hantro_reg_write(vpu, &av1_mf3_altref2_offset, 0);
	hantro_reg_write(vpu, &av1_mf3_altref_offset, 0);

	/* Motion-field source #3, same conditions as source #1. */
	if (use_ref_frame_mvs && ref_ind > 2 &&
	    cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
	    cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
		int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[2]);
		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[2]);
		u32 *oh = av1_dec->frame_refs[idx].order_hints;
		int val;

		hantro_reg_write(vpu, &av1_use_temporal2_mvs, 1);

		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
		hantro_reg_write(vpu, &av1_mf3_last_offset, val);

		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
		hantro_reg_write(vpu, &av1_mf3_last2_offset, val);

		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
		hantro_reg_write(vpu, &av1_mf3_last3_offset, val);

		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
		hantro_reg_write(vpu, &av1_mf3_golden_offset, val);

		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
		hantro_reg_write(vpu, &av1_mf3_bwdref_offset, val);

		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
		hantro_reg_write(vpu, &av1_mf3_altref2_offset, val);

		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
		hantro_reg_write(vpu, &av1_mf3_altref_offset, val);
	}

	/* Distances current->ref for all seven reference slots. */
	hantro_reg_write(vpu, &av1_cur_last_offset, cur_offset[0]);
	hantro_reg_write(vpu, &av1_cur_last2_offset, cur_offset[1]);
	hantro_reg_write(vpu, &av1_cur_last3_offset, cur_offset[2]);
	hantro_reg_write(vpu, &av1_cur_golden_offset, cur_offset[3]);
	hantro_reg_write(vpu, &av1_cur_bwdref_offset, cur_offset[4]);
	hantro_reg_write(vpu, &av1_cur_altref2_offset, cur_offset[5]);
	hantro_reg_write(vpu, &av1_cur_altref_offset, cur_offset[6]);

	/* Reverse distances ref->current for all seven slots. */
	hantro_reg_write(vpu, &av1_cur_last_roffset, cur_roffset[0]);
	hantro_reg_write(vpu, &av1_cur_last2_roffset, cur_roffset[1]);
	hantro_reg_write(vpu, &av1_cur_last3_roffset, cur_roffset[2]);
	hantro_reg_write(vpu, &av1_cur_golden_roffset, cur_roffset[3]);
	hantro_reg_write(vpu, &av1_cur_bwdref_roffset, cur_roffset[4]);
	hantro_reg_write(vpu, &av1_cur_altref2_roffset, cur_roffset[5]);
	hantro_reg_write(vpu, &av1_cur_altref_roffset, cur_roffset[6]);

	/* Reference type of each motion-field source, 0-based. */
	hantro_reg_write(vpu, &av1_mf1_type, mf_types[0] - V4L2_AV1_REF_LAST_FRAME);
	hantro_reg_write(vpu, &av1_mf2_type, mf_types[1] - V4L2_AV1_REF_LAST_FRAME);
	hantro_reg_write(vpu, &av1_mf3_type, mf_types[2] - V4L2_AV1_REF_LAST_FRAME);
}
1854 
/*
 * Program reference-frame state: how many distinct reference buffers
 * are in use, each reference's picture size (and whether scaling is
 * needed), its sign bias and its global motion mode.
 *
 * Intra frames use no references unless intra block copy is allowed,
 * in which case the current frame serves as its own single reference.
 */
static void rockchip_vpu981_av1_dec_set_reference_frames(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	int frame_type = frame->frame_type;
	bool allow_intrabc = !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC);
	int ref_count[AV1DEC_MAX_PIC_BUFFERS] = { 0 };
	struct hantro_dev *vpu = ctx->dev;
	int i, ref_frames = 0;
	bool scale_enable = false;

	if (IS_INTRA(frame_type) && !allow_intrabc)
		return;

	if (!allow_intrabc) {
		/*
		 * Several of the logical references may share one
		 * internal buffer; count only the distinct buffers.
		 */
		for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) {
			int idx = rockchip_vpu981_get_frame_index(ctx, i);

			if (idx >= 0)
				ref_count[idx]++;
		}

		for (i = 0; i < AV1DEC_MAX_PIC_BUFFERS; i++) {
			if (ref_count[i])
				ref_frames++;
		}
	} else {
		ref_frames = 1;
	}
	hantro_reg_write(vpu, &av1_ref_frames, ref_frames);

	rockchip_vpu981_av1_dec_set_frame_sign_bias(ctx);

	for (i = V4L2_AV1_REF_LAST_FRAME; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++) {
		u32 ref = i - 1;	/* 0-based reference slot */
		int idx = 0;
		int width, height;

		if (allow_intrabc) {
			/* Self-reference: current frame's own buffer and size. */
			idx = av1_dec->current_frame_index;
			width = frame->frame_width_minus_1 + 1;
			height = frame->frame_height_minus_1 + 1;
		} else {
			/*
			 * "> 0" is equivalent to ">= 0" here: idx was
			 * initialized to 0, so a lookup returning 0
			 * leaves the same value, and a failed lookup
			 * (negative) falls back to buffer 0.
			 */
			if (rockchip_vpu981_get_frame_index(ctx, ref) > 0)
				idx = rockchip_vpu981_get_frame_index(ctx, ref);
			width = av1_dec->frame_refs[idx].width;
			height = av1_dec->frame_refs[idx].height;
		}

		/* set_ref() reports whether this reference needs scaling. */
		scale_enable |=
		    rockchip_vpu981_av1_dec_set_ref(ctx, ref, idx, width,
						    height);

		rockchip_vpu981_av1_dec_set_sign_bias(ctx, ref,
						      av1_dec->ref_frame_sign_bias[i]);
	}
	hantro_reg_write(vpu, &av1_ref_scaling_enable, scale_enable);

	/* Global motion model type for each of the seven references. */
	hantro_reg_write(vpu, &av1_ref0_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_LAST_FRAME]);
	hantro_reg_write(vpu, &av1_ref1_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_LAST2_FRAME]);
	hantro_reg_write(vpu, &av1_ref2_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_LAST3_FRAME]);
	hantro_reg_write(vpu, &av1_ref3_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_GOLDEN_FRAME]);
	hantro_reg_write(vpu, &av1_ref4_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_BWDREF_FRAME]);
	hantro_reg_write(vpu, &av1_ref5_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_ALTREF2_FRAME]);
	hantro_reg_write(vpu, &av1_ref6_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_ALTREF_FRAME]);

	rockchip_vpu981_av1_dec_set_other_frames(ctx);
}
1931 
/*
 * Translate the per-frame and per-sequence V4L2 AV1 controls into the
 * corresponding hardware register fields: coding-tool flags,
 * quantization parameters, prediction modes and block-size limits.
 */
static void rockchip_vpu981_av1_dec_set_parameters(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;

	/* Frame-level coding-tool flags. */
	hantro_reg_write(vpu, &av1_skip_mode,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SKIP_MODE_PRESENT));
	hantro_reg_write(vpu, &av1_tempor_mvp_e,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS));
	hantro_reg_write(vpu, &av1_delta_lf_res_log,
			 ctrls->frame->loop_filter.delta_lf_res);
	hantro_reg_write(vpu, &av1_delta_lf_multi,
			 !!(ctrls->frame->loop_filter.flags
			    & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI));
	hantro_reg_write(vpu, &av1_delta_lf_present,
			 !!(ctrls->frame->loop_filter.flags
			    & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT));
	hantro_reg_write(vpu, &av1_disable_cdf_update,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_CDF_UPDATE));
	hantro_reg_write(vpu, &av1_allow_warp,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_WARPED_MOTION));
	hantro_reg_write(vpu, &av1_show_frame,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SHOW_FRAME));
	hantro_reg_write(vpu, &av1_switchable_motion_mode,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_IS_MOTION_MODE_SWITCHABLE));

	/* Sequence-level coding-tool flags. */
	hantro_reg_write(vpu, &av1_enable_cdef,
			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF));
	hantro_reg_write(vpu, &av1_allow_masked_compound,
			 !!(ctrls->sequence->flags
			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_MASKED_COMPOUND));
	hantro_reg_write(vpu, &av1_allow_interintra,
			 !!(ctrls->sequence->flags
			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTERINTRA_COMPOUND));
	hantro_reg_write(vpu, &av1_enable_intra_edge_filter,
			 !!(ctrls->sequence->flags
			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTRA_EDGE_FILTER));
	hantro_reg_write(vpu, &av1_allow_filter_intra,
			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_FILTER_INTRA));
	hantro_reg_write(vpu, &av1_enable_jnt_comp,
			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_JNT_COMP));
	hantro_reg_write(vpu, &av1_enable_dual_filter,
			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_DUAL_FILTER));
	hantro_reg_write(vpu, &av1_reduced_tx_set_used,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REDUCED_TX_SET));
	hantro_reg_write(vpu, &av1_allow_screen_content_tools,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS));
	hantro_reg_write(vpu, &av1_allow_intrabc,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC));

	/*
	 * force_integer_mv is only meaningful when screen content tools
	 * are allowed; otherwise program 0.  (The "interger" spelling
	 * comes from the register definition name.)
	 */
	if (!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS))
		hantro_reg_write(vpu, &av1_force_interger_mv, 0);
	else
		hantro_reg_write(vpu, &av1_force_interger_mv,
				 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_FORCE_INTEGER_MV));

	/* NOTE(review): monochrome mode is hard-wired off here — confirm. */
	hantro_reg_write(vpu, &av1_blackwhite_e, 0);
	hantro_reg_write(vpu, &av1_delta_q_res_log, ctrls->frame->quantization.delta_q_res);
	hantro_reg_write(vpu, &av1_delta_q_present,
			 !!(ctrls->frame->quantization.flags
			    & V4L2_AV1_QUANTIZATION_FLAG_DELTA_Q_PRESENT));

	/* Key frames have frame_type == 0, hence the logical negation. */
	hantro_reg_write(vpu, &av1_idr_pic_e, !ctrls->frame->frame_type);
	hantro_reg_write(vpu, &av1_quant_base_qindex, ctrls->frame->quantization.base_q_idx);
	hantro_reg_write(vpu, &av1_bit_depth_y_minus8, ctx->bit_depth - 8);
	hantro_reg_write(vpu, &av1_bit_depth_c_minus8, ctx->bit_depth - 8);

	hantro_reg_write(vpu, &av1_mcomp_filt_type, ctrls->frame->interpolation_filter);
	hantro_reg_write(vpu, &av1_high_prec_mv_e,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_HIGH_PRECISION_MV));
	hantro_reg_write(vpu, &av1_comp_pred_mode,
			 (ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT) ? 2 : 0);
	/* Hardware encoding of tx_mode — TODO confirm the 3/4 mapping. */
	hantro_reg_write(vpu, &av1_transform_mode, (ctrls->frame->tx_mode == 1) ? 3 : 4);
	/* log2 superblock size: 7 = 128x128, 6 = 64x64; min block 8x8. */
	hantro_reg_write(vpu, &av1_max_cb_size,
			 (ctrls->sequence->flags
			  & V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK) ? 7 : 6);
	hantro_reg_write(vpu, &av1_min_cb_size, 3);

	/* Fields not driven by the AV1 controls: cleared explicitly. */
	hantro_reg_write(vpu, &av1_comp_pred_fixed_ref, 0);
	hantro_reg_write(vpu, &av1_comp_pred_var_ref0_av1, 0);
	hantro_reg_write(vpu, &av1_comp_pred_var_ref1_av1, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg0, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg1, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg2, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg3, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg4, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg5, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg6, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg7, 0);

	/* Quantizer deltas and (optional) quantizer matrices. */
	hantro_reg_write(vpu, &av1_qp_delta_y_dc_av1, ctrls->frame->quantization.delta_q_y_dc);
	hantro_reg_write(vpu, &av1_qp_delta_ch_dc_av1, ctrls->frame->quantization.delta_q_u_dc);
	hantro_reg_write(vpu, &av1_qp_delta_ch_ac_av1, ctrls->frame->quantization.delta_q_u_ac);
	if (ctrls->frame->quantization.flags & V4L2_AV1_QUANTIZATION_FLAG_USING_QMATRIX) {
		hantro_reg_write(vpu, &av1_qmlevel_y, ctrls->frame->quantization.qm_y);
		hantro_reg_write(vpu, &av1_qmlevel_u, ctrls->frame->quantization.qm_u);
		hantro_reg_write(vpu, &av1_qmlevel_v, ctrls->frame->quantization.qm_v);
	} else {
		/* 0xff disables the quantizer matrix for that plane. */
		hantro_reg_write(vpu, &av1_qmlevel_y, 0xff);
		hantro_reg_write(vpu, &av1_qmlevel_u, 0xff);
		hantro_reg_write(vpu, &av1_qmlevel_v, 0xff);
	}

	hantro_reg_write(vpu, &av1_lossless_e, rockchip_vpu981_av1_dec_is_lossless(ctx));
	hantro_reg_write(vpu, &av1_quant_delta_v_dc, ctrls->frame->quantization.delta_q_v_dc);
	hantro_reg_write(vpu, &av1_quant_delta_v_ac, ctrls->frame->quantization.delta_q_v_ac);

	/*
	 * Skip-mode reference frames; a zero control value is replaced
	 * by 1 — presumably the hardware needs a valid (non-zero)
	 * reference index here, TODO confirm.
	 */
	hantro_reg_write(vpu, &av1_skip_ref0,
			 (ctrls->frame->skip_mode_frame[0]) ? ctrls->frame->skip_mode_frame[0] : 1);
	hantro_reg_write(vpu, &av1_skip_ref1,
			 (ctrls->frame->skip_mode_frame[1]) ? ctrls->frame->skip_mode_frame[1] : 1);

	/* Tile synchronization scratch buffer (same for current/left). */
	hantro_write_addr(vpu, AV1_MC_SYNC_CURR, av1_dec->tile_buf.dma);
	hantro_write_addr(vpu, AV1_MC_SYNC_LEFT, av1_dec->tile_buf.dma);
}
2047 
2048 static void
rockchip_vpu981_av1_dec_set_input_buffer(struct hantro_ctx * ctx,struct vb2_v4l2_buffer * vb2_src)2049 rockchip_vpu981_av1_dec_set_input_buffer(struct hantro_ctx *ctx,
2050 					 struct vb2_v4l2_buffer *vb2_src)
2051 {
2052 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
2053 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
2054 	const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
2055 	    ctrls->tile_group_entry;
2056 	struct hantro_dev *vpu = ctx->dev;
2057 	dma_addr_t src_dma;
2058 	u32 src_len, src_buf_len;
2059 	int start_bit, offset;
2060 
2061 	src_dma = vb2_dma_contig_plane_dma_addr(&vb2_src->vb2_buf, 0);
2062 	src_len = vb2_get_plane_payload(&vb2_src->vb2_buf, 0);
2063 	src_buf_len = vb2_plane_size(&vb2_src->vb2_buf, 0);
2064 
2065 	start_bit = (group_entry[0].tile_offset & 0xf) * 8;
2066 	offset = group_entry[0].tile_offset & ~0xf;
2067 
2068 	hantro_reg_write(vpu, &av1_strm_buffer_len, src_buf_len);
2069 	hantro_reg_write(vpu, &av1_strm_start_bit, start_bit);
2070 	hantro_reg_write(vpu, &av1_stream_len, src_len);
2071 	hantro_reg_write(vpu, &av1_strm_start_offset, 0);
2072 	hantro_write_addr(vpu, AV1_INPUT_STREAM, src_dma + offset);
2073 }
2074 
2075 static void
rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx * ctx)2076 rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx *ctx)
2077 {
2078 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
2079 	struct hantro_dev *vpu = ctx->dev;
2080 	struct hantro_decoded_buffer *dst;
2081 	struct vb2_v4l2_buffer *vb2_dst;
2082 	dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
2083 	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
2084 	size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
2085 
2086 	vb2_dst = av1_dec->frame_refs[av1_dec->current_frame_index].vb2_ref;
2087 	dst = vb2_to_hantro_decoded_buf(&vb2_dst->vb2_buf);
2088 	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
2089 	chroma_addr = luma_addr + cr_offset;
2090 	mv_addr = luma_addr + mv_offset;
2091 
2092 	hantro_write_addr(vpu, AV1_TILE_OUT_LU, luma_addr);
2093 	hantro_write_addr(vpu, AV1_TILE_OUT_CH, chroma_addr);
2094 	hantro_write_addr(vpu, AV1_TILE_OUT_MV, mv_addr);
2095 }
2096 
/*
 * Decode one AV1 frame: prepare the run, program the full register
 * state from the V4L2 controls, set the stream input / picture output
 * buffers and start the hardware.
 *
 * Returns 0 on success or a negative errno; on failure the run is
 * finished immediately with VB2_BUF_STATE_ERROR.
 */
int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct vb2_v4l2_buffer *vb2_src;
	int ret;

	hantro_start_prepare_run(ctx);

	ret = rockchip_vpu981_av1_dec_prepare_run(ctx);
	if (ret)
		goto prepare_error;

	vb2_src = hantro_get_src_buf(ctx);
	if (!vb2_src) {
		ret = -EINVAL;
		goto prepare_error;
	}

	/* Recycle stale reference slots, then bind the new frame. */
	rockchip_vpu981_av1_dec_clean_refs(ctx);
	rockchip_vpu981_av1_dec_frame_ref(ctx, vb2_src->vb2_buf.timestamp);

	/* Per-frame state derived from the AV1 controls. */
	rockchip_vpu981_av1_dec_set_parameters(ctx);
	rockchip_vpu981_av1_dec_set_global_model(ctx);
	rockchip_vpu981_av1_dec_set_tile_info(ctx);
	rockchip_vpu981_av1_dec_set_reference_frames(ctx);
	rockchip_vpu981_av1_dec_set_segmentation(ctx);
	rockchip_vpu981_av1_dec_set_loopfilter(ctx);
	rockchip_vpu981_av1_dec_set_picture_dimensions(ctx);
	rockchip_vpu981_av1_dec_set_cdef(ctx);
	rockchip_vpu981_av1_dec_set_lr(ctx);
	rockchip_vpu981_av1_dec_set_fgs(ctx);
	rockchip_vpu981_av1_dec_set_prob(ctx);

	/* Static decoder configuration for AV1 mode. */
	hantro_reg_write(vpu, &av1_dec_mode, AV1_DEC_MODE);
	hantro_reg_write(vpu, &av1_dec_out_ec_byte_word, 0);
	hantro_reg_write(vpu, &av1_write_mvs_e, 1);
	hantro_reg_write(vpu, &av1_dec_out_ec_bypass, 1);
	hantro_reg_write(vpu, &av1_dec_clk_gate_e, 1);

	hantro_reg_write(vpu, &av1_dec_abort_e, 0);
	hantro_reg_write(vpu, &av1_dec_tile_int_e, 0);

	/* Bus and performance tuning. */
	hantro_reg_write(vpu, &av1_dec_alignment, 64);
	hantro_reg_write(vpu, &av1_apf_disable, 0);
	hantro_reg_write(vpu, &av1_apf_threshold, 8);
	hantro_reg_write(vpu, &av1_dec_buswidth, 2);
	hantro_reg_write(vpu, &av1_dec_max_burst, 16);
	hantro_reg_write(vpu, &av1_error_conceal_e, 0);
	hantro_reg_write(vpu, &av1_axi_rd_ostd_threshold, 64);
	hantro_reg_write(vpu, &av1_axi_wr_ostd_threshold, 64);

	/* Override hardware timeouts with a generous cycle count. */
	hantro_reg_write(vpu, &av1_ext_timeout_cycles, 0xfffffff);
	hantro_reg_write(vpu, &av1_ext_timeout_override_e, 1);
	hantro_reg_write(vpu, &av1_timeout_cycles, 0xfffffff);
	hantro_reg_write(vpu, &av1_timeout_override_e, 1);

	rockchip_vpu981_av1_dec_set_output_buffer(ctx);
	rockchip_vpu981_av1_dec_set_input_buffer(ctx, vb2_src);

	hantro_end_prepare_run(ctx);

	/* Kick the hardware. */
	hantro_reg_write(vpu, &av1_dec_e, 1);

	return 0;

prepare_error:
	hantro_end_prepare_run(ctx);
	hantro_irq_done(vpu, VB2_BUF_STATE_ERROR);
	return ret;
}
2167 
rockchip_vpu981_postproc_enable(struct hantro_ctx * ctx)2168 static void rockchip_vpu981_postproc_enable(struct hantro_ctx *ctx)
2169 {
2170 	struct hantro_dev *vpu = ctx->dev;
2171 	int width = ctx->dst_fmt.width;
2172 	int height = ctx->dst_fmt.height;
2173 	struct vb2_v4l2_buffer *vb2_dst;
2174 	size_t chroma_offset;
2175 	dma_addr_t dst_dma;
2176 
2177 	vb2_dst = hantro_get_dst_buf(ctx);
2178 
2179 	dst_dma = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0);
2180 	chroma_offset = ctx->dst_fmt.plane_fmt[0].bytesperline *
2181 	    ctx->dst_fmt.height;
2182 
2183 	/* enable post processor */
2184 	hantro_reg_write(vpu, &av1_pp_out_e, 1);
2185 	hantro_reg_write(vpu, &av1_pp_in_format, 0);
2186 	hantro_reg_write(vpu, &av1_pp0_dup_hor, 1);
2187 	hantro_reg_write(vpu, &av1_pp0_dup_ver, 1);
2188 
2189 	hantro_reg_write(vpu, &av1_pp_in_height, height / 2);
2190 	hantro_reg_write(vpu, &av1_pp_in_width, width / 2);
2191 	hantro_reg_write(vpu, &av1_pp_out_height, height);
2192 	hantro_reg_write(vpu, &av1_pp_out_width, width);
2193 	hantro_reg_write(vpu, &av1_pp_out_y_stride,
2194 			 ctx->dst_fmt.plane_fmt[0].bytesperline);
2195 	hantro_reg_write(vpu, &av1_pp_out_c_stride,
2196 			 ctx->dst_fmt.plane_fmt[0].bytesperline);
2197 	switch (ctx->dst_fmt.pixelformat) {
2198 	case V4L2_PIX_FMT_P010:
2199 		hantro_reg_write(vpu, &av1_pp_out_format, 1);
2200 		break;
2201 	case V4L2_PIX_FMT_NV12:
2202 		hantro_reg_write(vpu, &av1_pp_out_format, 3);
2203 		break;
2204 	default:
2205 		hantro_reg_write(vpu, &av1_pp_out_format, 0);
2206 	}
2207 
2208 	hantro_reg_write(vpu, &av1_ppd_blend_exist, 0);
2209 	hantro_reg_write(vpu, &av1_ppd_dith_exist, 0);
2210 	hantro_reg_write(vpu, &av1_ablend_crop_e, 0);
2211 	hantro_reg_write(vpu, &av1_pp_format_customer1_e, 0);
2212 	hantro_reg_write(vpu, &av1_pp_crop_exist, 0);
2213 	hantro_reg_write(vpu, &av1_pp_up_level, 0);
2214 	hantro_reg_write(vpu, &av1_pp_down_level, 0);
2215 	hantro_reg_write(vpu, &av1_pp_exist, 0);
2216 
2217 	hantro_write_addr(vpu, AV1_PP_OUT_LU, dst_dma);
2218 	hantro_write_addr(vpu, AV1_PP_OUT_CH, dst_dma + chroma_offset);
2219 }
2220 
rockchip_vpu981_postproc_disable(struct hantro_ctx * ctx)2221 static void rockchip_vpu981_postproc_disable(struct hantro_ctx *ctx)
2222 {
2223 	struct hantro_dev *vpu = ctx->dev;
2224 
2225 	/* disable post processor */
2226 	hantro_reg_write(vpu, &av1_pp_out_e, 0);
2227 }
2228 
2229 const struct hantro_postproc_ops rockchip_vpu981_postproc_ops = {
2230 	.enable = rockchip_vpu981_postproc_enable,
2231 	.disable = rockchip_vpu981_postproc_disable,
2232 };
2233