xref: /linux/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c (revision 3a39d672e7f48b8d6b91a09afa4b55352773b4b5)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2023, Collabora
4  *
5  * Author: Benjamin Gaignard <benjamin.gaignard@collabora.com>
6  */
7 
8 #include <media/v4l2-mem2mem.h>
9 #include "hantro.h"
10 #include "hantro_v4l2.h"
11 #include "rockchip_vpu981_regs.h"
12 
13 #define AV1_DEC_MODE		17
14 #define GM_GLOBAL_MODELS_PER_FRAME	7
15 #define GLOBAL_MODEL_TOTAL_SIZE	(6 * 4 + 4 * 2)
16 #define GLOBAL_MODEL_SIZE	ALIGN(GM_GLOBAL_MODELS_PER_FRAME * GLOBAL_MODEL_TOTAL_SIZE, 2048)
17 #define AV1_MAX_TILES		128
18 #define AV1_TILE_INFO_SIZE	(AV1_MAX_TILES * 16)
19 #define AV1DEC_MAX_PIC_BUFFERS	24
20 #define AV1_REF_SCALE_SHIFT	14
21 #define AV1_INVALID_IDX		-1
22 #define MAX_FRAME_DISTANCE	31
23 #define AV1_PRIMARY_REF_NONE	7
24 #define AV1_TILE_SIZE		ALIGN(32 * 128, 4096)
25 /*
 * These three values are not defined in enum v4l2_av1_segment_feature
 * because they are not part of the specification.
28  */
29 #define V4L2_AV1_SEG_LVL_ALT_LF_Y_H	2
30 #define V4L2_AV1_SEG_LVL_ALT_LF_U	3
31 #define V4L2_AV1_SEG_LVL_ALT_LF_V	4
32 
33 #define SUPERRES_SCALE_BITS 3
34 #define SCALE_NUMERATOR 8
35 #define SUPERRES_SCALE_DENOMINATOR_MIN (SCALE_NUMERATOR + 1)
36 
37 #define RS_SUBPEL_BITS 6
38 #define RS_SUBPEL_MASK ((1 << RS_SUBPEL_BITS) - 1)
39 #define RS_SCALE_SUBPEL_BITS 14
40 #define RS_SCALE_SUBPEL_MASK ((1 << RS_SCALE_SUBPEL_BITS) - 1)
41 #define RS_SCALE_EXTRA_BITS (RS_SCALE_SUBPEL_BITS - RS_SUBPEL_BITS)
42 #define RS_SCALE_EXTRA_OFF (1 << (RS_SCALE_EXTRA_BITS - 1))
43 
44 #define IS_INTRA(type) ((type == V4L2_AV1_KEY_FRAME) || (type == V4L2_AV1_INTRA_ONLY_FRAME))
45 
46 #define LST_BUF_IDX (V4L2_AV1_REF_LAST_FRAME - V4L2_AV1_REF_LAST_FRAME)
47 #define LST2_BUF_IDX (V4L2_AV1_REF_LAST2_FRAME - V4L2_AV1_REF_LAST_FRAME)
48 #define LST3_BUF_IDX (V4L2_AV1_REF_LAST3_FRAME - V4L2_AV1_REF_LAST_FRAME)
49 #define GLD_BUF_IDX (V4L2_AV1_REF_GOLDEN_FRAME - V4L2_AV1_REF_LAST_FRAME)
50 #define BWD_BUF_IDX (V4L2_AV1_REF_BWDREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
51 #define ALT2_BUF_IDX (V4L2_AV1_REF_ALTREF2_FRAME - V4L2_AV1_REF_LAST_FRAME)
52 #define ALT_BUF_IDX (V4L2_AV1_REF_ALTREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
53 
54 #define DIV_LUT_PREC_BITS 14
55 #define DIV_LUT_BITS 8
56 #define DIV_LUT_NUM BIT(DIV_LUT_BITS)
57 #define WARP_PARAM_REDUCE_BITS 6
58 #define WARPEDMODEL_PREC_BITS 16
59 
/*
 * Rounding right shift: (value + 2^(n-1)) >> n, i.e. divide by 2^n with
 * rounding for non-negative values. Statement expression so that each
 * argument is evaluated exactly once.
 */
#define AV1_DIV_ROUND_UP_POW2(value, n)			\
({							\
	typeof(n) _n  = n;				\
	typeof(value) _value = value;			\
	(_value + (BIT(_n) >> 1)) >> _n;		\
})

/*
 * Signed variant: applies the rounding shift to |value| and restores
 * the sign, so the result rounds away from zero symmetrically.
 */
#define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n)				\
({									\
	typeof(n) _n_  = n;						\
	typeof(value) _value_ = value;					\
	(((_value_) < 0) ? -AV1_DIV_ROUND_UP_POW2(-(_value_), (_n_))	\
		: AV1_DIV_ROUND_UP_POW2((_value_), (_n_)));		\
})
74 
/*
 * Film grain parameters in the layout consumed by the hardware: one
 * 256-entry scaling LUT per plane, followed by the cropped luma grain
 * block (4096 samples) and the two cropped chroma grain blocks
 * (1024 samples each).
 */
struct rockchip_av1_film_grain {
	u8 scaling_lut_y[256];
	u8 scaling_lut_cb[256];
	u8 scaling_lut_cr[256];
	s16 cropped_luma_grain_block[4096];
	s16 cropped_chroma_grain_block[1024 * 2];
};
82 
/*
 * Reciprocal lookup table used by
 * rockchip_vpu981_av1_dec_resolve_divisor_32(): entry f approximates
 * 2^DIV_LUT_PREC_BITS / (1 + f / DIV_LUT_NUM). This matches the
 * div_lut table of the AV1 specification's resolve-divisor process
 * (entry 0 is 2^14 = 16384, entry DIV_LUT_NUM is 2^13 = 8192).
 */
static const short div_lut[DIV_LUT_NUM + 1] = {
	16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
	15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
	15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
	14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
	13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
	13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
	13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
	12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
	12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
	11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
	11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
	11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
	10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
	10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
	10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
	9963,  9939,  9916,  9892,  9869,  9846,  9823,  9800,  9777,  9754,  9732,
	9709,  9687,  9664,  9642,  9620,  9598,  9576,  9554,  9533,  9511,  9489,
	9468,  9447,  9425,  9404,  9383,  9362,  9341,  9321,  9300,  9279,  9259,
	9239,  9218,  9198,  9178,  9158,  9138,  9118,  9098,  9079,  9059,  9039,
	9020,  9001,  8981,  8962,  8943,  8924,  8905,  8886,  8867,  8849,  8830,
	8812,  8793,  8775,  8756,  8738,  8720,  8702,  8684,  8666,  8648,  8630,
	8613,  8595,  8577,  8560,  8542,  8525,  8508,  8490,  8473,  8456,  8439,
	8422,  8405,  8389,  8372,  8355,  8339,  8322,  8306,  8289,  8273,  8257,
	8240,  8224,  8208,  8192,
};
109 
rockchip_vpu981_get_frame_index(struct hantro_ctx * ctx,int ref)110 static int rockchip_vpu981_get_frame_index(struct hantro_ctx *ctx, int ref)
111 {
112 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
113 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
114 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
115 	u64 timestamp;
116 	int i, idx = frame->ref_frame_idx[ref];
117 
118 	if (idx >= V4L2_AV1_TOTAL_REFS_PER_FRAME || idx < 0)
119 		return AV1_INVALID_IDX;
120 
121 	timestamp = frame->reference_frame_ts[idx];
122 	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
123 		if (!av1_dec->frame_refs[i].used)
124 			continue;
125 		if (av1_dec->frame_refs[i].timestamp == timestamp)
126 			return i;
127 	}
128 
129 	return AV1_INVALID_IDX;
130 }
131 
rockchip_vpu981_get_order_hint(struct hantro_ctx * ctx,int ref)132 static int rockchip_vpu981_get_order_hint(struct hantro_ctx *ctx, int ref)
133 {
134 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
135 	int idx = rockchip_vpu981_get_frame_index(ctx, ref);
136 
137 	if (idx != AV1_INVALID_IDX)
138 		return av1_dec->frame_refs[idx].order_hint;
139 
140 	return 0;
141 }
142 
/*
 * Record the frame currently being decoded in the first free
 * frame_refs[] slot: geometry in pixels and in 8x8 mode-info units,
 * timestamp, frame type, order hints and destination buffer. Returns
 * the claimed slot index, or AV1_INVALID_IDX when every slot is busy.
 */
static int rockchip_vpu981_av1_dec_frame_ref(struct hantro_ctx *ctx,
					     u64 timestamp)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
	int slot, hint;

	for (slot = 0; slot < AV1_MAX_FRAME_BUF_COUNT; slot++) {
		if (av1_dec->frame_refs[slot].used)
			continue;

		av1_dec->frame_refs[slot].width = frame->frame_width_minus_1 + 1;
		av1_dec->frame_refs[slot].height = frame->frame_height_minus_1 + 1;
		/* mode-info granularity is 8x8 pixels */
		av1_dec->frame_refs[slot].mi_cols =
			DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
		av1_dec->frame_refs[slot].mi_rows =
			DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
		av1_dec->frame_refs[slot].timestamp = timestamp;
		av1_dec->frame_refs[slot].frame_type = frame->frame_type;
		av1_dec->frame_refs[slot].order_hint = frame->order_hint;
		if (!av1_dec->frame_refs[slot].vb2_ref)
			av1_dec->frame_refs[slot].vb2_ref = hantro_get_dst_buf(ctx);

		for (hint = 0; hint < V4L2_AV1_TOTAL_REFS_PER_FRAME; hint++)
			av1_dec->frame_refs[slot].order_hints[hint] =
				frame->order_hints[hint];

		av1_dec->frame_refs[slot].used = true;
		av1_dec->current_frame_index = slot;

		return slot;
	}

	return AV1_INVALID_IDX;
}
177 
rockchip_vpu981_av1_dec_frame_unref(struct hantro_ctx * ctx,int idx)178 static void rockchip_vpu981_av1_dec_frame_unref(struct hantro_ctx *ctx, int idx)
179 {
180 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
181 
182 	if (idx >= 0)
183 		av1_dec->frame_refs[idx].used = false;
184 }
185 
rockchip_vpu981_av1_dec_clean_refs(struct hantro_ctx * ctx)186 static void rockchip_vpu981_av1_dec_clean_refs(struct hantro_ctx *ctx)
187 {
188 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
189 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
190 
191 	int ref, idx;
192 
193 	for (idx = 0; idx < AV1_MAX_FRAME_BUF_COUNT; idx++) {
194 		u64 timestamp = av1_dec->frame_refs[idx].timestamp;
195 		bool used = false;
196 
197 		if (!av1_dec->frame_refs[idx].used)
198 			continue;
199 
200 		for (ref = 0; ref < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref++) {
201 			if (ctrls->frame->reference_frame_ts[ref] == timestamp)
202 				used = true;
203 		}
204 
205 		if (!used)
206 			rockchip_vpu981_av1_dec_frame_unref(ctx, idx);
207 	}
208 }
209 
/* Size in bytes of the decoded luma plane (width * height at bit_depth bits). */
static size_t rockchip_vpu981_av1_dec_luma_size(struct hantro_ctx *ctx)
{
	return ctx->dst_fmt.width * ctx->dst_fmt.height * ctx->bit_depth / 8;
}
214 
/*
 * End offset of the chroma plane: luma size plus half of it (chroma is
 * half the luma size, i.e. 4:2:0), aligned to 64 bytes. Callers also
 * use this as the offset of the motion-vector buffer within a decoded
 * frame allocation.
 */
static size_t rockchip_vpu981_av1_dec_chroma_size(struct hantro_ctx *ctx)
{
	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);

	return ALIGN((cr_offset * 3) / 2, 64);
}
221 
/*
 * Free the per-tile-column auxiliary DMA buffers (deblocking data and
 * control, CDEF, super-resolution and loop-restoration columns) and
 * reset their CPU pointers so a later reallocation starts clean.
 */
static void rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;

	if (av1_dec->db_data_col.cpu)
		dma_free_coherent(vpu->dev, av1_dec->db_data_col.size,
				  av1_dec->db_data_col.cpu,
				  av1_dec->db_data_col.dma);
	av1_dec->db_data_col.cpu = NULL;

	if (av1_dec->db_ctrl_col.cpu)
		dma_free_coherent(vpu->dev, av1_dec->db_ctrl_col.size,
				  av1_dec->db_ctrl_col.cpu,
				  av1_dec->db_ctrl_col.dma);
	av1_dec->db_ctrl_col.cpu = NULL;

	if (av1_dec->cdef_col.cpu)
		dma_free_coherent(vpu->dev, av1_dec->cdef_col.size,
				  av1_dec->cdef_col.cpu, av1_dec->cdef_col.dma);
	av1_dec->cdef_col.cpu = NULL;

	if (av1_dec->sr_col.cpu)
		dma_free_coherent(vpu->dev, av1_dec->sr_col.size,
				  av1_dec->sr_col.cpu, av1_dec->sr_col.dma);
	av1_dec->sr_col.cpu = NULL;

	if (av1_dec->lr_col.cpu)
		dma_free_coherent(vpu->dev, av1_dec->lr_col.size,
				  av1_dec->lr_col.cpu, av1_dec->lr_col.dma);
	av1_dec->lr_col.cpu = NULL;
}
254 
/*
 * (Re)allocate the auxiliary column buffers whose size depends on the
 * frame height and the number of tile columns. If the existing
 * deblock-data buffer is already large enough, all buffers are kept.
 * Otherwise everything is freed and reallocated for the new geometry.
 *
 * The per-buffer sizing formulas are hardware-specific (presumably
 * from the VeriSilicon reference implementation) — NOTE(review):
 * confirm against the VPU981 databook before changing any of them.
 *
 * Returns 0 on success, -ENOMEM on allocation failure (with all
 * column buffers freed).
 */
static int rockchip_vpu981_av1_dec_tiles_reallocate(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_av1_tile_info *tile_info = &ctrls->frame->tile_info;
	unsigned int num_tile_cols = tile_info->tile_cols;
	unsigned int height = ALIGN(ctrls->frame->frame_height_minus_1 + 1, 64);
	unsigned int height_in_sb = height / 64;	/* 64x64 superblocks */
	unsigned int stripe_num = ((height + 8) + 63) / 64;
	size_t size;

	/* Current deblock buffer still big enough: keep all buffers. */
	if (av1_dec->db_data_col.size >=
	    ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols)
		return 0;

	rockchip_vpu981_av1_dec_tiles_free(ctx);

	/* deblocking filter data, per tile column */
	size = ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols;
	av1_dec->db_data_col.cpu = dma_alloc_coherent(vpu->dev, size,
						      &av1_dec->db_data_col.dma,
						      GFP_KERNEL);
	if (!av1_dec->db_data_col.cpu)
		goto buffer_allocation_error;
	av1_dec->db_data_col.size = size;

	/* deblocking filter control, per tile column */
	size = ALIGN(height * 2 * 16 / 4, 128) * num_tile_cols;
	av1_dec->db_ctrl_col.cpu = dma_alloc_coherent(vpu->dev, size,
						      &av1_dec->db_ctrl_col.dma,
						      GFP_KERNEL);
	if (!av1_dec->db_ctrl_col.cpu)
		goto buffer_allocation_error;
	av1_dec->db_ctrl_col.size = size;

	/* CDEF column buffer */
	size = ALIGN(height_in_sb * 44 * ctx->bit_depth * 16 / 8, 128) * num_tile_cols;
	av1_dec->cdef_col.cpu = dma_alloc_coherent(vpu->dev, size,
						   &av1_dec->cdef_col.dma,
						   GFP_KERNEL);
	if (!av1_dec->cdef_col.cpu)
		goto buffer_allocation_error;
	av1_dec->cdef_col.size = size;

	/* super-resolution column buffer */
	size = ALIGN(height_in_sb * (3040 + 1280), 128) * num_tile_cols;
	av1_dec->sr_col.cpu = dma_alloc_coherent(vpu->dev, size,
						 &av1_dec->sr_col.dma,
						 GFP_KERNEL);
	if (!av1_dec->sr_col.cpu)
		goto buffer_allocation_error;
	av1_dec->sr_col.size = size;

	/* loop-restoration column buffer */
	size = ALIGN(stripe_num * 1536 * ctx->bit_depth / 8, 128) * num_tile_cols;
	av1_dec->lr_col.cpu = dma_alloc_coherent(vpu->dev, size,
						 &av1_dec->lr_col.dma,
						 GFP_KERNEL);
	if (!av1_dec->lr_col.cpu)
		goto buffer_allocation_error;
	av1_dec->lr_col.size = size;

	av1_dec->num_tile_cols_allocated = num_tile_cols;
	return 0;

buffer_allocation_error:
	rockchip_vpu981_av1_dec_tiles_free(ctx);
	return -ENOMEM;
}
320 
/*
 * Release every DMA buffer owned by the AV1 decoder context: the
 * fixed-size buffers allocated in rockchip_vpu981_av1_dec_init() and
 * the geometry-dependent column buffers.
 */
void rockchip_vpu981_av1_dec_exit(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;

	if (av1_dec->global_model.cpu)
		dma_free_coherent(vpu->dev, av1_dec->global_model.size,
				  av1_dec->global_model.cpu,
				  av1_dec->global_model.dma);
	av1_dec->global_model.cpu = NULL;

	if (av1_dec->tile_info.cpu)
		dma_free_coherent(vpu->dev, av1_dec->tile_info.size,
				  av1_dec->tile_info.cpu,
				  av1_dec->tile_info.dma);
	av1_dec->tile_info.cpu = NULL;

	if (av1_dec->film_grain.cpu)
		dma_free_coherent(vpu->dev, av1_dec->film_grain.size,
				  av1_dec->film_grain.cpu,
				  av1_dec->film_grain.dma);
	av1_dec->film_grain.cpu = NULL;

	if (av1_dec->prob_tbl.cpu)
		dma_free_coherent(vpu->dev, av1_dec->prob_tbl.size,
				  av1_dec->prob_tbl.cpu, av1_dec->prob_tbl.dma);
	av1_dec->prob_tbl.cpu = NULL;

	if (av1_dec->prob_tbl_out.cpu)
		dma_free_coherent(vpu->dev, av1_dec->prob_tbl_out.size,
				  av1_dec->prob_tbl_out.cpu,
				  av1_dec->prob_tbl_out.dma);
	av1_dec->prob_tbl_out.cpu = NULL;

	if (av1_dec->tile_buf.cpu)
		dma_free_coherent(vpu->dev, av1_dec->tile_buf.size,
				  av1_dec->tile_buf.cpu, av1_dec->tile_buf.dma);
	av1_dec->tile_buf.cpu = NULL;

	rockchip_vpu981_av1_dec_tiles_free(ctx);
}
362 
rockchip_vpu981_av1_dec_init(struct hantro_ctx * ctx)363 int rockchip_vpu981_av1_dec_init(struct hantro_ctx *ctx)
364 {
365 	struct hantro_dev *vpu = ctx->dev;
366 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
367 
368 	memset(av1_dec, 0, sizeof(*av1_dec));
369 
370 	av1_dec->global_model.cpu = dma_alloc_coherent(vpu->dev, GLOBAL_MODEL_SIZE,
371 						       &av1_dec->global_model.dma,
372 						       GFP_KERNEL);
373 	if (!av1_dec->global_model.cpu)
374 		return -ENOMEM;
375 	av1_dec->global_model.size = GLOBAL_MODEL_SIZE;
376 
377 	av1_dec->tile_info.cpu = dma_alloc_coherent(vpu->dev, AV1_MAX_TILES,
378 						    &av1_dec->tile_info.dma,
379 						    GFP_KERNEL);
380 	if (!av1_dec->tile_info.cpu)
381 		return -ENOMEM;
382 	av1_dec->tile_info.size = AV1_MAX_TILES;
383 
384 	av1_dec->film_grain.cpu = dma_alloc_coherent(vpu->dev,
385 						     ALIGN(sizeof(struct rockchip_av1_film_grain), 2048),
386 						     &av1_dec->film_grain.dma,
387 						     GFP_KERNEL);
388 	if (!av1_dec->film_grain.cpu)
389 		return -ENOMEM;
390 	av1_dec->film_grain.size = ALIGN(sizeof(struct rockchip_av1_film_grain), 2048);
391 
392 	av1_dec->prob_tbl.cpu = dma_alloc_coherent(vpu->dev,
393 						   ALIGN(sizeof(struct av1cdfs), 2048),
394 						   &av1_dec->prob_tbl.dma,
395 						   GFP_KERNEL);
396 	if (!av1_dec->prob_tbl.cpu)
397 		return -ENOMEM;
398 	av1_dec->prob_tbl.size = ALIGN(sizeof(struct av1cdfs), 2048);
399 
400 	av1_dec->prob_tbl_out.cpu = dma_alloc_coherent(vpu->dev,
401 						       ALIGN(sizeof(struct av1cdfs), 2048),
402 						       &av1_dec->prob_tbl_out.dma,
403 						       GFP_KERNEL);
404 	if (!av1_dec->prob_tbl_out.cpu)
405 		return -ENOMEM;
406 	av1_dec->prob_tbl_out.size = ALIGN(sizeof(struct av1cdfs), 2048);
407 	av1_dec->cdfs = &av1_dec->default_cdfs;
408 	av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
409 
410 	rockchip_av1_set_default_cdfs(av1_dec->cdfs, av1_dec->cdfs_ndvc);
411 
412 	av1_dec->tile_buf.cpu = dma_alloc_coherent(vpu->dev,
413 						   AV1_TILE_SIZE,
414 						   &av1_dec->tile_buf.dma,
415 						   GFP_KERNEL);
416 	if (!av1_dec->tile_buf.cpu)
417 		return -ENOMEM;
418 	av1_dec->tile_buf.size = AV1_TILE_SIZE;
419 
420 	return 0;
421 }
422 
rockchip_vpu981_av1_dec_prepare_run(struct hantro_ctx * ctx)423 static int rockchip_vpu981_av1_dec_prepare_run(struct hantro_ctx *ctx)
424 {
425 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
426 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
427 
428 	ctrls->sequence = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_SEQUENCE);
429 	if (WARN_ON(!ctrls->sequence))
430 		return -EINVAL;
431 
432 	ctrls->tile_group_entry =
433 	    hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY);
434 	if (WARN_ON(!ctrls->tile_group_entry))
435 		return -EINVAL;
436 
437 	ctrls->frame = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FRAME);
438 	if (WARN_ON(!ctrls->frame))
439 		return -EINVAL;
440 
441 	ctrls->film_grain =
442 	    hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FILM_GRAIN);
443 
444 	return rockchip_vpu981_av1_dec_tiles_reallocate(ctx);
445 }
446 
/*
 * Index of the most significant set bit of @n (0-based). Returns 0 for
 * n == 0 as well, matching the convention the callers rely on.
 */
static inline int rockchip_vpu981_av1_dec_get_msb(u32 n)
{
	int msb = 0;

	while (n > 1) {
		n >>= 1;
		msb++;
	}

	return msb;
}
453 
/*
 * AV1 "resolve divisor" process: approximate 1/d as a fixed-point
 * multiplier. On return, the result times 2^-(*shift) approximates
 * 1/d, with *shift including the DIV_LUT_PREC_BITS table precision.
 * Returns -1 when the computed table index would be out of range.
 */
static short rockchip_vpu981_av1_dec_resolve_divisor_32(u32 d, short *shift)
{
	int f;
	u64 e;

	*shift = rockchip_vpu981_av1_dec_get_msb(d);
	/* e is obtained from D after resetting the most significant 1 bit. */
	e = d - ((u32)1 << *shift);
	/* Get the most significant DIV_LUT_BITS (8) bits of e into f */
	if (*shift > DIV_LUT_BITS)
		f = AV1_DIV_ROUND_UP_POW2(e, *shift - DIV_LUT_BITS);
	else
		f = e << (DIV_LUT_BITS - *shift);
	if (f > DIV_LUT_NUM)
		return -1;
	*shift += DIV_LUT_PREC_BITS;
	/* Use f as lookup into the precomputed table of multipliers */
	return div_lut[f];
}
473 
/*
 * Derive the shear parameters (alpha, beta, gamma, delta) the hardware
 * needs for a warped global-motion model from the six warp parameters,
 * following the AV1 specification's shear-parameter computation.
 * When mat[2] <= 0 (degenerate model) the outputs are left untouched;
 * the caller pre-initializes them to zero.
 */
static void
rockchip_vpu981_av1_dec_get_shear_params(const u32 *params, s64 *alpha,
					 s64 *beta, s64 *gamma, s64 *delta)
{
	/* The warp parameters are signed values carried in u32 slots. */
	const int *mat = params;
	short shift;
	short y;
	long long gv, dv;

	if (mat[2] <= 0)
		return;

	*alpha = clamp_val(mat[2] - (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);
	*beta = clamp_val(mat[3], S16_MIN, S16_MAX);

	/* Fixed-point reciprocal of |mat[2]| via div_lut, sign restored. */
	y = rockchip_vpu981_av1_dec_resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);

	gv = ((long long)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;

	*gamma = clamp_val((int)AV1_DIV_ROUND_UP_POW2_SIGNED(gv, shift), S16_MIN, S16_MAX);

	dv = ((long long)mat[3] * mat[4]) * y;
	*delta = clamp_val(mat[5] -
		(int)AV1_DIV_ROUND_UP_POW2_SIGNED(dv, shift) - (1 << WARPEDMODEL_PREC_BITS),
		S16_MIN, S16_MAX);

	/* Round each parameter to a multiple of 1 << WARP_PARAM_REDUCE_BITS. */
	*alpha = AV1_DIV_ROUND_UP_POW2_SIGNED(*alpha, WARP_PARAM_REDUCE_BITS)
		 * (1 << WARP_PARAM_REDUCE_BITS);
	*beta = AV1_DIV_ROUND_UP_POW2_SIGNED(*beta, WARP_PARAM_REDUCE_BITS)
		* (1 << WARP_PARAM_REDUCE_BITS);
	*gamma = AV1_DIV_ROUND_UP_POW2_SIGNED(*gamma, WARP_PARAM_REDUCE_BITS)
		 * (1 << WARP_PARAM_REDUCE_BITS);
	*delta = AV1_DIV_ROUND_UP_POW2_SIGNED(*delta, WARP_PARAM_REDUCE_BITS)
		* (1 << WARP_PARAM_REDUCE_BITS);
}
509 
/*
 * Fill the global-motion model DMA buffer and point the hardware at
 * it. The buffer holds one record per inter reference frame
 * (LAST..ALTREF): six 32-bit warp parameters followed by the four
 * 16-bit shear parameters (GLOBAL_MODEL_TOTAL_SIZE bytes each).
 */
static void rockchip_vpu981_av1_dec_set_global_model(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	const struct v4l2_av1_global_motion *gm = &frame->global_motion;
	u8 *dst = av1_dec->global_model.cpu;
	struct hantro_dev *vpu = ctx->dev;
	int ref_frame, i;

	memset(dst, 0, GLOBAL_MODEL_SIZE);
	for (ref_frame = 0; ref_frame < V4L2_AV1_REFS_PER_FRAME; ++ref_frame) {
		s64 alpha = 0, beta = 0, gamma = 0, delta = 0;

		/*
		 * Copy the six warp parameters; entries 2 and 3 are
		 * written swapped, presumably the ordering the hardware
		 * expects — NOTE(review): confirm against the databook.
		 */
		for (i = 0; i < 6; ++i) {
			if (i == 2)
				*(s32 *)dst =
					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][3];
			else if (i == 3)
				*(s32 *)dst =
					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][2];
			else
				*(s32 *)dst =
					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][i];
			dst += 4;
		}

		/* Shear parameters only exist up to the affine model. */
		if (gm->type[V4L2_AV1_REF_LAST_FRAME + ref_frame] <= V4L2_AV1_WARP_MODEL_AFFINE)
			rockchip_vpu981_av1_dec_get_shear_params(&gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][0],
								 &alpha, &beta, &gamma, &delta);

		*(s16 *)dst = alpha;
		dst += 2;
		*(s16 *)dst = beta;
		dst += 2;
		*(s16 *)dst = gamma;
		dst += 2;
		*(s16 *)dst = delta;
		dst += 2;
	}

	hantro_write_addr(vpu, AV1_GLOBAL_MODEL, av1_dec->global_model.dma);
}
553 
/*
 * Smallest k such that (1 << k) >= target (the AV1 TileLog2 helper).
 * Returns 0 for target <= 1.
 */
static int rockchip_vpu981_av1_tile_log2(int target)
{
	int k = 0;

	while ((1 << k) < target)
		k++;

	return k;
}
566 
/*
 * Build the tile-info DMA buffer and program the tile-related
 * registers. Each tile gets a 16-byte record: its width and height in
 * superblock units (8 bytes), then the start and end byte offsets of
 * its data within the tile buffer (4 bytes each, little-endian).
 * Tiles are emitted column-major, matching av1_tile_transpose = 1.
 */
static void rockchip_vpu981_av1_dec_set_tile_info(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_av1_tile_info *tile_info = &ctrls->frame->tile_info;
	const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
	    ctrls->tile_group_entry;
	int context_update_y =
	    tile_info->context_update_tile_id / tile_info->tile_cols;
	int context_update_x =
	    tile_info->context_update_tile_id % tile_info->tile_cols;
	/* Re-linearize the id for the transposed (column-major) order. */
	int context_update_tile_id =
	    context_update_x * tile_info->tile_rows + context_update_y;
	u8 *dst = av1_dec->tile_info.cpu;
	struct hantro_dev *vpu = ctx->dev;
	int tile0, tile1;

	memset(dst, 0, av1_dec->tile_info.size);

	for (tile0 = 0; tile0 < tile_info->tile_cols; tile0++) {
		for (tile1 = 0; tile1 < tile_info->tile_rows; tile1++) {
			int tile_id = tile1 * tile_info->tile_cols + tile0;
			u32 start, end;
			u32 y0 =
			    tile_info->height_in_sbs_minus_1[tile1] + 1;
			u32 x0 = tile_info->width_in_sbs_minus_1[tile0] + 1;

			/* tile size in SB units (width,height) */
			*dst++ = x0;
			*dst++ = 0;
			*dst++ = 0;
			*dst++ = 0;
			*dst++ = y0;
			*dst++ = 0;
			*dst++ = 0;
			*dst++ = 0;

			/* tile start position */
			start = group_entry[tile_id].tile_offset - group_entry[0].tile_offset;
			*dst++ = start & 255;
			*dst++ = (start >> 8) & 255;
			*dst++ = (start >> 16) & 255;
			*dst++ = (start >> 24) & 255;

			/* number of bytes in tile data */
			end = start + group_entry[tile_id].tile_size;
			*dst++ = end & 255;
			*dst++ = (end >> 8) & 255;
			*dst++ = (end >> 16) & 255;
			*dst++ = (end >> 24) & 255;
		}
	}

	hantro_reg_write(vpu, &av1_multicore_expect_context_update, !!(context_update_x == 0));
	hantro_reg_write(vpu, &av1_tile_enable,
			 !!((tile_info->tile_cols > 1) || (tile_info->tile_rows > 1)));
	hantro_reg_write(vpu, &av1_num_tile_cols_8k, tile_info->tile_cols);
	hantro_reg_write(vpu, &av1_num_tile_rows_8k, tile_info->tile_rows);
	hantro_reg_write(vpu, &av1_context_update_tile_id, context_update_tile_id);
	hantro_reg_write(vpu, &av1_tile_transpose, 1);
	/* Multiple tiles in either dimension need the real size field width. */
	if (rockchip_vpu981_av1_tile_log2(tile_info->tile_cols) ||
	    rockchip_vpu981_av1_tile_log2(tile_info->tile_rows))
		hantro_reg_write(vpu, &av1_dec_tile_size_mag, tile_info->tile_size_bytes - 1);
	else
		hantro_reg_write(vpu, &av1_dec_tile_size_mag, 3);

	hantro_write_addr(vpu, AV1_TILE_BASE, av1_dec->tile_info.dma);
}
635 
rockchip_vpu981_av1_dec_get_dist(struct hantro_ctx * ctx,int a,int b)636 static int rockchip_vpu981_av1_dec_get_dist(struct hantro_ctx *ctx,
637 					    int a, int b)
638 {
639 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
640 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
641 	int bits = ctrls->sequence->order_hint_bits - 1;
642 	int diff, m;
643 
644 	if (!ctrls->sequence->order_hint_bits)
645 		return 0;
646 
647 	diff = a - b;
648 	m = 1 << bits;
649 	diff = (diff & (m - 1)) - (diff & m);
650 
651 	return diff;
652 }
653 
/*
 * Compute ref_frame_sign_bias[] for the current frame: 1 when a
 * reference frame lies after the current frame in display order
 * (positive relative distance), 0 otherwise. All entries are cleared
 * for intra frames or when order hints are not coded.
 */
static void rockchip_vpu981_av1_dec_set_frame_sign_bias(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	const struct v4l2_ctrl_av1_sequence *sequence = ctrls->sequence;
	int i;

	if (!sequence->order_hint_bits || IS_INTRA(frame->frame_type)) {
		for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++)
			av1_dec->ref_frame_sign_bias[i] = 0;

		return;
	}
	/*
	 * Loop index i runs over the LAST..ALTREF slots; the result is
	 * stored at i + 1 because entry 0 corresponds to INTRA and
	 * keeps its previous value here.
	 */
	for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; i++) {
		if (rockchip_vpu981_get_frame_index(ctx, i) >= 0) {
			int rel_off =
			    rockchip_vpu981_av1_dec_get_dist(ctx,
							     rockchip_vpu981_get_order_hint(ctx, i),
							     frame->order_hint);
			av1_dec->ref_frame_sign_bias[i + 1] = (rel_off <= 0) ? 0 : 1;
		}
	}
}
679 
/*
 * Program one hardware reference-frame slot @ref: the reference's
 * dimensions, its scale factors relative to the current frame
 * (AV1_REF_SCALE_SHIFT fixed-point, rounded), and the luma/chroma/MV
 * base addresses of frame_refs[@idx]'s decoded buffer.
 *
 * Returns true when the reference is scaled (either ratio differs
 * from 1.0 in fixed point).
 */
static bool
rockchip_vpu981_av1_dec_set_ref(struct hantro_ctx *ctx, int ref, int idx,
				int width, int height)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_decoded_buffer *dst;
	dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
	size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
	int cur_width = frame->frame_width_minus_1 + 1;
	int cur_height = frame->frame_height_minus_1 + 1;
	/* 14-bit fixed-point ref/current size ratios, with rounding. */
	int scale_width =
	    ((width << AV1_REF_SCALE_SHIFT) + cur_width / 2) / cur_width;
	int scale_height =
	    ((height << AV1_REF_SCALE_SHIFT) + cur_height / 2) / cur_height;

	/*
	 * NOTE(review): the width-derived scale goes to the *ver*_scale
	 * register and the height-derived one to *hor*_scale in every
	 * case below — looks swapped but is consistent; confirm against
	 * the register documentation before "fixing".
	 */
	switch (ref) {
	case 0:
		hantro_reg_write(vpu, &av1_ref0_height, height);
		hantro_reg_write(vpu, &av1_ref0_width, width);
		hantro_reg_write(vpu, &av1_ref0_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref0_hor_scale, scale_height);
		break;
	case 1:
		hantro_reg_write(vpu, &av1_ref1_height, height);
		hantro_reg_write(vpu, &av1_ref1_width, width);
		hantro_reg_write(vpu, &av1_ref1_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref1_hor_scale, scale_height);
		break;
	case 2:
		hantro_reg_write(vpu, &av1_ref2_height, height);
		hantro_reg_write(vpu, &av1_ref2_width, width);
		hantro_reg_write(vpu, &av1_ref2_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref2_hor_scale, scale_height);
		break;
	case 3:
		hantro_reg_write(vpu, &av1_ref3_height, height);
		hantro_reg_write(vpu, &av1_ref3_width, width);
		hantro_reg_write(vpu, &av1_ref3_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref3_hor_scale, scale_height);
		break;
	case 4:
		hantro_reg_write(vpu, &av1_ref4_height, height);
		hantro_reg_write(vpu, &av1_ref4_width, width);
		hantro_reg_write(vpu, &av1_ref4_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref4_hor_scale, scale_height);
		break;
	case 5:
		hantro_reg_write(vpu, &av1_ref5_height, height);
		hantro_reg_write(vpu, &av1_ref5_width, width);
		hantro_reg_write(vpu, &av1_ref5_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref5_hor_scale, scale_height);
		break;
	case 6:
		hantro_reg_write(vpu, &av1_ref6_height, height);
		hantro_reg_write(vpu, &av1_ref6_width, width);
		hantro_reg_write(vpu, &av1_ref6_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref6_hor_scale, scale_height);
		break;
	default:
		pr_warn("AV1 invalid reference frame index\n");
	}

	dst = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
	chroma_addr = luma_addr + cr_offset;
	mv_addr = luma_addr + mv_offset;

	hantro_write_addr(vpu, AV1_REFERENCE_Y(ref), luma_addr);
	hantro_write_addr(vpu, AV1_REFERENCE_CB(ref), chroma_addr);
	hantro_write_addr(vpu, AV1_REFERENCE_MV(ref), mv_addr);

	return (scale_width != (1 << AV1_REF_SCALE_SHIFT)) ||
		(scale_height != (1 << AV1_REF_SCALE_SHIFT));
}
758 
/*
 * Write the sign-bias bit @val for reference slot @ref to the matching
 * per-reference hardware register. Out-of-range slots only log a
 * warning.
 */
static void rockchip_vpu981_av1_dec_set_sign_bias(struct hantro_ctx *ctx,
						  int ref, int val)
{
	struct hantro_dev *vpu = ctx->dev;

	switch (ref) {
	case 0:
		hantro_reg_write(vpu, &av1_ref0_sign_bias, val);
		break;
	case 1:
		hantro_reg_write(vpu, &av1_ref1_sign_bias, val);
		break;
	case 2:
		hantro_reg_write(vpu, &av1_ref2_sign_bias, val);
		break;
	case 3:
		hantro_reg_write(vpu, &av1_ref3_sign_bias, val);
		break;
	case 4:
		hantro_reg_write(vpu, &av1_ref4_sign_bias, val);
		break;
	case 5:
		hantro_reg_write(vpu, &av1_ref5_sign_bias, val);
		break;
	case 6:
		hantro_reg_write(vpu, &av1_ref6_sign_bias, val);
		break;
	default:
		pr_warn("AV1 invalid sign bias index\n");
		break;
	}
}
791 
rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx * ctx)792 static void rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx *ctx)
793 {
794 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
795 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
796 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
797 	const struct v4l2_av1_segmentation *seg = &frame->segmentation;
798 	u32 segval[V4L2_AV1_MAX_SEGMENTS][V4L2_AV1_SEG_LVL_MAX] = { 0 };
799 	struct hantro_dev *vpu = ctx->dev;
800 	u8 segsign = 0, preskip_segid = 0, last_active_seg = 0, i, j;
801 
802 	if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED) &&
803 	    frame->primary_ref_frame < V4L2_AV1_REFS_PER_FRAME) {
804 		int idx = rockchip_vpu981_get_frame_index(ctx, frame->primary_ref_frame);
805 
806 		if (idx >= 0) {
807 			dma_addr_t luma_addr, mv_addr = 0;
808 			struct hantro_decoded_buffer *seg;
809 			size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
810 
811 			seg = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
812 			luma_addr = hantro_get_dec_buf_addr(ctx, &seg->base.vb.vb2_buf);
813 			mv_addr = luma_addr + mv_offset;
814 
815 			hantro_write_addr(vpu, AV1_SEGMENTATION, mv_addr);
816 			hantro_reg_write(vpu, &av1_use_temporal3_mvs, 1);
817 		}
818 	}
819 
820 	hantro_reg_write(vpu, &av1_segment_temp_upd_e,
821 			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_TEMPORAL_UPDATE));
822 	hantro_reg_write(vpu, &av1_segment_upd_e,
823 			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_UPDATE_MAP));
824 	hantro_reg_write(vpu, &av1_segment_e,
825 			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED));
826 
827 	hantro_reg_write(vpu, &av1_error_resilient,
828 			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE));
829 
830 	if (IS_INTRA(frame->frame_type) ||
831 	    !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE)) {
832 		hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
833 	}
834 
835 	if (seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED) {
836 		int s;
837 
838 		for (s = 0; s < V4L2_AV1_MAX_SEGMENTS; s++) {
839 			if (seg->feature_enabled[s] &
840 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
841 				segval[s][V4L2_AV1_SEG_LVL_ALT_Q] =
842 				    clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q]),
843 					  0, 255);
844 				segsign |=
845 					(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q] < 0) << s;
846 			}
847 
848 			if (seg->feature_enabled[s] &
849 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_V))
850 				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V] =
851 					clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]),
852 					      -63, 63);
853 
854 			if (seg->feature_enabled[s] &
855 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_H))
856 				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H] =
857 				    clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]),
858 					  -63, 63);
859 
860 			if (seg->feature_enabled[s] &
861 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_U))
862 				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_U] =
863 				    clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_U]),
864 					  -63, 63);
865 
866 			if (seg->feature_enabled[s] &
867 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_V))
868 				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_V] =
869 				    clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_V]),
870 					  -63, 63);
871 
872 			if (frame->frame_type && seg->feature_enabled[s] &
873 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_FRAME))
874 				segval[s][V4L2_AV1_SEG_LVL_REF_FRAME]++;
875 
876 			if (seg->feature_enabled[s] &
877 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_SKIP))
878 				segval[s][V4L2_AV1_SEG_LVL_REF_SKIP] = 1;
879 
880 			if (seg->feature_enabled[s] &
881 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_GLOBALMV))
882 				segval[s][V4L2_AV1_SEG_LVL_REF_GLOBALMV] = 1;
883 		}
884 	}
885 
886 	for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
887 		for (j = 0; j < V4L2_AV1_SEG_LVL_MAX; j++) {
888 			if (seg->feature_enabled[i]
889 			    & V4L2_AV1_SEGMENT_FEATURE_ENABLED(j)) {
890 				preskip_segid |= (j >= V4L2_AV1_SEG_LVL_REF_FRAME);
891 				last_active_seg = max(i, last_active_seg);
892 			}
893 		}
894 	}
895 
896 	hantro_reg_write(vpu, &av1_last_active_seg, last_active_seg);
897 	hantro_reg_write(vpu, &av1_preskip_segid, preskip_segid);
898 
899 	hantro_reg_write(vpu, &av1_seg_quant_sign, segsign);
900 
901 	/* Write QP, filter level, ref frame and skip for every segment */
902 	hantro_reg_write(vpu, &av1_quant_seg0,
903 			 segval[0][V4L2_AV1_SEG_LVL_ALT_Q]);
904 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg0,
905 			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
906 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg0,
907 			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
908 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg0,
909 			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_U]);
910 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg0,
911 			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_V]);
912 	hantro_reg_write(vpu, &av1_refpic_seg0,
913 			 segval[0][V4L2_AV1_SEG_LVL_REF_FRAME]);
914 	hantro_reg_write(vpu, &av1_skip_seg0,
915 			 segval[0][V4L2_AV1_SEG_LVL_REF_SKIP]);
916 	hantro_reg_write(vpu, &av1_global_mv_seg0,
917 			 segval[0][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
918 
919 	hantro_reg_write(vpu, &av1_quant_seg1,
920 			 segval[1][V4L2_AV1_SEG_LVL_ALT_Q]);
921 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg1,
922 			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
923 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg1,
924 			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
925 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg1,
926 			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_U]);
927 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg1,
928 			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_V]);
929 	hantro_reg_write(vpu, &av1_refpic_seg1,
930 			 segval[1][V4L2_AV1_SEG_LVL_REF_FRAME]);
931 	hantro_reg_write(vpu, &av1_skip_seg1,
932 			 segval[1][V4L2_AV1_SEG_LVL_REF_SKIP]);
933 	hantro_reg_write(vpu, &av1_global_mv_seg1,
934 			 segval[1][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
935 
936 	hantro_reg_write(vpu, &av1_quant_seg2,
937 			 segval[2][V4L2_AV1_SEG_LVL_ALT_Q]);
938 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg2,
939 			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
940 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg2,
941 			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
942 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg2,
943 			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_U]);
944 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg2,
945 			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_V]);
946 	hantro_reg_write(vpu, &av1_refpic_seg2,
947 			 segval[2][V4L2_AV1_SEG_LVL_REF_FRAME]);
948 	hantro_reg_write(vpu, &av1_skip_seg2,
949 			 segval[2][V4L2_AV1_SEG_LVL_REF_SKIP]);
950 	hantro_reg_write(vpu, &av1_global_mv_seg2,
951 			 segval[2][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
952 
953 	hantro_reg_write(vpu, &av1_quant_seg3,
954 			 segval[3][V4L2_AV1_SEG_LVL_ALT_Q]);
955 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg3,
956 			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
957 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg3,
958 			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
959 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg3,
960 			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_U]);
961 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg3,
962 			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_V]);
963 	hantro_reg_write(vpu, &av1_refpic_seg3,
964 			 segval[3][V4L2_AV1_SEG_LVL_REF_FRAME]);
965 	hantro_reg_write(vpu, &av1_skip_seg3,
966 			 segval[3][V4L2_AV1_SEG_LVL_REF_SKIP]);
967 	hantro_reg_write(vpu, &av1_global_mv_seg3,
968 			 segval[3][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
969 
970 	hantro_reg_write(vpu, &av1_quant_seg4,
971 			 segval[4][V4L2_AV1_SEG_LVL_ALT_Q]);
972 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg4,
973 			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
974 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg4,
975 			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
976 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg4,
977 			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_U]);
978 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg4,
979 			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_V]);
980 	hantro_reg_write(vpu, &av1_refpic_seg4,
981 			 segval[4][V4L2_AV1_SEG_LVL_REF_FRAME]);
982 	hantro_reg_write(vpu, &av1_skip_seg4,
983 			 segval[4][V4L2_AV1_SEG_LVL_REF_SKIP]);
984 	hantro_reg_write(vpu, &av1_global_mv_seg4,
985 			 segval[4][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
986 
987 	hantro_reg_write(vpu, &av1_quant_seg5,
988 			 segval[5][V4L2_AV1_SEG_LVL_ALT_Q]);
989 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg5,
990 			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
991 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg5,
992 			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
993 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg5,
994 			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_U]);
995 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg5,
996 			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_V]);
997 	hantro_reg_write(vpu, &av1_refpic_seg5,
998 			 segval[5][V4L2_AV1_SEG_LVL_REF_FRAME]);
999 	hantro_reg_write(vpu, &av1_skip_seg5,
1000 			 segval[5][V4L2_AV1_SEG_LVL_REF_SKIP]);
1001 	hantro_reg_write(vpu, &av1_global_mv_seg5,
1002 			 segval[5][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1003 
1004 	hantro_reg_write(vpu, &av1_quant_seg6,
1005 			 segval[6][V4L2_AV1_SEG_LVL_ALT_Q]);
1006 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg6,
1007 			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
1008 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg6,
1009 			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
1010 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg6,
1011 			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_U]);
1012 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg6,
1013 			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_V]);
1014 	hantro_reg_write(vpu, &av1_refpic_seg6,
1015 			 segval[6][V4L2_AV1_SEG_LVL_REF_FRAME]);
1016 	hantro_reg_write(vpu, &av1_skip_seg6,
1017 			 segval[6][V4L2_AV1_SEG_LVL_REF_SKIP]);
1018 	hantro_reg_write(vpu, &av1_global_mv_seg6,
1019 			 segval[6][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1020 
1021 	hantro_reg_write(vpu, &av1_quant_seg7,
1022 			 segval[7][V4L2_AV1_SEG_LVL_ALT_Q]);
1023 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg7,
1024 			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
1025 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg7,
1026 			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
1027 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg7,
1028 			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_U]);
1029 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg7,
1030 			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_V]);
1031 	hantro_reg_write(vpu, &av1_refpic_seg7,
1032 			 segval[7][V4L2_AV1_SEG_LVL_REF_FRAME]);
1033 	hantro_reg_write(vpu, &av1_skip_seg7,
1034 			 segval[7][V4L2_AV1_SEG_LVL_REF_SKIP]);
1035 	hantro_reg_write(vpu, &av1_global_mv_seg7,
1036 			 segval[7][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1037 }
1038 
rockchip_vpu981_av1_dec_is_lossless(struct hantro_ctx * ctx)1039 static bool rockchip_vpu981_av1_dec_is_lossless(struct hantro_ctx *ctx)
1040 {
1041 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1042 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1043 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1044 	const struct v4l2_av1_segmentation *segmentation = &frame->segmentation;
1045 	const struct v4l2_av1_quantization *quantization = &frame->quantization;
1046 	int i;
1047 
1048 	for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
1049 		int qindex = quantization->base_q_idx;
1050 
1051 		if (segmentation->feature_enabled[i] &
1052 		    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
1053 			qindex += segmentation->feature_data[i][V4L2_AV1_SEG_LVL_ALT_Q];
1054 		}
1055 		qindex = clamp(qindex, 0, 255);
1056 
1057 		if (qindex ||
1058 		    quantization->delta_q_y_dc ||
1059 		    quantization->delta_q_u_dc ||
1060 		    quantization->delta_q_u_ac ||
1061 		    quantization->delta_q_v_dc ||
1062 		    quantization->delta_q_v_ac)
1063 			return false;
1064 	}
1065 	return true;
1066 }
1067 
rockchip_vpu981_av1_dec_set_loopfilter(struct hantro_ctx * ctx)1068 static void rockchip_vpu981_av1_dec_set_loopfilter(struct hantro_ctx *ctx)
1069 {
1070 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1071 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1072 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1073 	const struct v4l2_av1_loop_filter *loop_filter = &frame->loop_filter;
1074 	bool filtering_dis = (loop_filter->level[0] == 0) && (loop_filter->level[1] == 0);
1075 	struct hantro_dev *vpu = ctx->dev;
1076 
1077 	hantro_reg_write(vpu, &av1_filtering_dis, filtering_dis);
1078 	hantro_reg_write(vpu, &av1_filt_level_base_gt32, loop_filter->level[0] > 32);
1079 	hantro_reg_write(vpu, &av1_filt_sharpness, loop_filter->sharpness);
1080 
1081 	hantro_reg_write(vpu, &av1_filt_level0, loop_filter->level[0]);
1082 	hantro_reg_write(vpu, &av1_filt_level1, loop_filter->level[1]);
1083 	hantro_reg_write(vpu, &av1_filt_level2, loop_filter->level[2]);
1084 	hantro_reg_write(vpu, &av1_filt_level3, loop_filter->level[3]);
1085 
1086 	if (loop_filter->flags & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED &&
1087 	    !rockchip_vpu981_av1_dec_is_lossless(ctx) &&
1088 	    !(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC)) {
1089 		hantro_reg_write(vpu, &av1_filt_ref_adj_0,
1090 				 loop_filter->ref_deltas[0]);
1091 		hantro_reg_write(vpu, &av1_filt_ref_adj_1,
1092 				 loop_filter->ref_deltas[1]);
1093 		hantro_reg_write(vpu, &av1_filt_ref_adj_2,
1094 				 loop_filter->ref_deltas[2]);
1095 		hantro_reg_write(vpu, &av1_filt_ref_adj_3,
1096 				 loop_filter->ref_deltas[3]);
1097 		hantro_reg_write(vpu, &av1_filt_ref_adj_4,
1098 				 loop_filter->ref_deltas[4]);
1099 		hantro_reg_write(vpu, &av1_filt_ref_adj_5,
1100 				 loop_filter->ref_deltas[5]);
1101 		hantro_reg_write(vpu, &av1_filt_ref_adj_6,
1102 				 loop_filter->ref_deltas[6]);
1103 		hantro_reg_write(vpu, &av1_filt_ref_adj_7,
1104 				 loop_filter->ref_deltas[7]);
1105 		hantro_reg_write(vpu, &av1_filt_mb_adj_0,
1106 				 loop_filter->mode_deltas[0]);
1107 		hantro_reg_write(vpu, &av1_filt_mb_adj_1,
1108 				 loop_filter->mode_deltas[1]);
1109 	} else {
1110 		hantro_reg_write(vpu, &av1_filt_ref_adj_0, 0);
1111 		hantro_reg_write(vpu, &av1_filt_ref_adj_1, 0);
1112 		hantro_reg_write(vpu, &av1_filt_ref_adj_2, 0);
1113 		hantro_reg_write(vpu, &av1_filt_ref_adj_3, 0);
1114 		hantro_reg_write(vpu, &av1_filt_ref_adj_4, 0);
1115 		hantro_reg_write(vpu, &av1_filt_ref_adj_5, 0);
1116 		hantro_reg_write(vpu, &av1_filt_ref_adj_6, 0);
1117 		hantro_reg_write(vpu, &av1_filt_ref_adj_7, 0);
1118 		hantro_reg_write(vpu, &av1_filt_mb_adj_0, 0);
1119 		hantro_reg_write(vpu, &av1_filt_mb_adj_1, 0);
1120 	}
1121 
1122 	hantro_write_addr(vpu, AV1_DB_DATA_COL, av1_dec->db_data_col.dma);
1123 	hantro_write_addr(vpu, AV1_DB_CTRL_COL, av1_dec->db_ctrl_col.dma);
1124 }
1125 
/*
 * Propagate the CDF tables produced by the hardware for the just-decoded
 * frame (prob_tbl_out) into the driver's per-slot CDF storage, honoring
 * refresh_frame_flags.  Only the first refreshed slot is loaded and
 * updated directly; rockchip_av1_store_cdfs() is then handed the full
 * refresh mask (presumably replicating to the remaining slots — defined
 * elsewhere), hence the break after the first hit.
 */
static void rockchip_vpu981_av1_dec_update_prob(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	bool frame_is_intra = IS_INTRA(frame->frame_type);
	struct av1cdfs *out_cdfs = (struct av1cdfs *)av1_dec->prob_tbl_out.cpu;
	int i;

	/* Nothing to do when the bitstream disables frame-end CDF update. */
	if (frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF)
		return;

	for (i = 0; i < NUM_REF_FRAMES; i++) {
		if (frame->refresh_frame_flags & BIT(i)) {
			struct mvcdfs stored_mv_cdf;

			/* Point av1_dec->cdfs/cdfs_ndvc at slot i's storage. */
			rockchip_av1_get_cdfs(ctx, i);
			stored_mv_cdf = av1_dec->cdfs->mv_cdf;
			*av1_dec->cdfs = *out_cdfs;
			if (frame_is_intra) {
				/*
				 * On intra frames the MV area of the output
				 * table holds intrabc (NDVC) context: keep the
				 * old inter MV CDF and save the NDVC one apart.
				 */
				av1_dec->cdfs->mv_cdf = stored_mv_cdf;
				*av1_dec->cdfs_ndvc = out_cdfs->mv_cdf;
			}
			rockchip_av1_store_cdfs(ctx,
						frame->refresh_frame_flags);
			break;
		}
	}
}
1155 
/*
 * Post-decode hook: fold the CDF probability tables updated by the
 * hardware back into the driver-maintained per-frame CDF storage.
 */
void rockchip_vpu981_av1_dec_done(struct hantro_ctx *ctx)
{
	rockchip_vpu981_av1_dec_update_prob(ctx);
}
1160 
/*
 * Select the CDF tables for the upcoming decode and hand them to the
 * hardware: default tables for intra / error-resilient frames or when
 * no primary reference exists, otherwise the tables stored for the
 * primary reference frame.  The chosen tables are copied into the
 * prob_tbl DMA buffer; the hardware writes its updated tables to
 * prob_tbl_out (consumed later by rockchip_vpu981_av1_dec_update_prob).
 */
static void rockchip_vpu981_av1_dec_set_prob(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	const struct v4l2_av1_quantization *quantization = &frame->quantization;
	struct hantro_dev *vpu = ctx->dev;
	bool error_resilient_mode =
	    !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE);
	bool frame_is_intra = IS_INTRA(frame->frame_type);

	if (error_resilient_mode || frame_is_intra ||
	    frame->primary_ref_frame == AV1_PRIMARY_REF_NONE) {
		/* No usable reference context: start from the defaults. */
		av1_dec->cdfs = &av1_dec->default_cdfs;
		av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
		rockchip_av1_default_coeff_probs(quantization->base_q_idx,
						 av1_dec->cdfs);
	} else {
		/* Load the CDF slot of the primary reference frame. */
		rockchip_av1_get_cdfs(ctx, frame->ref_frame_idx[frame->primary_ref_frame]);
	}
	rockchip_av1_store_cdfs(ctx, frame->refresh_frame_flags);

	memcpy(av1_dec->prob_tbl.cpu, av1_dec->cdfs, sizeof(struct av1cdfs));

	if (frame_is_intra) {
		int mv_offset = offsetof(struct av1cdfs, mv_cdf);
		/* Overwrite MV context area with intrabc MV context */
		memcpy(av1_dec->prob_tbl.cpu + mv_offset, av1_dec->cdfs_ndvc,
		       sizeof(struct mvcdfs));
	}

	hantro_write_addr(vpu, AV1_PROP_TABLE_OUT, av1_dec->prob_tbl_out.dma);
	hantro_write_addr(vpu, AV1_PROP_TABLE, av1_dec->prob_tbl.dma);
}
1195 
/*
 * Build a 256-entry film grain scaling LUT by piecewise-linear
 * interpolation between (value, scaling) control points, using a
 * rounded Q16 fixed-point slope.  With no points the LUT is zeroed;
 * entries at or above the last control point are held flat.  Entries
 * below the first control point are left untouched.
 */
static void
rockchip_vpu981_av1_dec_init_scaling_function(const u8 *values, const u8 *scaling,
					      u8 num_points, u8 *scaling_lut)
{
	int seg, idx;

	if (!num_points) {
		memset(scaling_lut, 0, 256);
		return;
	}

	/* Interpolate each span between consecutive control points. */
	for (seg = 0; seg + 1 < num_points; seg++) {
		int step;
		s32 dy = scaling[seg + 1] - scaling[seg];
		s32 dx = values[seg + 1] - values[seg];
		/* Q16 slope, with rounding applied to the division. */
		s64 slope = dx ? dy * ((65536 + (dx >> 1)) / dx) : 0;

		for (step = 0; step < dx; step++)
			scaling_lut[values[seg] + step] =
				scaling[seg] +
				(s32)((step * slope + 32768) >> 16);
	}

	/* Clamp flat from the last control point to the end of the LUT. */
	for (idx = values[num_points - 1]; idx < 256; idx++)
		scaling_lut[idx] = scaling[num_points - 1];
}
1225 
rockchip_vpu981_av1_dec_set_fgs(struct hantro_ctx * ctx)1226 static void rockchip_vpu981_av1_dec_set_fgs(struct hantro_ctx *ctx)
1227 {
1228 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1229 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1230 	const struct v4l2_ctrl_av1_film_grain *film_grain = ctrls->film_grain;
1231 	struct rockchip_av1_film_grain *fgmem = av1_dec->film_grain.cpu;
1232 	struct hantro_dev *vpu = ctx->dev;
1233 	bool scaling_from_luma =
1234 		!!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA);
1235 	s32 (*ar_coeffs_y)[24];
1236 	s32 (*ar_coeffs_cb)[25];
1237 	s32 (*ar_coeffs_cr)[25];
1238 	s32 (*luma_grain_block)[73][82];
1239 	s32 (*cb_grain_block)[38][44];
1240 	s32 (*cr_grain_block)[38][44];
1241 	s32 ar_coeff_lag, ar_coeff_shift;
1242 	s32 grain_scale_shift, bitdepth;
1243 	s32 grain_center, grain_min, grain_max;
1244 	int i, j;
1245 
1246 	hantro_reg_write(vpu, &av1_apply_grain, 0);
1247 
1248 	if (!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_APPLY_GRAIN)) {
1249 		hantro_reg_write(vpu, &av1_num_y_points_b, 0);
1250 		hantro_reg_write(vpu, &av1_num_cb_points_b, 0);
1251 		hantro_reg_write(vpu, &av1_num_cr_points_b, 0);
1252 		hantro_reg_write(vpu, &av1_scaling_shift, 0);
1253 		hantro_reg_write(vpu, &av1_cb_mult, 0);
1254 		hantro_reg_write(vpu, &av1_cb_luma_mult, 0);
1255 		hantro_reg_write(vpu, &av1_cb_offset, 0);
1256 		hantro_reg_write(vpu, &av1_cr_mult, 0);
1257 		hantro_reg_write(vpu, &av1_cr_luma_mult, 0);
1258 		hantro_reg_write(vpu, &av1_cr_offset, 0);
1259 		hantro_reg_write(vpu, &av1_overlap_flag, 0);
1260 		hantro_reg_write(vpu, &av1_clip_to_restricted_range, 0);
1261 		hantro_reg_write(vpu, &av1_chroma_scaling_from_luma, 0);
1262 		hantro_reg_write(vpu, &av1_random_seed, 0);
1263 		hantro_write_addr(vpu, AV1_FILM_GRAIN, 0);
1264 		return;
1265 	}
1266 
1267 	ar_coeffs_y = kzalloc(sizeof(int32_t) * 24, GFP_KERNEL);
1268 	ar_coeffs_cb = kzalloc(sizeof(int32_t) * 25, GFP_KERNEL);
1269 	ar_coeffs_cr = kzalloc(sizeof(int32_t) * 25, GFP_KERNEL);
1270 	luma_grain_block = kzalloc(sizeof(int32_t) * 73 * 82, GFP_KERNEL);
1271 	cb_grain_block = kzalloc(sizeof(int32_t) * 38 * 44, GFP_KERNEL);
1272 	cr_grain_block = kzalloc(sizeof(int32_t) * 38 * 44, GFP_KERNEL);
1273 
1274 	if (!ar_coeffs_y || !ar_coeffs_cb || !ar_coeffs_cr ||
1275 	    !luma_grain_block || !cb_grain_block || !cr_grain_block) {
1276 		pr_warn("Fail allocating memory for film grain parameters\n");
1277 		goto alloc_fail;
1278 	}
1279 
1280 	hantro_reg_write(vpu, &av1_apply_grain, 1);
1281 
1282 	hantro_reg_write(vpu, &av1_num_y_points_b,
1283 			 film_grain->num_y_points > 0);
1284 	hantro_reg_write(vpu, &av1_num_cb_points_b,
1285 			 film_grain->num_cb_points > 0);
1286 	hantro_reg_write(vpu, &av1_num_cr_points_b,
1287 			 film_grain->num_cr_points > 0);
1288 	hantro_reg_write(vpu, &av1_scaling_shift,
1289 			 film_grain->grain_scaling_minus_8 + 8);
1290 
1291 	if (!scaling_from_luma) {
1292 		hantro_reg_write(vpu, &av1_cb_mult, film_grain->cb_mult - 128);
1293 		hantro_reg_write(vpu, &av1_cb_luma_mult, film_grain->cb_luma_mult - 128);
1294 		hantro_reg_write(vpu, &av1_cb_offset, film_grain->cb_offset - 256);
1295 		hantro_reg_write(vpu, &av1_cr_mult, film_grain->cr_mult - 128);
1296 		hantro_reg_write(vpu, &av1_cr_luma_mult, film_grain->cr_luma_mult - 128);
1297 		hantro_reg_write(vpu, &av1_cr_offset, film_grain->cr_offset - 256);
1298 	} else {
1299 		hantro_reg_write(vpu, &av1_cb_mult, 0);
1300 		hantro_reg_write(vpu, &av1_cb_luma_mult, 0);
1301 		hantro_reg_write(vpu, &av1_cb_offset, 0);
1302 		hantro_reg_write(vpu, &av1_cr_mult, 0);
1303 		hantro_reg_write(vpu, &av1_cr_luma_mult, 0);
1304 		hantro_reg_write(vpu, &av1_cr_offset, 0);
1305 	}
1306 
1307 	hantro_reg_write(vpu, &av1_overlap_flag,
1308 			 !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_OVERLAP));
1309 	hantro_reg_write(vpu, &av1_clip_to_restricted_range,
1310 			 !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_CLIP_TO_RESTRICTED_RANGE));
1311 	hantro_reg_write(vpu, &av1_chroma_scaling_from_luma, scaling_from_luma);
1312 	hantro_reg_write(vpu, &av1_random_seed, film_grain->grain_seed);
1313 
1314 	rockchip_vpu981_av1_dec_init_scaling_function(film_grain->point_y_value,
1315 						      film_grain->point_y_scaling,
1316 						      film_grain->num_y_points,
1317 						      fgmem->scaling_lut_y);
1318 
1319 	if (film_grain->flags &
1320 	    V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA) {
1321 		memcpy(fgmem->scaling_lut_cb, fgmem->scaling_lut_y,
1322 		       sizeof(*fgmem->scaling_lut_y) * 256);
1323 		memcpy(fgmem->scaling_lut_cr, fgmem->scaling_lut_y,
1324 		       sizeof(*fgmem->scaling_lut_y) * 256);
1325 	} else {
1326 		rockchip_vpu981_av1_dec_init_scaling_function
1327 		    (film_grain->point_cb_value, film_grain->point_cb_scaling,
1328 		     film_grain->num_cb_points, fgmem->scaling_lut_cb);
1329 		rockchip_vpu981_av1_dec_init_scaling_function
1330 		    (film_grain->point_cr_value, film_grain->point_cr_scaling,
1331 		     film_grain->num_cr_points, fgmem->scaling_lut_cr);
1332 	}
1333 
1334 	for (i = 0; i < V4L2_AV1_AR_COEFFS_SIZE; i++) {
1335 		if (i < 24)
1336 			(*ar_coeffs_y)[i] = film_grain->ar_coeffs_y_plus_128[i] - 128;
1337 		(*ar_coeffs_cb)[i] = film_grain->ar_coeffs_cb_plus_128[i] - 128;
1338 		(*ar_coeffs_cr)[i] = film_grain->ar_coeffs_cr_plus_128[i] - 128;
1339 	}
1340 
1341 	ar_coeff_lag = film_grain->ar_coeff_lag;
1342 	ar_coeff_shift = film_grain->ar_coeff_shift_minus_6 + 6;
1343 	grain_scale_shift = film_grain->grain_scale_shift;
1344 	bitdepth = ctx->bit_depth;
1345 	grain_center = 128 << (bitdepth - 8);
1346 	grain_min = 0 - grain_center;
1347 	grain_max = (256 << (bitdepth - 8)) - 1 - grain_center;
1348 
1349 	rockchip_av1_generate_luma_grain_block(luma_grain_block, bitdepth,
1350 					       film_grain->num_y_points, grain_scale_shift,
1351 					       ar_coeff_lag, ar_coeffs_y, ar_coeff_shift,
1352 					       grain_min, grain_max, film_grain->grain_seed);
1353 
1354 	rockchip_av1_generate_chroma_grain_block(luma_grain_block, cb_grain_block,
1355 						 cr_grain_block, bitdepth,
1356 						 film_grain->num_y_points,
1357 						 film_grain->num_cb_points,
1358 						 film_grain->num_cr_points,
1359 						 grain_scale_shift, ar_coeff_lag, ar_coeffs_cb,
1360 						 ar_coeffs_cr, ar_coeff_shift, grain_min,
1361 						 grain_max,
1362 						 scaling_from_luma,
1363 						 film_grain->grain_seed);
1364 
1365 	for (i = 0; i < 64; i++) {
1366 		for (j = 0; j < 64; j++)
1367 			fgmem->cropped_luma_grain_block[i * 64 + j] =
1368 				(*luma_grain_block)[i + 9][j + 9];
1369 	}
1370 
1371 	for (i = 0; i < 32; i++) {
1372 		for (j = 0; j < 32; j++) {
1373 			fgmem->cropped_chroma_grain_block[i * 64 + 2 * j] =
1374 				(*cb_grain_block)[i + 6][j + 6];
1375 			fgmem->cropped_chroma_grain_block[i * 64 + 2 * j + 1] =
1376 				(*cr_grain_block)[i + 6][j + 6];
1377 		}
1378 	}
1379 
1380 	hantro_write_addr(vpu, AV1_FILM_GRAIN, av1_dec->film_grain.dma);
1381 
1382 alloc_fail:
1383 	kfree(ar_coeffs_y);
1384 	kfree(ar_coeffs_cb);
1385 	kfree(ar_coeffs_cr);
1386 	kfree(luma_grain_block);
1387 	kfree(cb_grain_block);
1388 	kfree(cr_grain_block);
1389 }
1390 
rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx * ctx)1391 static void rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx *ctx)
1392 {
1393 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1394 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1395 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1396 	const struct v4l2_av1_cdef *cdef = &frame->cdef;
1397 	struct hantro_dev *vpu = ctx->dev;
1398 	u32 luma_pri_strength = 0;
1399 	u16 luma_sec_strength = 0;
1400 	u32 chroma_pri_strength = 0;
1401 	u16 chroma_sec_strength = 0;
1402 	int i;
1403 
1404 	hantro_reg_write(vpu, &av1_cdef_bits, cdef->bits);
1405 	hantro_reg_write(vpu, &av1_cdef_damping, cdef->damping_minus_3);
1406 
1407 	for (i = 0; i < BIT(cdef->bits); i++) {
1408 		luma_pri_strength |= cdef->y_pri_strength[i] << (i * 4);
1409 		if (cdef->y_sec_strength[i] == 4)
1410 			luma_sec_strength |= 3 << (i * 2);
1411 		else
1412 			luma_sec_strength |= cdef->y_sec_strength[i] << (i * 2);
1413 
1414 		chroma_pri_strength |= cdef->uv_pri_strength[i] << (i * 4);
1415 		if (cdef->uv_sec_strength[i] == 4)
1416 			chroma_sec_strength |= 3 << (i * 2);
1417 		else
1418 			chroma_sec_strength |= cdef->uv_sec_strength[i] << (i * 2);
1419 	}
1420 
1421 	hantro_reg_write(vpu, &av1_cdef_luma_primary_strength,
1422 			 luma_pri_strength);
1423 	hantro_reg_write(vpu, &av1_cdef_luma_secondary_strength,
1424 			 luma_sec_strength);
1425 	hantro_reg_write(vpu, &av1_cdef_chroma_primary_strength,
1426 			 chroma_pri_strength);
1427 	hantro_reg_write(vpu, &av1_cdef_chroma_secondary_strength,
1428 			 chroma_sec_strength);
1429 
1430 	hantro_write_addr(vpu, AV1_CDEF_COL, av1_dec->cdef_col.dma);
1431 }
1432 
rockchip_vpu981_av1_dec_set_lr(struct hantro_ctx * ctx)1433 static void rockchip_vpu981_av1_dec_set_lr(struct hantro_ctx *ctx)
1434 {
1435 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1436 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1437 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1438 	const struct v4l2_av1_loop_restoration *loop_restoration =
1439 	    &frame->loop_restoration;
1440 	struct hantro_dev *vpu = ctx->dev;
1441 	u16 lr_type = 0, lr_unit_size = 0;
1442 	u8 restoration_unit_size[V4L2_AV1_NUM_PLANES_MAX] = { 3, 3, 3 };
1443 	int i;
1444 
1445 	if (loop_restoration->flags & V4L2_AV1_LOOP_RESTORATION_FLAG_USES_LR) {
1446 		restoration_unit_size[0] = 1 + loop_restoration->lr_unit_shift;
1447 		restoration_unit_size[1] =
1448 		    1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
1449 		restoration_unit_size[2] =
1450 		    1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
1451 	}
1452 
1453 	for (i = 0; i < V4L2_AV1_NUM_PLANES_MAX; i++) {
1454 		lr_type |=
1455 		    loop_restoration->frame_restoration_type[i] << (i * 2);
1456 		lr_unit_size |= restoration_unit_size[i] << (i * 2);
1457 	}
1458 
1459 	hantro_reg_write(vpu, &av1_lr_type, lr_type);
1460 	hantro_reg_write(vpu, &av1_lr_unit_size, lr_unit_size);
1461 	hantro_write_addr(vpu, AV1_LR_COL, av1_dec->lr_col.dma);
1462 }
1463 
/*
 * Program the super-resolution upscaling parameters, mirroring the AV1
 * spec's superres width derivation and Q14 (RS_SCALE_SUBPEL_BITS)
 * fixed-point step/initial-offset computation.  When superres is off
 * or results in no actual downscale, only the defaults are written
 * with superres_is_scaled = 0 (the step/offset defaults presumably do
 * not matter to the hardware in that case — not verified here).
 */
static void rockchip_vpu981_av1_dec_set_superres_params(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	struct hantro_dev *vpu = ctx->dev;
	u8 superres_scale_denominator = SCALE_NUMERATOR;
	int superres_luma_step = RS_SCALE_SUBPEL_BITS;
	int superres_chroma_step = RS_SCALE_SUBPEL_BITS;
	int superres_luma_step_invra = RS_SCALE_SUBPEL_BITS;
	int superres_chroma_step_invra = RS_SCALE_SUBPEL_BITS;
	int superres_init_luma_subpel_x = 0;
	int superres_init_chroma_subpel_x = 0;
	int superres_is_scaled = 0;
	int min_w = min_t(uint32_t, 16, frame->upscaled_width);
	int upscaled_luma, downscaled_luma;
	int downscaled_chroma, upscaled_chroma;
	int step_luma, step_chroma;
	int err_luma, err_chroma;
	int initial_luma, initial_chroma;
	int width = 0;

	if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
		superres_scale_denominator = frame->superres_denom;

	/* denominator <= numerator means 1:1 — nothing to upscale. */
	if (superres_scale_denominator <= SCALE_NUMERATOR)
		goto set_regs;

	/* Rounded downscaled width, floored at min_w (spec: at least 16). */
	width = (frame->upscaled_width * SCALE_NUMERATOR +
		(superres_scale_denominator / 2)) / superres_scale_denominator;

	if (width < min_w)
		width = min_w;

	if (width == frame->upscaled_width)
		goto set_regs;

	superres_is_scaled = 1;
	upscaled_luma = frame->upscaled_width;
	downscaled_luma = width;
	/* 4:2:0 chroma widths are half the luma widths, rounded up. */
	downscaled_chroma = (downscaled_luma + 1) >> 1;
	upscaled_chroma = (upscaled_luma + 1) >> 1;
	/* Q14 horizontal step per output pixel (rounded division). */
	step_luma =
		((downscaled_luma << RS_SCALE_SUBPEL_BITS) +
		 (upscaled_luma / 2)) / upscaled_luma;
	step_chroma =
		((downscaled_chroma << RS_SCALE_SUBPEL_BITS) +
		 (upscaled_chroma / 2)) / upscaled_chroma;
	/* Accumulated rounding error over the whole row. */
	err_luma =
		(upscaled_luma * step_luma)
		- (downscaled_luma << RS_SCALE_SUBPEL_BITS);
	err_chroma =
		(upscaled_chroma * step_chroma)
		- (downscaled_chroma << RS_SCALE_SUBPEL_BITS);
	/* Centered initial subpel offset, error-compensated, mod Q14. */
	initial_luma =
		((-((upscaled_luma - downscaled_luma) << (RS_SCALE_SUBPEL_BITS - 1))
		  + upscaled_luma / 2)
		 / upscaled_luma + (1 << (RS_SCALE_EXTRA_BITS - 1)) - err_luma / 2)
		& RS_SCALE_SUBPEL_MASK;
	initial_chroma =
		((-((upscaled_chroma - downscaled_chroma) << (RS_SCALE_SUBPEL_BITS - 1))
		  + upscaled_chroma / 2)
		 / upscaled_chroma + (1 << (RS_SCALE_EXTRA_BITS - 1)) - err_chroma / 2)
		& RS_SCALE_SUBPEL_MASK;
	superres_luma_step = step_luma;
	superres_chroma_step = step_chroma;
	/* Inverse steps (upscaled per downscaled pixel), also Q14. */
	superres_luma_step_invra =
		((upscaled_luma << RS_SCALE_SUBPEL_BITS) + (downscaled_luma / 2))
		/ downscaled_luma;
	superres_chroma_step_invra =
		((upscaled_chroma << RS_SCALE_SUBPEL_BITS) + (downscaled_chroma / 2))
		/ downscaled_chroma;
	superres_init_luma_subpel_x = initial_luma;
	superres_init_chroma_subpel_x = initial_chroma;

set_regs:
	hantro_reg_write(vpu, &av1_superres_pic_width, frame->upscaled_width);

	/* With superres the register takes denom - 9; otherwise pass through. */
	if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
		hantro_reg_write(vpu, &av1_scale_denom_minus9,
				 frame->superres_denom - SUPERRES_SCALE_DENOMINATOR_MIN);
	else
		hantro_reg_write(vpu, &av1_scale_denom_minus9, frame->superres_denom);

	hantro_reg_write(vpu, &av1_superres_luma_step, superres_luma_step);
	hantro_reg_write(vpu, &av1_superres_chroma_step, superres_chroma_step);
	hantro_reg_write(vpu, &av1_superres_luma_step_invra,
			 superres_luma_step_invra);
	hantro_reg_write(vpu, &av1_superres_chroma_step_invra,
			 superres_chroma_step_invra);
	hantro_reg_write(vpu, &av1_superres_init_luma_subpel_x,
			 superres_init_luma_subpel_x);
	hantro_reg_write(vpu, &av1_superres_init_chroma_subpel_x,
			 superres_init_chroma_subpel_x);
	hantro_reg_write(vpu, &av1_superres_is_scaled, superres_is_scaled);

	hantro_write_addr(vpu, AV1_SR_COL, av1_dec->sr_col.dma);
}
1562 
rockchip_vpu981_av1_dec_set_picture_dimensions(struct hantro_ctx * ctx)1563 static void rockchip_vpu981_av1_dec_set_picture_dimensions(struct hantro_ctx *ctx)
1564 {
1565 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1566 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1567 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1568 	struct hantro_dev *vpu = ctx->dev;
1569 	int pic_width_in_cbs = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
1570 	int pic_height_in_cbs = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
1571 	int pic_width_pad = ALIGN(frame->frame_width_minus_1 + 1, 8)
1572 			    - (frame->frame_width_minus_1 + 1);
1573 	int pic_height_pad = ALIGN(frame->frame_height_minus_1 + 1, 8)
1574 			     - (frame->frame_height_minus_1 + 1);
1575 
1576 	hantro_reg_write(vpu, &av1_pic_width_in_cbs, pic_width_in_cbs);
1577 	hantro_reg_write(vpu, &av1_pic_height_in_cbs, pic_height_in_cbs);
1578 	hantro_reg_write(vpu, &av1_pic_width_pad, pic_width_pad);
1579 	hantro_reg_write(vpu, &av1_pic_height_pad, pic_height_pad);
1580 
1581 	rockchip_vpu981_av1_dec_set_superres_params(ctx);
1582 }
1583 
rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx * ctx)1584 static void rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx *ctx)
1585 {
1586 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1587 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1588 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1589 	struct hantro_dev *vpu = ctx->dev;
1590 	bool use_ref_frame_mvs =
1591 	    !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS);
1592 	int cur_frame_offset = frame->order_hint;
1593 	int alt_frame_offset = 0;
1594 	int gld_frame_offset = 0;
1595 	int bwd_frame_offset = 0;
1596 	int alt2_frame_offset = 0;
1597 	int refs_selected[3] = { 0, 0, 0 };
1598 	int cur_mi_cols = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
1599 	int cur_mi_rows = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
1600 	int cur_offset[V4L2_AV1_TOTAL_REFS_PER_FRAME - 1];
1601 	int cur_roffset[V4L2_AV1_TOTAL_REFS_PER_FRAME - 1];
1602 	int mf_types[3] = { 0, 0, 0 };
1603 	int ref_stamp = 2;
1604 	int ref_ind = 0;
1605 	int rf, idx;
1606 
1607 	alt_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT_BUF_IDX);
1608 	gld_frame_offset = rockchip_vpu981_get_order_hint(ctx, GLD_BUF_IDX);
1609 	bwd_frame_offset = rockchip_vpu981_get_order_hint(ctx, BWD_BUF_IDX);
1610 	alt2_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT2_BUF_IDX);
1611 
1612 	idx = rockchip_vpu981_get_frame_index(ctx, LST_BUF_IDX);
1613 	if (idx >= 0) {
1614 		int alt_frame_offset_in_lst =
1615 			av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME];
1616 		bool is_lst_overlay =
1617 		    (alt_frame_offset_in_lst == gld_frame_offset);
1618 
1619 		if (!is_lst_overlay) {
1620 			int lst_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1621 			int lst_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1622 			bool lst_intra_only =
1623 			    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1624 
1625 			if (lst_mi_cols == cur_mi_cols &&
1626 			    lst_mi_rows == cur_mi_rows && !lst_intra_only) {
1627 				mf_types[ref_ind] = V4L2_AV1_REF_LAST_FRAME;
1628 				refs_selected[ref_ind++] = LST_BUF_IDX;
1629 			}
1630 		}
1631 		ref_stamp--;
1632 	}
1633 
1634 	idx = rockchip_vpu981_get_frame_index(ctx, BWD_BUF_IDX);
1635 	if (rockchip_vpu981_av1_dec_get_dist(ctx, bwd_frame_offset, cur_frame_offset) > 0) {
1636 		int bwd_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1637 		int bwd_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1638 		bool bwd_intra_only =
1639 		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1640 
1641 		if (bwd_mi_cols == cur_mi_cols && bwd_mi_rows == cur_mi_rows &&
1642 		    !bwd_intra_only) {
1643 			mf_types[ref_ind] = V4L2_AV1_REF_BWDREF_FRAME;
1644 			refs_selected[ref_ind++] = BWD_BUF_IDX;
1645 			ref_stamp--;
1646 		}
1647 	}
1648 
1649 	idx = rockchip_vpu981_get_frame_index(ctx, ALT2_BUF_IDX);
1650 	if (rockchip_vpu981_av1_dec_get_dist(ctx, alt2_frame_offset, cur_frame_offset) > 0) {
1651 		int alt2_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1652 		int alt2_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1653 		bool alt2_intra_only =
1654 		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1655 
1656 		if (alt2_mi_cols == cur_mi_cols && alt2_mi_rows == cur_mi_rows &&
1657 		    !alt2_intra_only) {
1658 			mf_types[ref_ind] = V4L2_AV1_REF_ALTREF2_FRAME;
1659 			refs_selected[ref_ind++] = ALT2_BUF_IDX;
1660 			ref_stamp--;
1661 		}
1662 	}
1663 
1664 	idx = rockchip_vpu981_get_frame_index(ctx, ALT_BUF_IDX);
1665 	if (rockchip_vpu981_av1_dec_get_dist(ctx, alt_frame_offset, cur_frame_offset) > 0 &&
1666 	    ref_stamp >= 0) {
1667 		int alt_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1668 		int alt_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1669 		bool alt_intra_only =
1670 		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1671 
1672 		if (alt_mi_cols == cur_mi_cols && alt_mi_rows == cur_mi_rows &&
1673 		    !alt_intra_only) {
1674 			mf_types[ref_ind] = V4L2_AV1_REF_ALTREF_FRAME;
1675 			refs_selected[ref_ind++] = ALT_BUF_IDX;
1676 			ref_stamp--;
1677 		}
1678 	}
1679 
1680 	idx = rockchip_vpu981_get_frame_index(ctx, LST2_BUF_IDX);
1681 	if (idx >= 0 && ref_stamp >= 0) {
1682 		int lst2_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1683 		int lst2_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1684 		bool lst2_intra_only =
1685 		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1686 
1687 		if (lst2_mi_cols == cur_mi_cols && lst2_mi_rows == cur_mi_rows &&
1688 		    !lst2_intra_only) {
1689 			mf_types[ref_ind] = V4L2_AV1_REF_LAST2_FRAME;
1690 			refs_selected[ref_ind++] = LST2_BUF_IDX;
1691 			ref_stamp--;
1692 		}
1693 	}
1694 
1695 	for (rf = 0; rf < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; ++rf) {
1696 		idx = rockchip_vpu981_get_frame_index(ctx, rf);
1697 		if (idx >= 0) {
1698 			int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, rf);
1699 
1700 			cur_offset[rf] =
1701 			    rockchip_vpu981_av1_dec_get_dist(ctx, cur_frame_offset, rf_order_hint);
1702 			cur_roffset[rf] =
1703 			    rockchip_vpu981_av1_dec_get_dist(ctx, rf_order_hint, cur_frame_offset);
1704 		} else {
1705 			cur_offset[rf] = 0;
1706 			cur_roffset[rf] = 0;
1707 		}
1708 	}
1709 
1710 	hantro_reg_write(vpu, &av1_use_temporal0_mvs, 0);
1711 	hantro_reg_write(vpu, &av1_use_temporal1_mvs, 0);
1712 	hantro_reg_write(vpu, &av1_use_temporal2_mvs, 0);
1713 	hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
1714 
1715 	hantro_reg_write(vpu, &av1_mf1_last_offset, 0);
1716 	hantro_reg_write(vpu, &av1_mf1_last2_offset, 0);
1717 	hantro_reg_write(vpu, &av1_mf1_last3_offset, 0);
1718 	hantro_reg_write(vpu, &av1_mf1_golden_offset, 0);
1719 	hantro_reg_write(vpu, &av1_mf1_bwdref_offset, 0);
1720 	hantro_reg_write(vpu, &av1_mf1_altref2_offset, 0);
1721 	hantro_reg_write(vpu, &av1_mf1_altref_offset, 0);
1722 
1723 	if (use_ref_frame_mvs && ref_ind > 0 &&
1724 	    cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1725 	    cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1726 		int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[0]);
1727 		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[0]);
1728 		u32 *oh = av1_dec->frame_refs[idx].order_hints;
1729 		int val;
1730 
1731 		hantro_reg_write(vpu, &av1_use_temporal0_mvs, 1);
1732 
1733 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1734 		hantro_reg_write(vpu, &av1_mf1_last_offset, val);
1735 
1736 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1737 		hantro_reg_write(vpu, &av1_mf1_last2_offset, val);
1738 
1739 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1740 		hantro_reg_write(vpu, &av1_mf1_last3_offset, val);
1741 
1742 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1743 		hantro_reg_write(vpu, &av1_mf1_golden_offset, val);
1744 
1745 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1746 		hantro_reg_write(vpu, &av1_mf1_bwdref_offset, val);
1747 
1748 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1749 		hantro_reg_write(vpu, &av1_mf1_altref2_offset, val);
1750 
1751 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1752 		hantro_reg_write(vpu, &av1_mf1_altref_offset, val);
1753 	}
1754 
1755 	hantro_reg_write(vpu, &av1_mf2_last_offset, 0);
1756 	hantro_reg_write(vpu, &av1_mf2_last2_offset, 0);
1757 	hantro_reg_write(vpu, &av1_mf2_last3_offset, 0);
1758 	hantro_reg_write(vpu, &av1_mf2_golden_offset, 0);
1759 	hantro_reg_write(vpu, &av1_mf2_bwdref_offset, 0);
1760 	hantro_reg_write(vpu, &av1_mf2_altref2_offset, 0);
1761 	hantro_reg_write(vpu, &av1_mf2_altref_offset, 0);
1762 
1763 	if (use_ref_frame_mvs && ref_ind > 1 &&
1764 	    cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1765 	    cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1766 		int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[1]);
1767 		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[1]);
1768 		u32 *oh = av1_dec->frame_refs[idx].order_hints;
1769 		int val;
1770 
1771 		hantro_reg_write(vpu, &av1_use_temporal1_mvs, 1);
1772 
1773 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1774 		hantro_reg_write(vpu, &av1_mf2_last_offset, val);
1775 
1776 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1777 		hantro_reg_write(vpu, &av1_mf2_last2_offset, val);
1778 
1779 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1780 		hantro_reg_write(vpu, &av1_mf2_last3_offset, val);
1781 
1782 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1783 		hantro_reg_write(vpu, &av1_mf2_golden_offset, val);
1784 
1785 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1786 		hantro_reg_write(vpu, &av1_mf2_bwdref_offset, val);
1787 
1788 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1789 		hantro_reg_write(vpu, &av1_mf2_altref2_offset, val);
1790 
1791 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1792 		hantro_reg_write(vpu, &av1_mf2_altref_offset, val);
1793 	}
1794 
1795 	hantro_reg_write(vpu, &av1_mf3_last_offset, 0);
1796 	hantro_reg_write(vpu, &av1_mf3_last2_offset, 0);
1797 	hantro_reg_write(vpu, &av1_mf3_last3_offset, 0);
1798 	hantro_reg_write(vpu, &av1_mf3_golden_offset, 0);
1799 	hantro_reg_write(vpu, &av1_mf3_bwdref_offset, 0);
1800 	hantro_reg_write(vpu, &av1_mf3_altref2_offset, 0);
1801 	hantro_reg_write(vpu, &av1_mf3_altref_offset, 0);
1802 
1803 	if (use_ref_frame_mvs && ref_ind > 2 &&
1804 	    cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1805 	    cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1806 		int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[2]);
1807 		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[2]);
1808 		u32 *oh = av1_dec->frame_refs[idx].order_hints;
1809 		int val;
1810 
1811 		hantro_reg_write(vpu, &av1_use_temporal2_mvs, 1);
1812 
1813 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1814 		hantro_reg_write(vpu, &av1_mf3_last_offset, val);
1815 
1816 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1817 		hantro_reg_write(vpu, &av1_mf3_last2_offset, val);
1818 
1819 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1820 		hantro_reg_write(vpu, &av1_mf3_last3_offset, val);
1821 
1822 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1823 		hantro_reg_write(vpu, &av1_mf3_golden_offset, val);
1824 
1825 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1826 		hantro_reg_write(vpu, &av1_mf3_bwdref_offset, val);
1827 
1828 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1829 		hantro_reg_write(vpu, &av1_mf3_altref2_offset, val);
1830 
1831 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1832 		hantro_reg_write(vpu, &av1_mf3_altref_offset, val);
1833 	}
1834 
1835 	hantro_reg_write(vpu, &av1_cur_last_offset, cur_offset[0]);
1836 	hantro_reg_write(vpu, &av1_cur_last2_offset, cur_offset[1]);
1837 	hantro_reg_write(vpu, &av1_cur_last3_offset, cur_offset[2]);
1838 	hantro_reg_write(vpu, &av1_cur_golden_offset, cur_offset[3]);
1839 	hantro_reg_write(vpu, &av1_cur_bwdref_offset, cur_offset[4]);
1840 	hantro_reg_write(vpu, &av1_cur_altref2_offset, cur_offset[5]);
1841 	hantro_reg_write(vpu, &av1_cur_altref_offset, cur_offset[6]);
1842 
1843 	hantro_reg_write(vpu, &av1_cur_last_roffset, cur_roffset[0]);
1844 	hantro_reg_write(vpu, &av1_cur_last2_roffset, cur_roffset[1]);
1845 	hantro_reg_write(vpu, &av1_cur_last3_roffset, cur_roffset[2]);
1846 	hantro_reg_write(vpu, &av1_cur_golden_roffset, cur_roffset[3]);
1847 	hantro_reg_write(vpu, &av1_cur_bwdref_roffset, cur_roffset[4]);
1848 	hantro_reg_write(vpu, &av1_cur_altref2_roffset, cur_roffset[5]);
1849 	hantro_reg_write(vpu, &av1_cur_altref_roffset, cur_roffset[6]);
1850 
1851 	hantro_reg_write(vpu, &av1_mf1_type, mf_types[0] - V4L2_AV1_REF_LAST_FRAME);
1852 	hantro_reg_write(vpu, &av1_mf2_type, mf_types[1] - V4L2_AV1_REF_LAST_FRAME);
1853 	hantro_reg_write(vpu, &av1_mf3_type, mf_types[2] - V4L2_AV1_REF_LAST_FRAME);
1854 }
1855 
rockchip_vpu981_av1_dec_set_reference_frames(struct hantro_ctx * ctx)1856 static void rockchip_vpu981_av1_dec_set_reference_frames(struct hantro_ctx *ctx)
1857 {
1858 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1859 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1860 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1861 	int frame_type = frame->frame_type;
1862 	bool allow_intrabc = !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC);
1863 	int ref_count[AV1DEC_MAX_PIC_BUFFERS] = { 0 };
1864 	struct hantro_dev *vpu = ctx->dev;
1865 	int i, ref_frames = 0;
1866 	bool scale_enable = false;
1867 
1868 	if (IS_INTRA(frame_type) && !allow_intrabc)
1869 		return;
1870 
1871 	if (!allow_intrabc) {
1872 		for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) {
1873 			int idx = rockchip_vpu981_get_frame_index(ctx, i);
1874 
1875 			if (idx >= 0)
1876 				ref_count[idx]++;
1877 		}
1878 
1879 		for (i = 0; i < AV1DEC_MAX_PIC_BUFFERS; i++) {
1880 			if (ref_count[i])
1881 				ref_frames++;
1882 		}
1883 	} else {
1884 		ref_frames = 1;
1885 	}
1886 	hantro_reg_write(vpu, &av1_ref_frames, ref_frames);
1887 
1888 	rockchip_vpu981_av1_dec_set_frame_sign_bias(ctx);
1889 
1890 	for (i = V4L2_AV1_REF_LAST_FRAME; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++) {
1891 		u32 ref = i - 1;
1892 		int idx = 0;
1893 		int width, height;
1894 
1895 		if (allow_intrabc) {
1896 			idx = av1_dec->current_frame_index;
1897 			width = frame->frame_width_minus_1 + 1;
1898 			height = frame->frame_height_minus_1 + 1;
1899 		} else {
1900 			if (rockchip_vpu981_get_frame_index(ctx, ref) > 0)
1901 				idx = rockchip_vpu981_get_frame_index(ctx, ref);
1902 			width = av1_dec->frame_refs[idx].width;
1903 			height = av1_dec->frame_refs[idx].height;
1904 		}
1905 
1906 		scale_enable |=
1907 		    rockchip_vpu981_av1_dec_set_ref(ctx, ref, idx, width,
1908 						    height);
1909 
1910 		rockchip_vpu981_av1_dec_set_sign_bias(ctx, ref,
1911 						      av1_dec->ref_frame_sign_bias[i]);
1912 	}
1913 	hantro_reg_write(vpu, &av1_ref_scaling_enable, scale_enable);
1914 
1915 	hantro_reg_write(vpu, &av1_ref0_gm_mode,
1916 			 frame->global_motion.type[V4L2_AV1_REF_LAST_FRAME]);
1917 	hantro_reg_write(vpu, &av1_ref1_gm_mode,
1918 			 frame->global_motion.type[V4L2_AV1_REF_LAST2_FRAME]);
1919 	hantro_reg_write(vpu, &av1_ref2_gm_mode,
1920 			 frame->global_motion.type[V4L2_AV1_REF_LAST3_FRAME]);
1921 	hantro_reg_write(vpu, &av1_ref3_gm_mode,
1922 			 frame->global_motion.type[V4L2_AV1_REF_GOLDEN_FRAME]);
1923 	hantro_reg_write(vpu, &av1_ref4_gm_mode,
1924 			 frame->global_motion.type[V4L2_AV1_REF_BWDREF_FRAME]);
1925 	hantro_reg_write(vpu, &av1_ref5_gm_mode,
1926 			 frame->global_motion.type[V4L2_AV1_REF_ALTREF2_FRAME]);
1927 	hantro_reg_write(vpu, &av1_ref6_gm_mode,
1928 			 frame->global_motion.type[V4L2_AV1_REF_ALTREF_FRAME]);
1929 
1930 	rockchip_vpu981_av1_dec_set_other_frames(ctx);
1931 }
1932 
rockchip_vpu981_av1_dec_set_parameters(struct hantro_ctx * ctx)1933 static void rockchip_vpu981_av1_dec_set_parameters(struct hantro_ctx *ctx)
1934 {
1935 	struct hantro_dev *vpu = ctx->dev;
1936 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1937 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1938 
1939 	hantro_reg_write(vpu, &av1_skip_mode,
1940 			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SKIP_MODE_PRESENT));
1941 	hantro_reg_write(vpu, &av1_tempor_mvp_e,
1942 			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS));
1943 	hantro_reg_write(vpu, &av1_delta_lf_res_log,
1944 			 ctrls->frame->loop_filter.delta_lf_res);
1945 	hantro_reg_write(vpu, &av1_delta_lf_multi,
1946 			 !!(ctrls->frame->loop_filter.flags
1947 			    & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI));
1948 	hantro_reg_write(vpu, &av1_delta_lf_present,
1949 			 !!(ctrls->frame->loop_filter.flags
1950 			    & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT));
1951 	hantro_reg_write(vpu, &av1_disable_cdf_update,
1952 			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_CDF_UPDATE));
1953 	hantro_reg_write(vpu, &av1_allow_warp,
1954 			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_WARPED_MOTION));
1955 	hantro_reg_write(vpu, &av1_show_frame,
1956 			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SHOW_FRAME));
1957 	hantro_reg_write(vpu, &av1_switchable_motion_mode,
1958 			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_IS_MOTION_MODE_SWITCHABLE));
1959 	hantro_reg_write(vpu, &av1_enable_cdef,
1960 			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF));
1961 	hantro_reg_write(vpu, &av1_allow_masked_compound,
1962 			 !!(ctrls->sequence->flags
1963 			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_MASKED_COMPOUND));
1964 	hantro_reg_write(vpu, &av1_allow_interintra,
1965 			 !!(ctrls->sequence->flags
1966 			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTERINTRA_COMPOUND));
1967 	hantro_reg_write(vpu, &av1_enable_intra_edge_filter,
1968 			 !!(ctrls->sequence->flags
1969 			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTRA_EDGE_FILTER));
1970 	hantro_reg_write(vpu, &av1_allow_filter_intra,
1971 			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_FILTER_INTRA));
1972 	hantro_reg_write(vpu, &av1_enable_jnt_comp,
1973 			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_JNT_COMP));
1974 	hantro_reg_write(vpu, &av1_enable_dual_filter,
1975 			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_DUAL_FILTER));
1976 	hantro_reg_write(vpu, &av1_reduced_tx_set_used,
1977 			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REDUCED_TX_SET));
1978 	hantro_reg_write(vpu, &av1_allow_screen_content_tools,
1979 			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS));
1980 	hantro_reg_write(vpu, &av1_allow_intrabc,
1981 			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC));
1982 
1983 	if (!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS))
1984 		hantro_reg_write(vpu, &av1_force_interger_mv, 0);
1985 	else
1986 		hantro_reg_write(vpu, &av1_force_interger_mv,
1987 				 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_FORCE_INTEGER_MV));
1988 
1989 	hantro_reg_write(vpu, &av1_blackwhite_e, 0);
1990 	hantro_reg_write(vpu, &av1_delta_q_res_log, ctrls->frame->quantization.delta_q_res);
1991 	hantro_reg_write(vpu, &av1_delta_q_present,
1992 			 !!(ctrls->frame->quantization.flags
1993 			    & V4L2_AV1_QUANTIZATION_FLAG_DELTA_Q_PRESENT));
1994 
1995 	hantro_reg_write(vpu, &av1_idr_pic_e, !ctrls->frame->frame_type);
1996 	hantro_reg_write(vpu, &av1_quant_base_qindex, ctrls->frame->quantization.base_q_idx);
1997 	hantro_reg_write(vpu, &av1_bit_depth_y_minus8, ctx->bit_depth - 8);
1998 	hantro_reg_write(vpu, &av1_bit_depth_c_minus8, ctx->bit_depth - 8);
1999 
2000 	hantro_reg_write(vpu, &av1_mcomp_filt_type, ctrls->frame->interpolation_filter);
2001 	hantro_reg_write(vpu, &av1_high_prec_mv_e,
2002 			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_HIGH_PRECISION_MV));
2003 	hantro_reg_write(vpu, &av1_comp_pred_mode,
2004 			 (ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT) ? 2 : 0);
2005 	hantro_reg_write(vpu, &av1_transform_mode, (ctrls->frame->tx_mode == 1) ? 3 : 4);
2006 	hantro_reg_write(vpu, &av1_max_cb_size,
2007 			 (ctrls->sequence->flags
2008 			  & V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK) ? 7 : 6);
2009 	hantro_reg_write(vpu, &av1_min_cb_size, 3);
2010 
2011 	hantro_reg_write(vpu, &av1_comp_pred_fixed_ref, 0);
2012 	hantro_reg_write(vpu, &av1_comp_pred_var_ref0_av1, 0);
2013 	hantro_reg_write(vpu, &av1_comp_pred_var_ref1_av1, 0);
2014 	hantro_reg_write(vpu, &av1_filt_level_seg0, 0);
2015 	hantro_reg_write(vpu, &av1_filt_level_seg1, 0);
2016 	hantro_reg_write(vpu, &av1_filt_level_seg2, 0);
2017 	hantro_reg_write(vpu, &av1_filt_level_seg3, 0);
2018 	hantro_reg_write(vpu, &av1_filt_level_seg4, 0);
2019 	hantro_reg_write(vpu, &av1_filt_level_seg5, 0);
2020 	hantro_reg_write(vpu, &av1_filt_level_seg6, 0);
2021 	hantro_reg_write(vpu, &av1_filt_level_seg7, 0);
2022 
2023 	hantro_reg_write(vpu, &av1_qp_delta_y_dc_av1, ctrls->frame->quantization.delta_q_y_dc);
2024 	hantro_reg_write(vpu, &av1_qp_delta_ch_dc_av1, ctrls->frame->quantization.delta_q_u_dc);
2025 	hantro_reg_write(vpu, &av1_qp_delta_ch_ac_av1, ctrls->frame->quantization.delta_q_u_ac);
2026 	if (ctrls->frame->quantization.flags & V4L2_AV1_QUANTIZATION_FLAG_USING_QMATRIX) {
2027 		hantro_reg_write(vpu, &av1_qmlevel_y, ctrls->frame->quantization.qm_y);
2028 		hantro_reg_write(vpu, &av1_qmlevel_u, ctrls->frame->quantization.qm_u);
2029 		hantro_reg_write(vpu, &av1_qmlevel_v, ctrls->frame->quantization.qm_v);
2030 	} else {
2031 		hantro_reg_write(vpu, &av1_qmlevel_y, 0xff);
2032 		hantro_reg_write(vpu, &av1_qmlevel_u, 0xff);
2033 		hantro_reg_write(vpu, &av1_qmlevel_v, 0xff);
2034 	}
2035 
2036 	hantro_reg_write(vpu, &av1_lossless_e, rockchip_vpu981_av1_dec_is_lossless(ctx));
2037 	hantro_reg_write(vpu, &av1_quant_delta_v_dc, ctrls->frame->quantization.delta_q_v_dc);
2038 	hantro_reg_write(vpu, &av1_quant_delta_v_ac, ctrls->frame->quantization.delta_q_v_ac);
2039 
2040 	hantro_reg_write(vpu, &av1_skip_ref0,
2041 			 (ctrls->frame->skip_mode_frame[0]) ? ctrls->frame->skip_mode_frame[0] : 1);
2042 	hantro_reg_write(vpu, &av1_skip_ref1,
2043 			 (ctrls->frame->skip_mode_frame[1]) ? ctrls->frame->skip_mode_frame[1] : 1);
2044 
2045 	hantro_write_addr(vpu, AV1_MC_SYNC_CURR, av1_dec->tile_buf.dma);
2046 	hantro_write_addr(vpu, AV1_MC_SYNC_LEFT, av1_dec->tile_buf.dma);
2047 }
2048 
2049 static void
rockchip_vpu981_av1_dec_set_input_buffer(struct hantro_ctx * ctx,struct vb2_v4l2_buffer * vb2_src)2050 rockchip_vpu981_av1_dec_set_input_buffer(struct hantro_ctx *ctx,
2051 					 struct vb2_v4l2_buffer *vb2_src)
2052 {
2053 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
2054 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
2055 	const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
2056 	    ctrls->tile_group_entry;
2057 	struct hantro_dev *vpu = ctx->dev;
2058 	dma_addr_t src_dma;
2059 	u32 src_len, src_buf_len;
2060 	int start_bit, offset;
2061 
2062 	src_dma = vb2_dma_contig_plane_dma_addr(&vb2_src->vb2_buf, 0);
2063 	src_len = vb2_get_plane_payload(&vb2_src->vb2_buf, 0);
2064 	src_buf_len = vb2_plane_size(&vb2_src->vb2_buf, 0);
2065 
2066 	start_bit = (group_entry[0].tile_offset & 0xf) * 8;
2067 	offset = group_entry[0].tile_offset & ~0xf;
2068 
2069 	hantro_reg_write(vpu, &av1_strm_buffer_len, src_buf_len);
2070 	hantro_reg_write(vpu, &av1_strm_start_bit, start_bit);
2071 	hantro_reg_write(vpu, &av1_stream_len, src_len);
2072 	hantro_reg_write(vpu, &av1_strm_start_offset, 0);
2073 	hantro_write_addr(vpu, AV1_INPUT_STREAM, src_dma + offset);
2074 }
2075 
2076 static void
rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx * ctx)2077 rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx *ctx)
2078 {
2079 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
2080 	struct hantro_dev *vpu = ctx->dev;
2081 	struct hantro_decoded_buffer *dst;
2082 	struct vb2_v4l2_buffer *vb2_dst;
2083 	dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
2084 	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
2085 	size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
2086 
2087 	vb2_dst = av1_dec->frame_refs[av1_dec->current_frame_index].vb2_ref;
2088 	dst = vb2_to_hantro_decoded_buf(&vb2_dst->vb2_buf);
2089 	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
2090 	chroma_addr = luma_addr + cr_offset;
2091 	mv_addr = luma_addr + mv_offset;
2092 
2093 	hantro_write_addr(vpu, AV1_TILE_OUT_LU, luma_addr);
2094 	hantro_write_addr(vpu, AV1_TILE_OUT_CH, chroma_addr);
2095 	hantro_write_addr(vpu, AV1_TILE_OUT_MV, mv_addr);
2096 }
2097 
/*
 * Prepare and launch one AV1 decode run.
 *
 * Validates/fetches the controls and source buffer, refreshes the internal
 * reference pool, programs every functional unit of the core, then writes
 * av1_dec_e to start decoding.  Returns 0 on success; on failure the run is
 * ended and the job is completed with VB2_BUF_STATE_ERROR.
 */
int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct vb2_v4l2_buffer *vb2_src;
	int ret;

	hantro_start_prepare_run(ctx);

	ret = rockchip_vpu981_av1_dec_prepare_run(ctx);
	if (ret)
		goto prepare_error;

	vb2_src = hantro_get_src_buf(ctx);
	if (!vb2_src) {
		ret = -EINVAL;
		goto prepare_error;
	}

	/* Refresh the reference pool before any register setup depends on it. */
	rockchip_vpu981_av1_dec_clean_refs(ctx);
	rockchip_vpu981_av1_dec_frame_ref(ctx, vb2_src->vb2_buf.timestamp);

	/* Per-frame syntax, tiles, references, filters and probabilities. */
	rockchip_vpu981_av1_dec_set_parameters(ctx);
	rockchip_vpu981_av1_dec_set_global_model(ctx);
	rockchip_vpu981_av1_dec_set_tile_info(ctx);
	rockchip_vpu981_av1_dec_set_reference_frames(ctx);
	rockchip_vpu981_av1_dec_set_segmentation(ctx);
	rockchip_vpu981_av1_dec_set_loopfilter(ctx);
	rockchip_vpu981_av1_dec_set_picture_dimensions(ctx);
	rockchip_vpu981_av1_dec_set_cdef(ctx);
	rockchip_vpu981_av1_dec_set_lr(ctx);
	rockchip_vpu981_av1_dec_set_fgs(ctx);
	rockchip_vpu981_av1_dec_set_prob(ctx);

	/* Core operating mode and output/interrupt behavior. */
	hantro_reg_write(vpu, &av1_dec_mode, AV1_DEC_MODE);
	hantro_reg_write(vpu, &av1_dec_out_ec_byte_word, 0);
	hantro_reg_write(vpu, &av1_write_mvs_e, 1);
	hantro_reg_write(vpu, &av1_dec_out_ec_bypass, 1);
	hantro_reg_write(vpu, &av1_dec_clk_gate_e, 1);

	hantro_reg_write(vpu, &av1_dec_abort_e, 0);
	hantro_reg_write(vpu, &av1_dec_tile_int_e, 0);

	/* Bus/AXI tuning parameters. */
	hantro_reg_write(vpu, &av1_dec_alignment, 64);
	hantro_reg_write(vpu, &av1_apf_disable, 0);
	hantro_reg_write(vpu, &av1_apf_threshold, 8);
	hantro_reg_write(vpu, &av1_dec_buswidth, 2);
	hantro_reg_write(vpu, &av1_dec_max_burst, 16);
	hantro_reg_write(vpu, &av1_error_conceal_e, 0);
	hantro_reg_write(vpu, &av1_axi_rd_ostd_threshold, 64);
	hantro_reg_write(vpu, &av1_axi_wr_ostd_threshold, 64);

	/* Generous hardware timeouts so stalls raise an error, not a hang. */
	hantro_reg_write(vpu, &av1_ext_timeout_cycles, 0xfffffff);
	hantro_reg_write(vpu, &av1_ext_timeout_override_e, 1);
	hantro_reg_write(vpu, &av1_timeout_cycles, 0xfffffff);
	hantro_reg_write(vpu, &av1_timeout_override_e, 1);

	rockchip_vpu981_av1_dec_set_output_buffer(ctx);
	rockchip_vpu981_av1_dec_set_input_buffer(ctx, vb2_src);

	hantro_end_prepare_run(ctx);

	/* Kick the hardware. */
	hantro_reg_write(vpu, &av1_dec_e, 1);

	return 0;

prepare_error:
	hantro_end_prepare_run(ctx);
	hantro_irq_done(vpu, VB2_BUF_STATE_ERROR);
	return ret;
}
2168 
rockchip_vpu981_postproc_enable(struct hantro_ctx * ctx)2169 static void rockchip_vpu981_postproc_enable(struct hantro_ctx *ctx)
2170 {
2171 	struct hantro_dev *vpu = ctx->dev;
2172 	int width = ctx->dst_fmt.width;
2173 	int height = ctx->dst_fmt.height;
2174 	struct vb2_v4l2_buffer *vb2_dst;
2175 	size_t chroma_offset;
2176 	dma_addr_t dst_dma;
2177 
2178 	vb2_dst = hantro_get_dst_buf(ctx);
2179 
2180 	dst_dma = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0);
2181 	chroma_offset = ctx->dst_fmt.plane_fmt[0].bytesperline *
2182 	    ctx->dst_fmt.height;
2183 
2184 	/* enable post processor */
2185 	hantro_reg_write(vpu, &av1_pp_out_e, 1);
2186 	hantro_reg_write(vpu, &av1_pp_in_format, 0);
2187 	hantro_reg_write(vpu, &av1_pp0_dup_hor, 1);
2188 	hantro_reg_write(vpu, &av1_pp0_dup_ver, 1);
2189 
2190 	hantro_reg_write(vpu, &av1_pp_in_height, height / 2);
2191 	hantro_reg_write(vpu, &av1_pp_in_width, width / 2);
2192 	hantro_reg_write(vpu, &av1_pp_out_height, height);
2193 	hantro_reg_write(vpu, &av1_pp_out_width, width);
2194 	hantro_reg_write(vpu, &av1_pp_out_y_stride,
2195 			 ctx->dst_fmt.plane_fmt[0].bytesperline);
2196 	hantro_reg_write(vpu, &av1_pp_out_c_stride,
2197 			 ctx->dst_fmt.plane_fmt[0].bytesperline);
2198 	switch (ctx->dst_fmt.pixelformat) {
2199 	case V4L2_PIX_FMT_P010:
2200 		hantro_reg_write(vpu, &av1_pp_out_format, 1);
2201 		break;
2202 	case V4L2_PIX_FMT_NV12:
2203 		hantro_reg_write(vpu, &av1_pp_out_format, 3);
2204 		break;
2205 	default:
2206 		hantro_reg_write(vpu, &av1_pp_out_format, 0);
2207 	}
2208 
2209 	hantro_reg_write(vpu, &av1_ppd_blend_exist, 0);
2210 	hantro_reg_write(vpu, &av1_ppd_dith_exist, 0);
2211 	hantro_reg_write(vpu, &av1_ablend_crop_e, 0);
2212 	hantro_reg_write(vpu, &av1_pp_format_customer1_e, 0);
2213 	hantro_reg_write(vpu, &av1_pp_crop_exist, 0);
2214 	hantro_reg_write(vpu, &av1_pp_up_level, 0);
2215 	hantro_reg_write(vpu, &av1_pp_down_level, 0);
2216 	hantro_reg_write(vpu, &av1_pp_exist, 0);
2217 
2218 	hantro_write_addr(vpu, AV1_PP_OUT_LU, dst_dma);
2219 	hantro_write_addr(vpu, AV1_PP_OUT_CH, dst_dma + chroma_offset);
2220 }
2221 
rockchip_vpu981_postproc_disable(struct hantro_ctx * ctx)2222 static void rockchip_vpu981_postproc_disable(struct hantro_ctx *ctx)
2223 {
2224 	struct hantro_dev *vpu = ctx->dev;
2225 
2226 	/* disable post processor */
2227 	hantro_reg_write(vpu, &av1_pp_out_e, 0);
2228 }
2229 
/* Post-processor hooks: enable/disable the VPU981 post-processing path. */
const struct hantro_postproc_ops rockchip_vpu981_postproc_ops = {
	.enable = rockchip_vpu981_postproc_enable,
	.disable = rockchip_vpu981_postproc_disable,
};
2234