1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (c) 2023, Collabora
4 *
5 * Author: Benjamin Gaignard <benjamin.gaignard@collabora.com>
6 */
7
8 #include <media/v4l2-mem2mem.h>
9 #include "hantro.h"
10 #include "hantro_v4l2.h"
11 #include "rockchip_vpu981_regs.h"
12
13 #define AV1_DEC_MODE 17
14 #define GM_GLOBAL_MODELS_PER_FRAME 7
15 #define GLOBAL_MODEL_TOTAL_SIZE (6 * 4 + 4 * 2)
16 #define GLOBAL_MODEL_SIZE ALIGN(GM_GLOBAL_MODELS_PER_FRAME * GLOBAL_MODEL_TOTAL_SIZE, 2048)
17 #define AV1_MAX_TILES 128
18 #define AV1_TILE_INFO_SIZE (AV1_MAX_TILES * 16)
19 #define AV1DEC_MAX_PIC_BUFFERS 24
20 #define AV1_REF_SCALE_SHIFT 14
21 #define AV1_INVALID_IDX -1
22 #define MAX_FRAME_DISTANCE 31
23 #define AV1_PRIMARY_REF_NONE 7
24 #define AV1_TILE_SIZE ALIGN(32 * 128, 4096)
/*
 * These 3 values aren't defined in enum v4l2_av1_segment_feature
 * because they are not part of the specification.
 */
29 #define V4L2_AV1_SEG_LVL_ALT_LF_Y_H 2
30 #define V4L2_AV1_SEG_LVL_ALT_LF_U 3
31 #define V4L2_AV1_SEG_LVL_ALT_LF_V 4
32
33 #define SUPERRES_SCALE_BITS 3
34 #define SCALE_NUMERATOR 8
35 #define SUPERRES_SCALE_DENOMINATOR_MIN (SCALE_NUMERATOR + 1)
36
37 #define RS_SUBPEL_BITS 6
38 #define RS_SUBPEL_MASK ((1 << RS_SUBPEL_BITS) - 1)
39 #define RS_SCALE_SUBPEL_BITS 14
40 #define RS_SCALE_SUBPEL_MASK ((1 << RS_SCALE_SUBPEL_BITS) - 1)
41 #define RS_SCALE_EXTRA_BITS (RS_SCALE_SUBPEL_BITS - RS_SUBPEL_BITS)
42 #define RS_SCALE_EXTRA_OFF (1 << (RS_SCALE_EXTRA_BITS - 1))
43
44 #define IS_INTRA(type) ((type == V4L2_AV1_KEY_FRAME) || (type == V4L2_AV1_INTRA_ONLY_FRAME))
45
46 #define LST_BUF_IDX (V4L2_AV1_REF_LAST_FRAME - V4L2_AV1_REF_LAST_FRAME)
47 #define LST2_BUF_IDX (V4L2_AV1_REF_LAST2_FRAME - V4L2_AV1_REF_LAST_FRAME)
48 #define LST3_BUF_IDX (V4L2_AV1_REF_LAST3_FRAME - V4L2_AV1_REF_LAST_FRAME)
49 #define GLD_BUF_IDX (V4L2_AV1_REF_GOLDEN_FRAME - V4L2_AV1_REF_LAST_FRAME)
50 #define BWD_BUF_IDX (V4L2_AV1_REF_BWDREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
51 #define ALT2_BUF_IDX (V4L2_AV1_REF_ALTREF2_FRAME - V4L2_AV1_REF_LAST_FRAME)
52 #define ALT_BUF_IDX (V4L2_AV1_REF_ALTREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
53
54 #define DIV_LUT_PREC_BITS 14
55 #define DIV_LUT_BITS 8
56 #define DIV_LUT_NUM BIT(DIV_LUT_BITS)
57 #define WARP_PARAM_REDUCE_BITS 6
58 #define WARPEDMODEL_PREC_BITS 16
59
/* Divide @value by 2^@n, rounding the result to the nearest integer. */
#define AV1_DIV_ROUND_UP_POW2(value, n) \
({ \
	typeof(n) _n = n; \
	typeof(value) _value = value; \
	(_value + (BIT(_n) >> 1)) >> _n; \
})

/*
 * Signed variant of AV1_DIV_ROUND_UP_POW2: rounds the magnitude and
 * reapplies the sign, so negative values round away from zero symmetrically
 * with positive ones.
 */
#define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n) \
({ \
	typeof(n) _n_ = n; \
	typeof(value) _value_ = value; \
	(((_value_) < 0) ? -AV1_DIV_ROUND_UP_POW2(-(_value_), (_n_)) \
		: AV1_DIV_ROUND_UP_POW2((_value_), (_n_))); \
})
74
/*
 * Film-grain scaling LUTs and pre-generated grain blocks.  The DMA
 * "film_grain" buffer allocated in rockchip_vpu981_av1_dec_init() is sized
 * from this struct (aligned to 2048), so the field layout presumably mirrors
 * what the VPU981 expects in that buffer — confirm against the hw databook.
 */
struct rockchip_av1_film_grain {
	u8 scaling_lut_y[256];
	u8 scaling_lut_cb[256];
	u8 scaling_lut_cr[256];
	s16 cropped_luma_grain_block[4096];
	s16 cropped_chroma_grain_block[1024 * 2];
};
82
/*
 * Reciprocal lookup table for rockchip_vpu981_av1_dec_resolve_divisor_32():
 * div_lut[f] == round(2^DIV_LUT_PREC_BITS * DIV_LUT_NUM / (DIV_LUT_NUM + f))
 * for f in [0, DIV_LUT_NUM], running from 16384 (2^14) down to 8192 (2^13).
 * This is the div_lut table used by the AV1 warp shear computation.
 */
static const short div_lut[DIV_LUT_NUM + 1] = {
	16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
	15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
	15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
	14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
	13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
	13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
	13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
	12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
	12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
	11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
	11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
	11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
	10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
	10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
	10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
	9963, 9939, 9916, 9892, 9869, 9846, 9823, 9800, 9777, 9754, 9732,
	9709, 9687, 9664, 9642, 9620, 9598, 9576, 9554, 9533, 9511, 9489,
	9468, 9447, 9425, 9404, 9383, 9362, 9341, 9321, 9300, 9279, 9259,
	9239, 9218, 9198, 9178, 9158, 9138, 9118, 9098, 9079, 9059, 9039,
	9020, 9001, 8981, 8962, 8943, 8924, 8905, 8886, 8867, 8849, 8830,
	8812, 8793, 8775, 8756, 8738, 8720, 8702, 8684, 8666, 8648, 8630,
	8613, 8595, 8577, 8560, 8542, 8525, 8508, 8490, 8473, 8456, 8439,
	8422, 8405, 8389, 8372, 8355, 8339, 8322, 8306, 8289, 8273, 8257,
	8240, 8224, 8208, 8192,
};
109
rockchip_vpu981_get_frame_index(struct hantro_ctx * ctx,int ref)110 static int rockchip_vpu981_get_frame_index(struct hantro_ctx *ctx, int ref)
111 {
112 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
113 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
114 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
115 u64 timestamp;
116 int i, idx = frame->ref_frame_idx[ref];
117
118 if (idx >= V4L2_AV1_TOTAL_REFS_PER_FRAME || idx < 0)
119 return AV1_INVALID_IDX;
120
121 timestamp = frame->reference_frame_ts[idx];
122 for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
123 if (!av1_dec->frame_refs[i].used)
124 continue;
125 if (av1_dec->frame_refs[i].timestamp == timestamp)
126 return i;
127 }
128
129 return AV1_INVALID_IDX;
130 }
131
rockchip_vpu981_get_order_hint(struct hantro_ctx * ctx,int ref)132 static int rockchip_vpu981_get_order_hint(struct hantro_ctx *ctx, int ref)
133 {
134 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
135 int idx = rockchip_vpu981_get_frame_index(ctx, ref);
136
137 if (idx != AV1_INVALID_IDX)
138 return av1_dec->frame_refs[idx].order_hint;
139
140 return 0;
141 }
142
/*
 * Claim a free slot in the internal reference-frame bookkeeping for the
 * frame identified by @timestamp, recording its geometry, type and order
 * hints from the current frame control.
 *
 * Returns the claimed slot index, or AV1_INVALID_IDX when all
 * AV1_MAX_FRAME_BUF_COUNT slots are in use.
 *
 * Fix: always refresh ->vb2_ref with the current destination buffer.
 * The previous code assigned it only when it was NULL, so a slot being
 * recycled after unref kept a stale vb2 buffer pointer from an earlier
 * frame, and later reference reads used the wrong buffer.
 */
static int rockchip_vpu981_av1_dec_frame_ref(struct hantro_ctx *ctx,
					     u64 timestamp)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	int i;

	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
		int j;

		if (av1_dec->frame_refs[i].used)
			continue;

		av1_dec->frame_refs[i].width = frame->frame_width_minus_1 + 1;
		av1_dec->frame_refs[i].height = frame->frame_height_minus_1 + 1;
		/* mi units are 8x8 pixels */
		av1_dec->frame_refs[i].mi_cols = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
		av1_dec->frame_refs[i].mi_rows = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
		av1_dec->frame_refs[i].timestamp = timestamp;
		av1_dec->frame_refs[i].frame_type = frame->frame_type;
		av1_dec->frame_refs[i].order_hint = frame->order_hint;
		av1_dec->frame_refs[i].vb2_ref = hantro_get_dst_buf(ctx);

		for (j = 0; j < V4L2_AV1_TOTAL_REFS_PER_FRAME; j++)
			av1_dec->frame_refs[i].order_hints[j] = frame->order_hints[j];
		av1_dec->frame_refs[i].used = true;
		av1_dec->current_frame_index = i;

		return i;
	}

	return AV1_INVALID_IDX;
}
177
rockchip_vpu981_av1_dec_frame_unref(struct hantro_ctx * ctx,int idx)178 static void rockchip_vpu981_av1_dec_frame_unref(struct hantro_ctx *ctx, int idx)
179 {
180 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
181
182 if (idx >= 0)
183 av1_dec->frame_refs[idx].used = false;
184 }
185
rockchip_vpu981_av1_dec_clean_refs(struct hantro_ctx * ctx)186 static void rockchip_vpu981_av1_dec_clean_refs(struct hantro_ctx *ctx)
187 {
188 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
189 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
190
191 int ref, idx;
192
193 for (idx = 0; idx < AV1_MAX_FRAME_BUF_COUNT; idx++) {
194 u64 timestamp = av1_dec->frame_refs[idx].timestamp;
195 bool used = false;
196
197 if (!av1_dec->frame_refs[idx].used)
198 continue;
199
200 for (ref = 0; ref < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref++) {
201 if (ctrls->frame->reference_frame_ts[ref] == timestamp)
202 used = true;
203 }
204
205 if (!used)
206 rockchip_vpu981_av1_dec_frame_unref(ctx, idx);
207 }
208 }
209
/* Size in bytes of the decoded luma plane (width * height at bit_depth). */
static size_t rockchip_vpu981_av1_dec_luma_size(struct hantro_ctx *ctx)
{
	return ctx->dst_fmt.width * ctx->dst_fmt.height * ctx->bit_depth / 8;
}
214
rockchip_vpu981_av1_dec_chroma_size(struct hantro_ctx * ctx)215 static size_t rockchip_vpu981_av1_dec_chroma_size(struct hantro_ctx *ctx)
216 {
217 size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
218
219 return ALIGN((cr_offset * 3) / 2, 64);
220 }
221
/*
 * Free the per-tile-column intermediate DMA buffers (deblocking data and
 * control, CDEF, super-resolution and loop-restoration columns).  Safe to
 * call on partially-allocated state: each buffer is freed only when its
 * CPU pointer is set, and the pointer is cleared afterwards so a repeat
 * call is a no-op.
 */
static void rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;

	if (av1_dec->db_data_col.cpu)
		dma_free_coherent(vpu->dev, av1_dec->db_data_col.size,
				  av1_dec->db_data_col.cpu,
				  av1_dec->db_data_col.dma);
	av1_dec->db_data_col.cpu = NULL;

	if (av1_dec->db_ctrl_col.cpu)
		dma_free_coherent(vpu->dev, av1_dec->db_ctrl_col.size,
				  av1_dec->db_ctrl_col.cpu,
				  av1_dec->db_ctrl_col.dma);
	av1_dec->db_ctrl_col.cpu = NULL;

	if (av1_dec->cdef_col.cpu)
		dma_free_coherent(vpu->dev, av1_dec->cdef_col.size,
				  av1_dec->cdef_col.cpu, av1_dec->cdef_col.dma);
	av1_dec->cdef_col.cpu = NULL;

	if (av1_dec->sr_col.cpu)
		dma_free_coherent(vpu->dev, av1_dec->sr_col.size,
				  av1_dec->sr_col.cpu, av1_dec->sr_col.dma);
	av1_dec->sr_col.cpu = NULL;

	if (av1_dec->lr_col.cpu)
		dma_free_coherent(vpu->dev, av1_dec->lr_col.size,
				  av1_dec->lr_col.cpu, av1_dec->lr_col.dma);
	av1_dec->lr_col.cpu = NULL;
}
254
/*
 * (Re)allocate the per-tile-column intermediate DMA buffers for the
 * current frame geometry.  Uses the db_data_col size as the "big enough"
 * check: if the existing allocation already covers the required size,
 * nothing is done.  Otherwise all five buffers are freed and reallocated.
 *
 * Returns 0 on success, -ENOMEM if any allocation fails (in which case
 * everything allocated so far is freed again).
 *
 * The per-buffer size formulas (12 bytes/line of deblock data, 44*16
 * bits/superblock of CDEF, 3040+1280 bytes/superblock of superres,
 * 1536 bytes/stripe of loop restoration) come from the vendor sizing
 * rules for this core; confirm against the VPU981 databook if changed.
 */
static int rockchip_vpu981_av1_dec_tiles_reallocate(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_av1_tile_info *tile_info = &ctrls->frame->tile_info;
	unsigned int num_tile_cols = tile_info->tile_cols;
	unsigned int height = ALIGN(ctrls->frame->frame_height_minus_1 + 1, 64);
	unsigned int height_in_sb = height / 64;	/* 64x64 superblocks */
	unsigned int stripe_num = ((height + 8) + 63) / 64;
	size_t size;

	/* Current allocation still large enough? */
	if (av1_dec->db_data_col.size >=
	    ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols)
		return 0;

	rockchip_vpu981_av1_dec_tiles_free(ctx);

	size = ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols;
	av1_dec->db_data_col.cpu = dma_alloc_coherent(vpu->dev, size,
						      &av1_dec->db_data_col.dma,
						      GFP_KERNEL);
	if (!av1_dec->db_data_col.cpu)
		goto buffer_allocation_error;
	av1_dec->db_data_col.size = size;

	size = ALIGN(height * 2 * 16 / 4, 128) * num_tile_cols;
	av1_dec->db_ctrl_col.cpu = dma_alloc_coherent(vpu->dev, size,
						      &av1_dec->db_ctrl_col.dma,
						      GFP_KERNEL);
	if (!av1_dec->db_ctrl_col.cpu)
		goto buffer_allocation_error;
	av1_dec->db_ctrl_col.size = size;

	size = ALIGN(height_in_sb * 44 * ctx->bit_depth * 16 / 8, 128) * num_tile_cols;
	av1_dec->cdef_col.cpu = dma_alloc_coherent(vpu->dev, size,
						   &av1_dec->cdef_col.dma,
						   GFP_KERNEL);
	if (!av1_dec->cdef_col.cpu)
		goto buffer_allocation_error;
	av1_dec->cdef_col.size = size;

	size = ALIGN(height_in_sb * (3040 + 1280), 128) * num_tile_cols;
	av1_dec->sr_col.cpu = dma_alloc_coherent(vpu->dev, size,
						 &av1_dec->sr_col.dma,
						 GFP_KERNEL);
	if (!av1_dec->sr_col.cpu)
		goto buffer_allocation_error;
	av1_dec->sr_col.size = size;

	size = ALIGN(stripe_num * 1536 * ctx->bit_depth / 8, 128) * num_tile_cols;
	av1_dec->lr_col.cpu = dma_alloc_coherent(vpu->dev, size,
						 &av1_dec->lr_col.dma,
						 GFP_KERNEL);
	if (!av1_dec->lr_col.cpu)
		goto buffer_allocation_error;
	av1_dec->lr_col.size = size;

	av1_dec->num_tile_cols_allocated = num_tile_cols;
	return 0;

buffer_allocation_error:
	rockchip_vpu981_av1_dec_tiles_free(ctx);
	return -ENOMEM;
}
320
/*
 * Tear down all AV1 decoder DMA buffers allocated in
 * rockchip_vpu981_av1_dec_init() plus the per-tile-column buffers.
 * Each buffer is freed only when present and its CPU pointer is cleared,
 * so this is safe to call on a partially-initialized context.
 */
void rockchip_vpu981_av1_dec_exit(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;

	if (av1_dec->global_model.cpu)
		dma_free_coherent(vpu->dev, av1_dec->global_model.size,
				  av1_dec->global_model.cpu,
				  av1_dec->global_model.dma);
	av1_dec->global_model.cpu = NULL;

	if (av1_dec->tile_info.cpu)
		dma_free_coherent(vpu->dev, av1_dec->tile_info.size,
				  av1_dec->tile_info.cpu,
				  av1_dec->tile_info.dma);
	av1_dec->tile_info.cpu = NULL;

	if (av1_dec->film_grain.cpu)
		dma_free_coherent(vpu->dev, av1_dec->film_grain.size,
				  av1_dec->film_grain.cpu,
				  av1_dec->film_grain.dma);
	av1_dec->film_grain.cpu = NULL;

	if (av1_dec->prob_tbl.cpu)
		dma_free_coherent(vpu->dev, av1_dec->prob_tbl.size,
				  av1_dec->prob_tbl.cpu, av1_dec->prob_tbl.dma);
	av1_dec->prob_tbl.cpu = NULL;

	if (av1_dec->prob_tbl_out.cpu)
		dma_free_coherent(vpu->dev, av1_dec->prob_tbl_out.size,
				  av1_dec->prob_tbl_out.cpu,
				  av1_dec->prob_tbl_out.dma);
	av1_dec->prob_tbl_out.cpu = NULL;

	if (av1_dec->tile_buf.cpu)
		dma_free_coherent(vpu->dev, av1_dec->tile_buf.size,
				  av1_dec->tile_buf.cpu, av1_dec->tile_buf.dma);
	av1_dec->tile_buf.cpu = NULL;

	rockchip_vpu981_av1_dec_tiles_free(ctx);
}
362
/*
 * Allocate the fixed-size AV1 decoder DMA buffers (global motion model,
 * tile info, film grain, CDF probability tables in/out, tile bitstream
 * staging) and install the default CDF tables.
 *
 * Returns 0 on success, -ENOMEM on any allocation failure.
 *
 * NOTE(review): on a mid-sequence allocation failure the buffers
 * allocated so far are not freed here; this presumably relies on the
 * core invoking rockchip_vpu981_av1_dec_exit() on the failed context —
 * confirm against the hantro core's init-error path.
 */
int rockchip_vpu981_av1_dec_init(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;

	memset(av1_dec, 0, sizeof(*av1_dec));

	av1_dec->global_model.cpu = dma_alloc_coherent(vpu->dev, GLOBAL_MODEL_SIZE,
						       &av1_dec->global_model.dma,
						       GFP_KERNEL);
	if (!av1_dec->global_model.cpu)
		return -ENOMEM;
	av1_dec->global_model.size = GLOBAL_MODEL_SIZE;

	av1_dec->tile_info.cpu = dma_alloc_coherent(vpu->dev, AV1_MAX_TILES,
						    &av1_dec->tile_info.dma,
						    GFP_KERNEL);
	if (!av1_dec->tile_info.cpu)
		return -ENOMEM;
	av1_dec->tile_info.size = AV1_MAX_TILES;

	av1_dec->film_grain.cpu = dma_alloc_coherent(vpu->dev,
						     ALIGN(sizeof(struct rockchip_av1_film_grain), 2048),
						     &av1_dec->film_grain.dma,
						     GFP_KERNEL);
	if (!av1_dec->film_grain.cpu)
		return -ENOMEM;
	av1_dec->film_grain.size = ALIGN(sizeof(struct rockchip_av1_film_grain), 2048);

	av1_dec->prob_tbl.cpu = dma_alloc_coherent(vpu->dev,
						   ALIGN(sizeof(struct av1cdfs), 2048),
						   &av1_dec->prob_tbl.dma,
						   GFP_KERNEL);
	if (!av1_dec->prob_tbl.cpu)
		return -ENOMEM;
	av1_dec->prob_tbl.size = ALIGN(sizeof(struct av1cdfs), 2048);

	av1_dec->prob_tbl_out.cpu = dma_alloc_coherent(vpu->dev,
						       ALIGN(sizeof(struct av1cdfs), 2048),
						       &av1_dec->prob_tbl_out.dma,
						       GFP_KERNEL);
	if (!av1_dec->prob_tbl_out.cpu)
		return -ENOMEM;
	av1_dec->prob_tbl_out.size = ALIGN(sizeof(struct av1cdfs), 2048);
	av1_dec->cdfs = &av1_dec->default_cdfs;
	av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;

	/* Seed the CDF context with the spec's default probabilities. */
	rockchip_av1_set_default_cdfs(av1_dec->cdfs, av1_dec->cdfs_ndvc);

	av1_dec->tile_buf.cpu = dma_alloc_coherent(vpu->dev,
						   AV1_TILE_SIZE,
						   &av1_dec->tile_buf.dma,
						   GFP_KERNEL);
	if (!av1_dec->tile_buf.cpu)
		return -ENOMEM;
	av1_dec->tile_buf.size = AV1_TILE_SIZE;

	return 0;
}
422
rockchip_vpu981_av1_dec_prepare_run(struct hantro_ctx * ctx)423 static int rockchip_vpu981_av1_dec_prepare_run(struct hantro_ctx *ctx)
424 {
425 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
426 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
427
428 ctrls->sequence = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_SEQUENCE);
429 if (WARN_ON(!ctrls->sequence))
430 return -EINVAL;
431
432 ctrls->tile_group_entry =
433 hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY);
434 if (WARN_ON(!ctrls->tile_group_entry))
435 return -EINVAL;
436
437 ctrls->frame = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FRAME);
438 if (WARN_ON(!ctrls->frame))
439 return -EINVAL;
440
441 ctrls->film_grain =
442 hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FILM_GRAIN);
443
444 return rockchip_vpu981_av1_dec_tiles_reallocate(ctx);
445 }
446
/*
 * Return the 0-based index of the most significant set bit of @n,
 * or 0 when @n is zero (so get_msb(0) == get_msb(1)).
 */
static inline int rockchip_vpu981_av1_dec_get_msb(u32 n)
{
	return n ? 31 - __builtin_clz(n) : 0;
}
453
/*
 * Fixed-point reciprocal of @d for the warp shear math (AV1 spec
 * resolve_divisor): returns a div_lut multiplier and sets *@shift so that
 * x / d ~= (x * return) >> *shift, with *shift = msb(d) + DIV_LUT_PREC_BITS.
 * Returns -1 when the table index would be out of range.
 */
static short rockchip_vpu981_av1_dec_resolve_divisor_32(u32 d, short *shift)
{
	int f;
	u64 e;

	*shift = rockchip_vpu981_av1_dec_get_msb(d);
	/* e is obtained from D after resetting the most significant 1 bit. */
	e = d - ((u32)1 << *shift);
	/* Get the most significant DIV_LUT_BITS (8) bits of e into f */
	if (*shift > DIV_LUT_BITS)
		f = AV1_DIV_ROUND_UP_POW2(e, *shift - DIV_LUT_BITS);
	else
		f = e << (DIV_LUT_BITS - *shift);
	if (f > DIV_LUT_NUM)
		return -1;
	*shift += DIV_LUT_PREC_BITS;
	/* Use f as lookup into the precomputed table of multipliers */
	return div_lut[f];
}
473
474 static void
rockchip_vpu981_av1_dec_get_shear_params(const u32 * params,s64 * alpha,s64 * beta,s64 * gamma,s64 * delta)475 rockchip_vpu981_av1_dec_get_shear_params(const u32 *params, s64 *alpha,
476 s64 *beta, s64 *gamma, s64 *delta)
477 {
478 const int *mat = params;
479 short shift;
480 short y;
481 long long gv, dv;
482
483 if (mat[2] <= 0)
484 return;
485
486 *alpha = clamp_val(mat[2] - (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);
487 *beta = clamp_val(mat[3], S16_MIN, S16_MAX);
488
489 y = rockchip_vpu981_av1_dec_resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);
490
491 gv = ((long long)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;
492
493 *gamma = clamp_val((int)AV1_DIV_ROUND_UP_POW2_SIGNED(gv, shift), S16_MIN, S16_MAX);
494
495 dv = ((long long)mat[3] * mat[4]) * y;
496 *delta = clamp_val(mat[5] -
497 (int)AV1_DIV_ROUND_UP_POW2_SIGNED(dv, shift) - (1 << WARPEDMODEL_PREC_BITS),
498 S16_MIN, S16_MAX);
499
500 *alpha = AV1_DIV_ROUND_UP_POW2_SIGNED(*alpha, WARP_PARAM_REDUCE_BITS)
501 * (1 << WARP_PARAM_REDUCE_BITS);
502 *beta = AV1_DIV_ROUND_UP_POW2_SIGNED(*beta, WARP_PARAM_REDUCE_BITS)
503 * (1 << WARP_PARAM_REDUCE_BITS);
504 *gamma = AV1_DIV_ROUND_UP_POW2_SIGNED(*gamma, WARP_PARAM_REDUCE_BITS)
505 * (1 << WARP_PARAM_REDUCE_BITS);
506 *delta = AV1_DIV_ROUND_UP_POW2_SIGNED(*delta, WARP_PARAM_REDUCE_BITS)
507 * (1 << WARP_PARAM_REDUCE_BITS);
508 }
509
/*
 * Serialize the global motion models for the 7 inter references into the
 * global_model DMA buffer (6 x s32 coefficients + 4 x s16 shear params
 * per reference, see GLOBAL_MODEL_TOTAL_SIZE) and point the hw at it.
 *
 * NOTE(review): coefficients 2 and 3 are written swapped relative to the
 * uAPI ordering — presumably the hardware expects the diagonal terms in
 * the opposite order; confirm against the VPU981 databook.
 */
static void rockchip_vpu981_av1_dec_set_global_model(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	const struct v4l2_av1_global_motion *gm = &frame->global_motion;
	u8 *dst = av1_dec->global_model.cpu;
	struct hantro_dev *vpu = ctx->dev;
	int ref_frame, i;

	memset(dst, 0, GLOBAL_MODEL_SIZE);
	for (ref_frame = 0; ref_frame < V4L2_AV1_REFS_PER_FRAME; ++ref_frame) {
		s64 alpha = 0, beta = 0, gamma = 0, delta = 0;

		for (i = 0; i < 6; ++i) {
			if (i == 2)
				*(s32 *)dst =
					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][3];
			else if (i == 3)
				*(s32 *)dst =
					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][2];
			else
				*(s32 *)dst =
					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][i];
			dst += 4;
		}

		/* Shear params only exist for translational..affine models. */
		if (gm->type[V4L2_AV1_REF_LAST_FRAME + ref_frame] <= V4L2_AV1_WARP_MODEL_AFFINE)
			rockchip_vpu981_av1_dec_get_shear_params(&gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][0],
								 &alpha, &beta, &gamma, &delta);

		*(s16 *)dst = alpha;
		dst += 2;
		*(s16 *)dst = beta;
		dst += 2;
		*(s16 *)dst = gamma;
		dst += 2;
		*(s16 *)dst = delta;
		dst += 2;
	}

	hantro_write_addr(vpu, AV1_GLOBAL_MODEL, av1_dec->global_model.dma);
}
553
/*
 * Smallest k such that (1 << k) >= target, i.e. ceil(log2(target));
 * returns 0 for target <= 1.
 */
static int rockchip_vpu981_av1_tile_log2(int target)
{
	int k = 0;

	while ((1 << k) < target)
		k++;

	return k;
}
566
/*
 * Build the per-tile descriptor table (16 bytes per tile: size in
 * superblocks, bitstream start offset, end offset) in the tile_info DMA
 * buffer and program the tile-related registers.  Tile ids are recomputed
 * column-major (x * rows + y) because av1_tile_transpose is set to 1.
 */
static void rockchip_vpu981_av1_dec_set_tile_info(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_av1_tile_info *tile_info = &ctrls->frame->tile_info;
	const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
		ctrls->tile_group_entry;
	int context_update_y =
		tile_info->context_update_tile_id / tile_info->tile_cols;
	int context_update_x =
		tile_info->context_update_tile_id % tile_info->tile_cols;
	/* Transposed (column-major) id of the context-update tile. */
	int context_update_tile_id =
		context_update_x * tile_info->tile_rows + context_update_y;
	u8 *dst = av1_dec->tile_info.cpu;
	struct hantro_dev *vpu = ctx->dev;
	int tile0, tile1;

	memset(dst, 0, av1_dec->tile_info.size);

	for (tile0 = 0; tile0 < tile_info->tile_cols; tile0++) {
		for (tile1 = 0; tile1 < tile_info->tile_rows; tile1++) {
			int tile_id = tile1 * tile_info->tile_cols + tile0;
			u32 start, end;
			u32 y0 =
				tile_info->height_in_sbs_minus_1[tile1] + 1;
			u32 x0 = tile_info->width_in_sbs_minus_1[tile0] + 1;

			/* tile size in SB units (width,height) */
			*dst++ = x0;
			*dst++ = 0;
			*dst++ = 0;
			*dst++ = 0;
			*dst++ = y0;
			*dst++ = 0;
			*dst++ = 0;
			*dst++ = 0;

			/* tile start position */
			start = group_entry[tile_id].tile_offset - group_entry[0].tile_offset;
			*dst++ = start & 255;
			*dst++ = (start >> 8) & 255;
			*dst++ = (start >> 16) & 255;
			*dst++ = (start >> 24) & 255;

			/* number of bytes in tile data */
			end = start + group_entry[tile_id].tile_size;
			*dst++ = end & 255;
			*dst++ = (end >> 8) & 255;
			*dst++ = (end >> 16) & 255;
			*dst++ = (end >> 24) & 255;
		}
	}

	hantro_reg_write(vpu, &av1_multicore_expect_context_update, !!(context_update_x == 0));
	hantro_reg_write(vpu, &av1_tile_enable,
			 !!((tile_info->tile_cols > 1) || (tile_info->tile_rows > 1)));
	hantro_reg_write(vpu, &av1_num_tile_cols_8k, tile_info->tile_cols);
	hantro_reg_write(vpu, &av1_num_tile_rows_8k, tile_info->tile_rows);
	hantro_reg_write(vpu, &av1_context_update_tile_id, context_update_tile_id);
	hantro_reg_write(vpu, &av1_tile_transpose, 1);
	/* Non-uniform tiling advertises the real tile-size field width. */
	if (rockchip_vpu981_av1_tile_log2(tile_info->tile_cols) ||
	    rockchip_vpu981_av1_tile_log2(tile_info->tile_rows))
		hantro_reg_write(vpu, &av1_dec_tile_size_mag, tile_info->tile_size_bytes - 1);
	else
		hantro_reg_write(vpu, &av1_dec_tile_size_mag, 3);

	hantro_write_addr(vpu, AV1_TILE_BASE, av1_dec->tile_info.dma);
}
635
rockchip_vpu981_av1_dec_get_dist(struct hantro_ctx * ctx,int a,int b)636 static int rockchip_vpu981_av1_dec_get_dist(struct hantro_ctx *ctx,
637 int a, int b)
638 {
639 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
640 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
641 int bits = ctrls->sequence->order_hint_bits - 1;
642 int diff, m;
643
644 if (!ctrls->sequence->order_hint_bits)
645 return 0;
646
647 diff = a - b;
648 m = 1 << bits;
649 diff = (diff & (m - 1)) - (diff & m);
650
651 return diff;
652 }
653
/*
 * Compute ref_frame_sign_bias[] for the current frame: 1 when a
 * reference lies in the future (positive order-hint distance), else 0.
 * All entries are cleared when order hints are disabled or the frame is
 * intra.  The loop writes index i + 1 so slot 0 (the intra/none slot)
 * is never touched here.
 */
static void rockchip_vpu981_av1_dec_set_frame_sign_bias(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	const struct v4l2_ctrl_av1_sequence *sequence = ctrls->sequence;
	int i;

	if (!sequence->order_hint_bits || IS_INTRA(frame->frame_type)) {
		for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++)
			av1_dec->ref_frame_sign_bias[i] = 0;

		return;
	}
	// Identify the nearest forward and backward references.
	for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; i++) {
		if (rockchip_vpu981_get_frame_index(ctx, i) >= 0) {
			int rel_off =
				rockchip_vpu981_av1_dec_get_dist(ctx,
								 rockchip_vpu981_get_order_hint(ctx, i),
								 frame->order_hint);
			av1_dec->ref_frame_sign_bias[i + 1] = (rel_off <= 0) ? 0 : 1;
		}
	}
}
679
/*
 * Program geometry, Q14 scaling factors and Y/CB/MV buffer addresses for
 * reference slot @ref (hw registers ref0..ref6) from internal frame
 * buffer @idx.  Returns true when the reference is scaled, i.e. either
 * scale factor differs from 1.0 (1 << AV1_REF_SCALE_SHIFT).
 *
 * NOTE(review): the *_ver_scale registers are written with scale_width
 * and *_hor_scale with scale_height; this mirrors the vendor code but
 * looks inverted relative to the local names — confirm against the
 * VPU981 register documentation before "fixing".
 */
static bool
rockchip_vpu981_av1_dec_set_ref(struct hantro_ctx *ctx, int ref, int idx,
				int width, int height)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_decoded_buffer *dst;
	dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
	size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
	int cur_width = frame->frame_width_minus_1 + 1;
	int cur_height = frame->frame_height_minus_1 + 1;
	/* Q14 fixed-point ratios of reference size to current size. */
	int scale_width =
		((width << AV1_REF_SCALE_SHIFT) + cur_width / 2) / cur_width;
	int scale_height =
		((height << AV1_REF_SCALE_SHIFT) + cur_height / 2) / cur_height;

	switch (ref) {
	case 0:
		hantro_reg_write(vpu, &av1_ref0_height, height);
		hantro_reg_write(vpu, &av1_ref0_width, width);
		hantro_reg_write(vpu, &av1_ref0_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref0_hor_scale, scale_height);
		break;
	case 1:
		hantro_reg_write(vpu, &av1_ref1_height, height);
		hantro_reg_write(vpu, &av1_ref1_width, width);
		hantro_reg_write(vpu, &av1_ref1_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref1_hor_scale, scale_height);
		break;
	case 2:
		hantro_reg_write(vpu, &av1_ref2_height, height);
		hantro_reg_write(vpu, &av1_ref2_width, width);
		hantro_reg_write(vpu, &av1_ref2_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref2_hor_scale, scale_height);
		break;
	case 3:
		hantro_reg_write(vpu, &av1_ref3_height, height);
		hantro_reg_write(vpu, &av1_ref3_width, width);
		hantro_reg_write(vpu, &av1_ref3_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref3_hor_scale, scale_height);
		break;
	case 4:
		hantro_reg_write(vpu, &av1_ref4_height, height);
		hantro_reg_write(vpu, &av1_ref4_width, width);
		hantro_reg_write(vpu, &av1_ref4_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref4_hor_scale, scale_height);
		break;
	case 5:
		hantro_reg_write(vpu, &av1_ref5_height, height);
		hantro_reg_write(vpu, &av1_ref5_width, width);
		hantro_reg_write(vpu, &av1_ref5_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref5_hor_scale, scale_height);
		break;
	case 6:
		hantro_reg_write(vpu, &av1_ref6_height, height);
		hantro_reg_write(vpu, &av1_ref6_width, width);
		hantro_reg_write(vpu, &av1_ref6_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref6_hor_scale, scale_height);
		break;
	default:
		pr_warn("AV1 invalid reference frame index\n");
	}

	/* Chroma and MV planes live at fixed offsets inside the same buffer. */
	dst = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
	chroma_addr = luma_addr + cr_offset;
	mv_addr = luma_addr + mv_offset;

	hantro_write_addr(vpu, AV1_REFERENCE_Y(ref), luma_addr);
	hantro_write_addr(vpu, AV1_REFERENCE_CB(ref), chroma_addr);
	hantro_write_addr(vpu, AV1_REFERENCE_MV(ref), mv_addr);

	return (scale_width != (1 << AV1_REF_SCALE_SHIFT)) ||
		(scale_height != (1 << AV1_REF_SCALE_SHIFT));
}
758
rockchip_vpu981_av1_dec_set_sign_bias(struct hantro_ctx * ctx,int ref,int val)759 static void rockchip_vpu981_av1_dec_set_sign_bias(struct hantro_ctx *ctx,
760 int ref, int val)
761 {
762 struct hantro_dev *vpu = ctx->dev;
763
764 switch (ref) {
765 case 0:
766 hantro_reg_write(vpu, &av1_ref0_sign_bias, val);
767 break;
768 case 1:
769 hantro_reg_write(vpu, &av1_ref1_sign_bias, val);
770 break;
771 case 2:
772 hantro_reg_write(vpu, &av1_ref2_sign_bias, val);
773 break;
774 case 3:
775 hantro_reg_write(vpu, &av1_ref3_sign_bias, val);
776 break;
777 case 4:
778 hantro_reg_write(vpu, &av1_ref4_sign_bias, val);
779 break;
780 case 5:
781 hantro_reg_write(vpu, &av1_ref5_sign_bias, val);
782 break;
783 case 6:
784 hantro_reg_write(vpu, &av1_ref6_sign_bias, val);
785 break;
786 default:
787 pr_warn("AV1 invalid sign bias index\n");
788 break;
789 }
790 }
791
rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx * ctx)792 static void rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx *ctx)
793 {
794 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
795 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
796 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
797 const struct v4l2_av1_segmentation *seg = &frame->segmentation;
798 u32 segval[V4L2_AV1_MAX_SEGMENTS][V4L2_AV1_SEG_LVL_MAX] = { 0 };
799 struct hantro_dev *vpu = ctx->dev;
800 u8 segsign = 0, preskip_segid = 0, last_active_seg = 0, i, j;
801
802 if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED) &&
803 frame->primary_ref_frame < V4L2_AV1_REFS_PER_FRAME) {
804 int idx = rockchip_vpu981_get_frame_index(ctx, frame->primary_ref_frame);
805
806 if (idx >= 0) {
807 dma_addr_t luma_addr, mv_addr = 0;
808 struct hantro_decoded_buffer *seg;
809 size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
810
811 seg = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
812 luma_addr = hantro_get_dec_buf_addr(ctx, &seg->base.vb.vb2_buf);
813 mv_addr = luma_addr + mv_offset;
814
815 hantro_write_addr(vpu, AV1_SEGMENTATION, mv_addr);
816 hantro_reg_write(vpu, &av1_use_temporal3_mvs, 1);
817 }
818 }
819
820 hantro_reg_write(vpu, &av1_segment_temp_upd_e,
821 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_TEMPORAL_UPDATE));
822 hantro_reg_write(vpu, &av1_segment_upd_e,
823 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_UPDATE_MAP));
824 hantro_reg_write(vpu, &av1_segment_e,
825 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED));
826
827 hantro_reg_write(vpu, &av1_error_resilient,
828 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE));
829
830 if (IS_INTRA(frame->frame_type) ||
831 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE)) {
832 hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
833 }
834
835 if (seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED) {
836 int s;
837
838 for (s = 0; s < V4L2_AV1_MAX_SEGMENTS; s++) {
839 if (seg->feature_enabled[s] &
840 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
841 segval[s][V4L2_AV1_SEG_LVL_ALT_Q] =
842 clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q]),
843 0, 255);
844 segsign |=
845 (seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q] < 0) << s;
846 }
847
848 if (seg->feature_enabled[s] &
849 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_V))
850 segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V] =
851 clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]),
852 -63, 63);
853
854 if (seg->feature_enabled[s] &
855 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_H))
856 segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H] =
857 clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]),
858 -63, 63);
859
860 if (seg->feature_enabled[s] &
861 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_U))
862 segval[s][V4L2_AV1_SEG_LVL_ALT_LF_U] =
863 clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_U]),
864 -63, 63);
865
866 if (seg->feature_enabled[s] &
867 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_V))
868 segval[s][V4L2_AV1_SEG_LVL_ALT_LF_V] =
869 clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_V]),
870 -63, 63);
871
872 if (frame->frame_type && seg->feature_enabled[s] &
873 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_FRAME))
874 segval[s][V4L2_AV1_SEG_LVL_REF_FRAME]++;
875
876 if (seg->feature_enabled[s] &
877 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_SKIP))
878 segval[s][V4L2_AV1_SEG_LVL_REF_SKIP] = 1;
879
880 if (seg->feature_enabled[s] &
881 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_GLOBALMV))
882 segval[s][V4L2_AV1_SEG_LVL_REF_GLOBALMV] = 1;
883 }
884 }
885
886 for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
887 for (j = 0; j < V4L2_AV1_SEG_LVL_MAX; j++) {
888 if (seg->feature_enabled[i]
889 & V4L2_AV1_SEGMENT_FEATURE_ENABLED(j)) {
890 preskip_segid |= (j >= V4L2_AV1_SEG_LVL_REF_FRAME);
891 last_active_seg = max(i, last_active_seg);
892 }
893 }
894 }
895
896 hantro_reg_write(vpu, &av1_last_active_seg, last_active_seg);
897 hantro_reg_write(vpu, &av1_preskip_segid, preskip_segid);
898
899 hantro_reg_write(vpu, &av1_seg_quant_sign, segsign);
900
901 /* Write QP, filter level, ref frame and skip for every segment */
902 hantro_reg_write(vpu, &av1_quant_seg0,
903 segval[0][V4L2_AV1_SEG_LVL_ALT_Q]);
904 hantro_reg_write(vpu, &av1_filt_level_delta0_seg0,
905 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
906 hantro_reg_write(vpu, &av1_filt_level_delta1_seg0,
907 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
908 hantro_reg_write(vpu, &av1_filt_level_delta2_seg0,
909 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_U]);
910 hantro_reg_write(vpu, &av1_filt_level_delta3_seg0,
911 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_V]);
912 hantro_reg_write(vpu, &av1_refpic_seg0,
913 segval[0][V4L2_AV1_SEG_LVL_REF_FRAME]);
914 hantro_reg_write(vpu, &av1_skip_seg0,
915 segval[0][V4L2_AV1_SEG_LVL_REF_SKIP]);
916 hantro_reg_write(vpu, &av1_global_mv_seg0,
917 segval[0][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
918
919 hantro_reg_write(vpu, &av1_quant_seg1,
920 segval[1][V4L2_AV1_SEG_LVL_ALT_Q]);
921 hantro_reg_write(vpu, &av1_filt_level_delta0_seg1,
922 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
923 hantro_reg_write(vpu, &av1_filt_level_delta1_seg1,
924 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
925 hantro_reg_write(vpu, &av1_filt_level_delta2_seg1,
926 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_U]);
927 hantro_reg_write(vpu, &av1_filt_level_delta3_seg1,
928 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_V]);
929 hantro_reg_write(vpu, &av1_refpic_seg1,
930 segval[1][V4L2_AV1_SEG_LVL_REF_FRAME]);
931 hantro_reg_write(vpu, &av1_skip_seg1,
932 segval[1][V4L2_AV1_SEG_LVL_REF_SKIP]);
933 hantro_reg_write(vpu, &av1_global_mv_seg1,
934 segval[1][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
935
936 hantro_reg_write(vpu, &av1_quant_seg2,
937 segval[2][V4L2_AV1_SEG_LVL_ALT_Q]);
938 hantro_reg_write(vpu, &av1_filt_level_delta0_seg2,
939 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
940 hantro_reg_write(vpu, &av1_filt_level_delta1_seg2,
941 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
942 hantro_reg_write(vpu, &av1_filt_level_delta2_seg2,
943 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_U]);
944 hantro_reg_write(vpu, &av1_filt_level_delta3_seg2,
945 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_V]);
946 hantro_reg_write(vpu, &av1_refpic_seg2,
947 segval[2][V4L2_AV1_SEG_LVL_REF_FRAME]);
948 hantro_reg_write(vpu, &av1_skip_seg2,
949 segval[2][V4L2_AV1_SEG_LVL_REF_SKIP]);
950 hantro_reg_write(vpu, &av1_global_mv_seg2,
951 segval[2][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
952
953 hantro_reg_write(vpu, &av1_quant_seg3,
954 segval[3][V4L2_AV1_SEG_LVL_ALT_Q]);
955 hantro_reg_write(vpu, &av1_filt_level_delta0_seg3,
956 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
957 hantro_reg_write(vpu, &av1_filt_level_delta1_seg3,
958 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
959 hantro_reg_write(vpu, &av1_filt_level_delta2_seg3,
960 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_U]);
961 hantro_reg_write(vpu, &av1_filt_level_delta3_seg3,
962 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_V]);
963 hantro_reg_write(vpu, &av1_refpic_seg3,
964 segval[3][V4L2_AV1_SEG_LVL_REF_FRAME]);
965 hantro_reg_write(vpu, &av1_skip_seg3,
966 segval[3][V4L2_AV1_SEG_LVL_REF_SKIP]);
967 hantro_reg_write(vpu, &av1_global_mv_seg3,
968 segval[3][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
969
970 hantro_reg_write(vpu, &av1_quant_seg4,
971 segval[4][V4L2_AV1_SEG_LVL_ALT_Q]);
972 hantro_reg_write(vpu, &av1_filt_level_delta0_seg4,
973 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
974 hantro_reg_write(vpu, &av1_filt_level_delta1_seg4,
975 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
976 hantro_reg_write(vpu, &av1_filt_level_delta2_seg4,
977 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_U]);
978 hantro_reg_write(vpu, &av1_filt_level_delta3_seg4,
979 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_V]);
980 hantro_reg_write(vpu, &av1_refpic_seg4,
981 segval[4][V4L2_AV1_SEG_LVL_REF_FRAME]);
982 hantro_reg_write(vpu, &av1_skip_seg4,
983 segval[4][V4L2_AV1_SEG_LVL_REF_SKIP]);
984 hantro_reg_write(vpu, &av1_global_mv_seg4,
985 segval[4][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
986
987 hantro_reg_write(vpu, &av1_quant_seg5,
988 segval[5][V4L2_AV1_SEG_LVL_ALT_Q]);
989 hantro_reg_write(vpu, &av1_filt_level_delta0_seg5,
990 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
991 hantro_reg_write(vpu, &av1_filt_level_delta1_seg5,
992 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
993 hantro_reg_write(vpu, &av1_filt_level_delta2_seg5,
994 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_U]);
995 hantro_reg_write(vpu, &av1_filt_level_delta3_seg5,
996 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_V]);
997 hantro_reg_write(vpu, &av1_refpic_seg5,
998 segval[5][V4L2_AV1_SEG_LVL_REF_FRAME]);
999 hantro_reg_write(vpu, &av1_skip_seg5,
1000 segval[5][V4L2_AV1_SEG_LVL_REF_SKIP]);
1001 hantro_reg_write(vpu, &av1_global_mv_seg5,
1002 segval[5][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1003
1004 hantro_reg_write(vpu, &av1_quant_seg6,
1005 segval[6][V4L2_AV1_SEG_LVL_ALT_Q]);
1006 hantro_reg_write(vpu, &av1_filt_level_delta0_seg6,
1007 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
1008 hantro_reg_write(vpu, &av1_filt_level_delta1_seg6,
1009 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
1010 hantro_reg_write(vpu, &av1_filt_level_delta2_seg6,
1011 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_U]);
1012 hantro_reg_write(vpu, &av1_filt_level_delta3_seg6,
1013 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_V]);
1014 hantro_reg_write(vpu, &av1_refpic_seg6,
1015 segval[6][V4L2_AV1_SEG_LVL_REF_FRAME]);
1016 hantro_reg_write(vpu, &av1_skip_seg6,
1017 segval[6][V4L2_AV1_SEG_LVL_REF_SKIP]);
1018 hantro_reg_write(vpu, &av1_global_mv_seg6,
1019 segval[6][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1020
1021 hantro_reg_write(vpu, &av1_quant_seg7,
1022 segval[7][V4L2_AV1_SEG_LVL_ALT_Q]);
1023 hantro_reg_write(vpu, &av1_filt_level_delta0_seg7,
1024 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
1025 hantro_reg_write(vpu, &av1_filt_level_delta1_seg7,
1026 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
1027 hantro_reg_write(vpu, &av1_filt_level_delta2_seg7,
1028 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_U]);
1029 hantro_reg_write(vpu, &av1_filt_level_delta3_seg7,
1030 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_V]);
1031 hantro_reg_write(vpu, &av1_refpic_seg7,
1032 segval[7][V4L2_AV1_SEG_LVL_REF_FRAME]);
1033 hantro_reg_write(vpu, &av1_skip_seg7,
1034 segval[7][V4L2_AV1_SEG_LVL_REF_SKIP]);
1035 hantro_reg_write(vpu, &av1_global_mv_seg7,
1036 segval[7][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1037 }
1038
rockchip_vpu981_av1_dec_is_lossless(struct hantro_ctx * ctx)1039 static bool rockchip_vpu981_av1_dec_is_lossless(struct hantro_ctx *ctx)
1040 {
1041 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1042 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1043 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1044 const struct v4l2_av1_segmentation *segmentation = &frame->segmentation;
1045 const struct v4l2_av1_quantization *quantization = &frame->quantization;
1046 int i;
1047
1048 for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
1049 int qindex = quantization->base_q_idx;
1050
1051 if (segmentation->feature_enabled[i] &
1052 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
1053 qindex += segmentation->feature_data[i][V4L2_AV1_SEG_LVL_ALT_Q];
1054 }
1055 qindex = clamp(qindex, 0, 255);
1056
1057 if (qindex ||
1058 quantization->delta_q_y_dc ||
1059 quantization->delta_q_u_dc ||
1060 quantization->delta_q_u_ac ||
1061 quantization->delta_q_v_dc ||
1062 quantization->delta_q_v_ac)
1063 return false;
1064 }
1065 return true;
1066 }
1067
/*
 * Program the AV1 loop filter registers: base filter levels, sharpness,
 * and the per-reference-frame / per-mode delta adjustments.
 */
static void rockchip_vpu981_av1_dec_set_loopfilter(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	const struct v4l2_av1_loop_filter *loop_filter = &frame->loop_filter;
	/* Filtering is entirely off when both luma filter levels are zero. */
	bool filtering_dis = (loop_filter->level[0] == 0) && (loop_filter->level[1] == 0);
	struct hantro_dev *vpu = ctx->dev;

	hantro_reg_write(vpu, &av1_filtering_dis, filtering_dis);
	hantro_reg_write(vpu, &av1_filt_level_base_gt32, loop_filter->level[0] > 32);
	hantro_reg_write(vpu, &av1_filt_sharpness, loop_filter->sharpness);

	hantro_reg_write(vpu, &av1_filt_level0, loop_filter->level[0]);
	hantro_reg_write(vpu, &av1_filt_level1, loop_filter->level[1]);
	hantro_reg_write(vpu, &av1_filt_level2, loop_filter->level[2]);
	hantro_reg_write(vpu, &av1_filt_level3, loop_filter->level[3]);

	/*
	 * Delta adjustments are only meaningful when delta mode is enabled
	 * and the frame is neither lossless nor using intra block copy;
	 * otherwise all adjustment registers are cleared.
	 */
	if (loop_filter->flags & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED &&
	    !rockchip_vpu981_av1_dec_is_lossless(ctx) &&
	    !(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC)) {
		hantro_reg_write(vpu, &av1_filt_ref_adj_0,
				 loop_filter->ref_deltas[0]);
		hantro_reg_write(vpu, &av1_filt_ref_adj_1,
				 loop_filter->ref_deltas[1]);
		hantro_reg_write(vpu, &av1_filt_ref_adj_2,
				 loop_filter->ref_deltas[2]);
		hantro_reg_write(vpu, &av1_filt_ref_adj_3,
				 loop_filter->ref_deltas[3]);
		hantro_reg_write(vpu, &av1_filt_ref_adj_4,
				 loop_filter->ref_deltas[4]);
		hantro_reg_write(vpu, &av1_filt_ref_adj_5,
				 loop_filter->ref_deltas[5]);
		hantro_reg_write(vpu, &av1_filt_ref_adj_6,
				 loop_filter->ref_deltas[6]);
		hantro_reg_write(vpu, &av1_filt_ref_adj_7,
				 loop_filter->ref_deltas[7]);
		hantro_reg_write(vpu, &av1_filt_mb_adj_0,
				 loop_filter->mode_deltas[0]);
		hantro_reg_write(vpu, &av1_filt_mb_adj_1,
				 loop_filter->mode_deltas[1]);
	} else {
		hantro_reg_write(vpu, &av1_filt_ref_adj_0, 0);
		hantro_reg_write(vpu, &av1_filt_ref_adj_1, 0);
		hantro_reg_write(vpu, &av1_filt_ref_adj_2, 0);
		hantro_reg_write(vpu, &av1_filt_ref_adj_3, 0);
		hantro_reg_write(vpu, &av1_filt_ref_adj_4, 0);
		hantro_reg_write(vpu, &av1_filt_ref_adj_5, 0);
		hantro_reg_write(vpu, &av1_filt_ref_adj_6, 0);
		hantro_reg_write(vpu, &av1_filt_ref_adj_7, 0);
		hantro_reg_write(vpu, &av1_filt_mb_adj_0, 0);
		hantro_reg_write(vpu, &av1_filt_mb_adj_1, 0);
	}

	/* DMA addresses of the deblocking filter's column scratch buffers. */
	hantro_write_addr(vpu, AV1_DB_DATA_COL, av1_dec->db_data_col.dma);
	hantro_write_addr(vpu, AV1_DB_CTRL_COL, av1_dec->db_ctrl_col.dma);
}
1125
/*
 * Propagate the CDF (probability) tables produced by the hardware for the
 * just-decoded frame into the contexts of the reference slots this frame
 * refreshes. Skipped when the bitstream disables frame-end CDF update.
 */
static void rockchip_vpu981_av1_dec_update_prob(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	bool frame_is_intra = IS_INTRA(frame->frame_type);
	/* Hardware wrote the updated CDFs into prob_tbl_out during decode. */
	struct av1cdfs *out_cdfs = (struct av1cdfs *)av1_dec->prob_tbl_out.cpu;
	int i;

	if (frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF)
		return;

	for (i = 0; i < NUM_REF_FRAMES; i++) {
		if (frame->refresh_frame_flags & BIT(i)) {
			struct mvcdfs stored_mv_cdf;

			/* Load the context of the first refreshed slot. */
			rockchip_av1_get_cdfs(ctx, i);
			stored_mv_cdf = av1_dec->cdfs->mv_cdf;
			*av1_dec->cdfs = *out_cdfs;
			if (frame_is_intra) {
				/*
				 * For intra frames the hardware MV CDFs refer to
				 * intrabc vectors: restore the previous inter MV
				 * CDFs and keep the intrabc ones separately.
				 */
				av1_dec->cdfs->mv_cdf = stored_mv_cdf;
				*av1_dec->cdfs_ndvc = out_cdfs->mv_cdf;
			}
			/*
			 * store_cdfs() takes the whole refresh mask, so one pass
			 * covers every flagged slot — hence the break.
			 */
			rockchip_av1_store_cdfs(ctx,
						frame->refresh_frame_flags);
			break;
		}
	}
}
1155
/*
 * Post-decode hook: once the hardware has finished a frame, save the
 * updated probability (CDF) tables for use by future frames.
 */
void rockchip_vpu981_av1_dec_done(struct hantro_ctx *ctx)
{
	rockchip_vpu981_av1_dec_update_prob(ctx);
}
1160
/*
 * Select the CDF context for the frame about to be decoded, copy it into
 * the DMA probability table, and program the table addresses.
 *
 * Per the AV1 spec, the context is reset to defaults for intra frames,
 * error-resilient frames, and frames with no primary reference; otherwise
 * it is inherited from the primary reference frame.
 */
static void rockchip_vpu981_av1_dec_set_prob(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	const struct v4l2_av1_quantization *quantization = &frame->quantization;
	struct hantro_dev *vpu = ctx->dev;
	bool error_resilient_mode =
		!!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE);
	bool frame_is_intra = IS_INTRA(frame->frame_type);

	if (error_resilient_mode || frame_is_intra ||
	    frame->primary_ref_frame == AV1_PRIMARY_REF_NONE) {
		av1_dec->cdfs = &av1_dec->default_cdfs;
		av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
		/* Default coefficient CDFs depend on the base quantizer index. */
		rockchip_av1_default_coeff_probs(quantization->base_q_idx,
						 av1_dec->cdfs);
	} else {
		rockchip_av1_get_cdfs(ctx, frame->ref_frame_idx[frame->primary_ref_frame]);
	}
	rockchip_av1_store_cdfs(ctx, frame->refresh_frame_flags);

	/* Stage the selected CDFs in the buffer the hardware reads from. */
	memcpy(av1_dec->prob_tbl.cpu, av1_dec->cdfs, sizeof(struct av1cdfs));

	if (frame_is_intra) {
		int mv_offset = offsetof(struct av1cdfs, mv_cdf);
		/* Overwrite MV context area with intrabc MV context */
		memcpy(av1_dec->prob_tbl.cpu + mv_offset, av1_dec->cdfs_ndvc,
		       sizeof(struct mvcdfs));
	}

	/* prob_tbl_out receives the hardware-updated CDFs after decode. */
	hantro_write_addr(vpu, AV1_PROP_TABLE_OUT, av1_dec->prob_tbl_out.dma);
	hantro_write_addr(vpu, AV1_PROP_TABLE, av1_dec->prob_tbl.dma);
}
1195
/*
 * Build a 256-entry film grain scaling LUT from the control points of a
 * piecewise-linear scaling function.
 *
 * Entries between consecutive control points are linearly interpolated in
 * 16.16 fixed point with round-to-nearest; entries at or beyond the last
 * control point hold its scaling value. With no control points the LUT is
 * identically zero.
 */
static void
rockchip_vpu981_av1_dec_init_scaling_function(const u8 *values, const u8 *scaling,
					      u8 num_points, u8 *scaling_lut)
{
	int point, x;

	if (num_points == 0) {
		memset(scaling_lut, 0, 256);
		return;
	}

	for (point = 0; point + 1 < num_points; point++) {
		s32 dy = scaling[point + 1] - scaling[point];
		s32 dx = values[point + 1] - values[point];
		/* Slope in 16.16 fixed point, rounded at the division. */
		s64 slope = dx ? dy * ((65536 + (dx >> 1)) / dx) : 0;

		for (x = 0; x < dx; x++)
			scaling_lut[values[point] + x] =
				scaling[point] + (s32)((x * slope + 32768) >> 16);
	}

	/* Flat tail after the last control point. */
	for (x = values[num_points - 1]; x < 256; x++)
		scaling_lut[x] = scaling[num_points - 1];
}
1225
/*
 * Configure film grain synthesis: program the grain registers and fill the
 * film grain DMA buffer with the scaling LUTs and pre-generated luma/chroma
 * grain blocks the hardware applies during decode.
 *
 * When grain is not applied, all related registers and the buffer address
 * are cleared instead.
 */
static void rockchip_vpu981_av1_dec_set_fgs(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_film_grain *film_grain = ctrls->film_grain;
	struct rockchip_av1_film_grain *fgmem = av1_dec->film_grain.cpu;
	struct hantro_dev *vpu = ctx->dev;
	bool scaling_from_luma =
		!!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA);
	/* Scratch for AR coefficients and full-size generated grain blocks. */
	s32 (*ar_coeffs_y)[24];
	s32 (*ar_coeffs_cb)[25];
	s32 (*ar_coeffs_cr)[25];
	s32 (*luma_grain_block)[73][82];
	s32 (*cb_grain_block)[38][44];
	s32 (*cr_grain_block)[38][44];
	s32 ar_coeff_lag, ar_coeff_shift;
	s32 grain_scale_shift, bitdepth;
	s32 grain_center, grain_min, grain_max;
	int i, j;

	hantro_reg_write(vpu, &av1_apply_grain, 0);

	/* Grain disabled for this frame: zero everything and bail out. */
	if (!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_APPLY_GRAIN)) {
		hantro_reg_write(vpu, &av1_num_y_points_b, 0);
		hantro_reg_write(vpu, &av1_num_cb_points_b, 0);
		hantro_reg_write(vpu, &av1_num_cr_points_b, 0);
		hantro_reg_write(vpu, &av1_scaling_shift, 0);
		hantro_reg_write(vpu, &av1_cb_mult, 0);
		hantro_reg_write(vpu, &av1_cb_luma_mult, 0);
		hantro_reg_write(vpu, &av1_cb_offset, 0);
		hantro_reg_write(vpu, &av1_cr_mult, 0);
		hantro_reg_write(vpu, &av1_cr_luma_mult, 0);
		hantro_reg_write(vpu, &av1_cr_offset, 0);
		hantro_reg_write(vpu, &av1_overlap_flag, 0);
		hantro_reg_write(vpu, &av1_clip_to_restricted_range, 0);
		hantro_reg_write(vpu, &av1_chroma_scaling_from_luma, 0);
		hantro_reg_write(vpu, &av1_random_seed, 0);
		hantro_write_addr(vpu, AV1_FILM_GRAIN, 0);
		return;
	}

	ar_coeffs_y = kzalloc(sizeof(int32_t) * 24, GFP_KERNEL);
	ar_coeffs_cb = kzalloc(sizeof(int32_t) * 25, GFP_KERNEL);
	ar_coeffs_cr = kzalloc(sizeof(int32_t) * 25, GFP_KERNEL);
	luma_grain_block = kzalloc(sizeof(int32_t) * 73 * 82, GFP_KERNEL);
	cb_grain_block = kzalloc(sizeof(int32_t) * 38 * 44, GFP_KERNEL);
	cr_grain_block = kzalloc(sizeof(int32_t) * 38 * 44, GFP_KERNEL);

	/* kfree(NULL) is a no-op, so partial allocations are freed safely. */
	if (!ar_coeffs_y || !ar_coeffs_cb || !ar_coeffs_cr ||
	    !luma_grain_block || !cb_grain_block || !cr_grain_block) {
		pr_warn("Fail allocating memory for film grain parameters\n");
		goto alloc_fail;
	}

	hantro_reg_write(vpu, &av1_apply_grain, 1);

	hantro_reg_write(vpu, &av1_num_y_points_b,
			 film_grain->num_y_points > 0);
	hantro_reg_write(vpu, &av1_num_cb_points_b,
			 film_grain->num_cb_points > 0);
	hantro_reg_write(vpu, &av1_num_cr_points_b,
			 film_grain->num_cr_points > 0);
	hantro_reg_write(vpu, &av1_scaling_shift,
			 film_grain->grain_scaling_minus_8 + 8);

	/*
	 * Chroma mult/offset controls only apply when chroma has its own
	 * scaling; the control values are biased, hence the subtractions.
	 */
	if (!scaling_from_luma) {
		hantro_reg_write(vpu, &av1_cb_mult, film_grain->cb_mult - 128);
		hantro_reg_write(vpu, &av1_cb_luma_mult, film_grain->cb_luma_mult - 128);
		hantro_reg_write(vpu, &av1_cb_offset, film_grain->cb_offset - 256);
		hantro_reg_write(vpu, &av1_cr_mult, film_grain->cr_mult - 128);
		hantro_reg_write(vpu, &av1_cr_luma_mult, film_grain->cr_luma_mult - 128);
		hantro_reg_write(vpu, &av1_cr_offset, film_grain->cr_offset - 256);
	} else {
		hantro_reg_write(vpu, &av1_cb_mult, 0);
		hantro_reg_write(vpu, &av1_cb_luma_mult, 0);
		hantro_reg_write(vpu, &av1_cb_offset, 0);
		hantro_reg_write(vpu, &av1_cr_mult, 0);
		hantro_reg_write(vpu, &av1_cr_luma_mult, 0);
		hantro_reg_write(vpu, &av1_cr_offset, 0);
	}

	hantro_reg_write(vpu, &av1_overlap_flag,
			 !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_OVERLAP));
	hantro_reg_write(vpu, &av1_clip_to_restricted_range,
			 !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_CLIP_TO_RESTRICTED_RANGE));
	hantro_reg_write(vpu, &av1_chroma_scaling_from_luma, scaling_from_luma);
	hantro_reg_write(vpu, &av1_random_seed, film_grain->grain_seed);

	rockchip_vpu981_av1_dec_init_scaling_function(film_grain->point_y_value,
						      film_grain->point_y_scaling,
						      film_grain->num_y_points,
						      fgmem->scaling_lut_y);

	/* Chroma LUTs either copy the luma LUT or come from their own points. */
	if (film_grain->flags &
	    V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA) {
		memcpy(fgmem->scaling_lut_cb, fgmem->scaling_lut_y,
		       sizeof(*fgmem->scaling_lut_y) * 256);
		memcpy(fgmem->scaling_lut_cr, fgmem->scaling_lut_y,
		       sizeof(*fgmem->scaling_lut_y) * 256);
	} else {
		rockchip_vpu981_av1_dec_init_scaling_function
		    (film_grain->point_cb_value, film_grain->point_cb_scaling,
		     film_grain->num_cb_points, fgmem->scaling_lut_cb);
		rockchip_vpu981_av1_dec_init_scaling_function
		    (film_grain->point_cr_value, film_grain->point_cr_scaling,
		     film_grain->num_cr_points, fgmem->scaling_lut_cr);
	}

	/* Un-bias the AR coefficients (luma only has 24 of them). */
	for (i = 0; i < V4L2_AV1_AR_COEFFS_SIZE; i++) {
		if (i < 24)
			(*ar_coeffs_y)[i] = film_grain->ar_coeffs_y_plus_128[i] - 128;
		(*ar_coeffs_cb)[i] = film_grain->ar_coeffs_cb_plus_128[i] - 128;
		(*ar_coeffs_cr)[i] = film_grain->ar_coeffs_cr_plus_128[i] - 128;
	}

	ar_coeff_lag = film_grain->ar_coeff_lag;
	ar_coeff_shift = film_grain->ar_coeff_shift_minus_6 + 6;
	grain_scale_shift = film_grain->grain_scale_shift;
	bitdepth = ctx->bit_depth;
	/* Grain sample range is centered around mid-level for the bit depth. */
	grain_center = 128 << (bitdepth - 8);
	grain_min = 0 - grain_center;
	grain_max = (256 << (bitdepth - 8)) - 1 - grain_center;

	rockchip_av1_generate_luma_grain_block(luma_grain_block, bitdepth,
					       film_grain->num_y_points, grain_scale_shift,
					       ar_coeff_lag, ar_coeffs_y, ar_coeff_shift,
					       grain_min, grain_max, film_grain->grain_seed);

	rockchip_av1_generate_chroma_grain_block(luma_grain_block, cb_grain_block,
						 cr_grain_block, bitdepth,
						 film_grain->num_y_points,
						 film_grain->num_cb_points,
						 film_grain->num_cr_points,
						 grain_scale_shift, ar_coeff_lag, ar_coeffs_cb,
						 ar_coeffs_cr, ar_coeff_shift, grain_min,
						 grain_max,
						 scaling_from_luma,
						 film_grain->grain_seed);

	/* Crop the 64x64 luma region the hardware consumes (offset 9,9). */
	for (i = 0; i < 64; i++) {
		for (j = 0; j < 64; j++)
			fgmem->cropped_luma_grain_block[i * 64 + j] =
				(*luma_grain_block)[i + 9][j + 9];
	}

	/* Crop 32x32 chroma regions (offset 6,6), interleaving Cb/Cr. */
	for (i = 0; i < 32; i++) {
		for (j = 0; j < 32; j++) {
			fgmem->cropped_chroma_grain_block[i * 64 + 2 * j] =
				(*cb_grain_block)[i + 6][j + 6];
			fgmem->cropped_chroma_grain_block[i * 64 + 2 * j + 1] =
				(*cr_grain_block)[i + 6][j + 6];
		}
	}

	hantro_write_addr(vpu, AV1_FILM_GRAIN, av1_dec->film_grain.dma);

alloc_fail:
	kfree(ar_coeffs_y);
	kfree(ar_coeffs_cb);
	kfree(ar_coeffs_cr);
	kfree(luma_grain_block);
	kfree(cb_grain_block);
	kfree(cr_grain_block);
}
1390
rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx * ctx)1391 static void rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx *ctx)
1392 {
1393 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1394 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1395 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1396 const struct v4l2_av1_cdef *cdef = &frame->cdef;
1397 struct hantro_dev *vpu = ctx->dev;
1398 u32 luma_pri_strength = 0;
1399 u16 luma_sec_strength = 0;
1400 u32 chroma_pri_strength = 0;
1401 u16 chroma_sec_strength = 0;
1402 int i;
1403
1404 hantro_reg_write(vpu, &av1_cdef_bits, cdef->bits);
1405 hantro_reg_write(vpu, &av1_cdef_damping, cdef->damping_minus_3);
1406
1407 for (i = 0; i < BIT(cdef->bits); i++) {
1408 luma_pri_strength |= cdef->y_pri_strength[i] << (i * 4);
1409 if (cdef->y_sec_strength[i] == 4)
1410 luma_sec_strength |= 3 << (i * 2);
1411 else
1412 luma_sec_strength |= cdef->y_sec_strength[i] << (i * 2);
1413
1414 chroma_pri_strength |= cdef->uv_pri_strength[i] << (i * 4);
1415 if (cdef->uv_sec_strength[i] == 4)
1416 chroma_sec_strength |= 3 << (i * 2);
1417 else
1418 chroma_sec_strength |= cdef->uv_sec_strength[i] << (i * 2);
1419 }
1420
1421 hantro_reg_write(vpu, &av1_cdef_luma_primary_strength,
1422 luma_pri_strength);
1423 hantro_reg_write(vpu, &av1_cdef_luma_secondary_strength,
1424 luma_sec_strength);
1425 hantro_reg_write(vpu, &av1_cdef_chroma_primary_strength,
1426 chroma_pri_strength);
1427 hantro_reg_write(vpu, &av1_cdef_chroma_secondary_strength,
1428 chroma_sec_strength);
1429
1430 hantro_write_addr(vpu, AV1_CDEF_COL, av1_dec->cdef_col.dma);
1431 }
1432
rockchip_vpu981_av1_dec_set_lr(struct hantro_ctx * ctx)1433 static void rockchip_vpu981_av1_dec_set_lr(struct hantro_ctx *ctx)
1434 {
1435 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1436 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1437 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1438 const struct v4l2_av1_loop_restoration *loop_restoration =
1439 &frame->loop_restoration;
1440 struct hantro_dev *vpu = ctx->dev;
1441 u16 lr_type = 0, lr_unit_size = 0;
1442 u8 restoration_unit_size[V4L2_AV1_NUM_PLANES_MAX] = { 3, 3, 3 };
1443 int i;
1444
1445 if (loop_restoration->flags & V4L2_AV1_LOOP_RESTORATION_FLAG_USES_LR) {
1446 restoration_unit_size[0] = 1 + loop_restoration->lr_unit_shift;
1447 restoration_unit_size[1] =
1448 1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
1449 restoration_unit_size[2] =
1450 1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
1451 }
1452
1453 for (i = 0; i < V4L2_AV1_NUM_PLANES_MAX; i++) {
1454 lr_type |=
1455 loop_restoration->frame_restoration_type[i] << (i * 2);
1456 lr_unit_size |= restoration_unit_size[i] << (i * 2);
1457 }
1458
1459 hantro_reg_write(vpu, &av1_lr_type, lr_type);
1460 hantro_reg_write(vpu, &av1_lr_unit_size, lr_unit_size);
1461 hantro_write_addr(vpu, AV1_LR_COL, av1_dec->lr_col.dma);
1462 }
1463
/*
 * Program the super-resolution upscaling parameters.
 *
 * When superres is active, the frame is decoded at a downscaled width and
 * upscaled horizontally by the hardware. The step and initial subpel
 * offsets below mirror the AV1 spec's upscaling computation in
 * RS_SCALE_SUBPEL_BITS (14-bit) fixed point, for luma and for 4:2:0
 * half-width chroma.
 */
static void rockchip_vpu981_av1_dec_set_superres_params(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	struct hantro_dev *vpu = ctx->dev;
	u8 superres_scale_denominator = SCALE_NUMERATOR;
	/* Defaults correspond to "no scaling" (1:1 step). */
	int superres_luma_step = RS_SCALE_SUBPEL_BITS;
	int superres_chroma_step = RS_SCALE_SUBPEL_BITS;
	int superres_luma_step_invra = RS_SCALE_SUBPEL_BITS;
	int superres_chroma_step_invra = RS_SCALE_SUBPEL_BITS;
	int superres_init_luma_subpel_x = 0;
	int superres_init_chroma_subpel_x = 0;
	int superres_is_scaled = 0;
	int min_w = min_t(uint32_t, 16, frame->upscaled_width);
	int upscaled_luma, downscaled_luma;
	int downscaled_chroma, upscaled_chroma;
	int step_luma, step_chroma;
	int err_luma, err_chroma;
	int initial_luma, initial_chroma;
	int width = 0;

	if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
		superres_scale_denominator = frame->superres_denom;

	/* denom <= numerator means no downscaling took place. */
	if (superres_scale_denominator <= SCALE_NUMERATOR)
		goto set_regs;

	/* Downscaled (coded) width, rounded, but never below 16 pixels. */
	width = (frame->upscaled_width * SCALE_NUMERATOR +
		(superres_scale_denominator / 2)) / superres_scale_denominator;

	if (width < min_w)
		width = min_w;

	if (width == frame->upscaled_width)
		goto set_regs;

	superres_is_scaled = 1;
	upscaled_luma = frame->upscaled_width;
	downscaled_luma = width;
	/* 4:2:0 chroma planes are half the luma width, rounded up. */
	downscaled_chroma = (downscaled_luma + 1) >> 1;
	upscaled_chroma = (upscaled_luma + 1) >> 1;
	/* Per-output-pixel source step (rounded fixed-point ratio). */
	step_luma =
		((downscaled_luma << RS_SCALE_SUBPEL_BITS) +
		 (upscaled_luma / 2)) / upscaled_luma;
	step_chroma =
		((downscaled_chroma << RS_SCALE_SUBPEL_BITS) +
		 (upscaled_chroma / 2)) / upscaled_chroma;
	/* Accumulated rounding error over the full width. */
	err_luma =
		(upscaled_luma * step_luma)
		- (downscaled_luma << RS_SCALE_SUBPEL_BITS);
	err_chroma =
		(upscaled_chroma * step_chroma)
		- (downscaled_chroma << RS_SCALE_SUBPEL_BITS);
	/* Initial subpel x position, centered and error-compensated. */
	initial_luma =
		((-((upscaled_luma - downscaled_luma) << (RS_SCALE_SUBPEL_BITS - 1))
		  + upscaled_luma / 2)
		 / upscaled_luma + (1 << (RS_SCALE_EXTRA_BITS - 1)) - err_luma / 2)
		& RS_SCALE_SUBPEL_MASK;
	initial_chroma =
		((-((upscaled_chroma - downscaled_chroma) << (RS_SCALE_SUBPEL_BITS - 1))
		  + upscaled_chroma / 2)
		 / upscaled_chroma + (1 << (RS_SCALE_EXTRA_BITS - 1)) - err_chroma / 2)
		& RS_SCALE_SUBPEL_MASK;
	superres_luma_step = step_luma;
	superres_chroma_step = step_chroma;
	/* Inverse ratios (upscaled/downscaled) in the same fixed point. */
	superres_luma_step_invra =
		((upscaled_luma << RS_SCALE_SUBPEL_BITS) + (downscaled_luma / 2))
		/ downscaled_luma;
	superres_chroma_step_invra =
		((upscaled_chroma << RS_SCALE_SUBPEL_BITS) + (downscaled_chroma / 2))
		/ downscaled_chroma;
	superres_init_luma_subpel_x = initial_luma;
	superres_init_chroma_subpel_x = initial_chroma;

set_regs:
	hantro_reg_write(vpu, &av1_superres_pic_width, frame->upscaled_width);

	/*
	 * NOTE(review): without USE_SUPERRES the raw superres_denom is written
	 * unmodified — presumably a don't-care for the hardware in that case;
	 * confirm against the G2 register documentation.
	 */
	if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
		hantro_reg_write(vpu, &av1_scale_denom_minus9,
				 frame->superres_denom - SUPERRES_SCALE_DENOMINATOR_MIN);
	else
		hantro_reg_write(vpu, &av1_scale_denom_minus9, frame->superres_denom);

	hantro_reg_write(vpu, &av1_superres_luma_step, superres_luma_step);
	hantro_reg_write(vpu, &av1_superres_chroma_step, superres_chroma_step);
	hantro_reg_write(vpu, &av1_superres_luma_step_invra,
			 superres_luma_step_invra);
	hantro_reg_write(vpu, &av1_superres_chroma_step_invra,
			 superres_chroma_step_invra);
	hantro_reg_write(vpu, &av1_superres_init_luma_subpel_x,
			 superres_init_luma_subpel_x);
	hantro_reg_write(vpu, &av1_superres_init_chroma_subpel_x,
			 superres_init_chroma_subpel_x);
	hantro_reg_write(vpu, &av1_superres_is_scaled, superres_is_scaled);

	hantro_write_addr(vpu, AV1_SR_COL, av1_dec->sr_col.dma);
}
1562
rockchip_vpu981_av1_dec_set_picture_dimensions(struct hantro_ctx * ctx)1563 static void rockchip_vpu981_av1_dec_set_picture_dimensions(struct hantro_ctx *ctx)
1564 {
1565 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1566 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1567 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1568 struct hantro_dev *vpu = ctx->dev;
1569 int pic_width_in_cbs = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
1570 int pic_height_in_cbs = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
1571 int pic_width_pad = ALIGN(frame->frame_width_minus_1 + 1, 8)
1572 - (frame->frame_width_minus_1 + 1);
1573 int pic_height_pad = ALIGN(frame->frame_height_minus_1 + 1, 8)
1574 - (frame->frame_height_minus_1 + 1);
1575
1576 hantro_reg_write(vpu, &av1_pic_width_in_cbs, pic_width_in_cbs);
1577 hantro_reg_write(vpu, &av1_pic_height_in_cbs, pic_height_in_cbs);
1578 hantro_reg_write(vpu, &av1_pic_width_pad, pic_width_pad);
1579 hantro_reg_write(vpu, &av1_pic_height_pad, pic_height_pad);
1580
1581 rockchip_vpu981_av1_dec_set_superres_params(ctx);
1582 }
1583
rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx * ctx)1584 static void rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx *ctx)
1585 {
1586 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1587 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1588 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1589 struct hantro_dev *vpu = ctx->dev;
1590 bool use_ref_frame_mvs =
1591 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS);
1592 int cur_frame_offset = frame->order_hint;
1593 int alt_frame_offset = 0;
1594 int gld_frame_offset = 0;
1595 int bwd_frame_offset = 0;
1596 int alt2_frame_offset = 0;
1597 int refs_selected[3] = { 0, 0, 0 };
1598 int cur_mi_cols = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
1599 int cur_mi_rows = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
1600 int cur_offset[V4L2_AV1_TOTAL_REFS_PER_FRAME - 1];
1601 int cur_roffset[V4L2_AV1_TOTAL_REFS_PER_FRAME - 1];
1602 int mf_types[3] = { 0, 0, 0 };
1603 int ref_stamp = 2;
1604 int ref_ind = 0;
1605 int rf, idx;
1606
1607 alt_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT_BUF_IDX);
1608 gld_frame_offset = rockchip_vpu981_get_order_hint(ctx, GLD_BUF_IDX);
1609 bwd_frame_offset = rockchip_vpu981_get_order_hint(ctx, BWD_BUF_IDX);
1610 alt2_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT2_BUF_IDX);
1611
1612 idx = rockchip_vpu981_get_frame_index(ctx, LST_BUF_IDX);
1613 if (idx >= 0) {
1614 int alt_frame_offset_in_lst =
1615 av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME];
1616 bool is_lst_overlay =
1617 (alt_frame_offset_in_lst == gld_frame_offset);
1618
1619 if (!is_lst_overlay) {
1620 int lst_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1621 int lst_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1622 bool lst_intra_only =
1623 IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1624
1625 if (lst_mi_cols == cur_mi_cols &&
1626 lst_mi_rows == cur_mi_rows && !lst_intra_only) {
1627 mf_types[ref_ind] = V4L2_AV1_REF_LAST_FRAME;
1628 refs_selected[ref_ind++] = LST_BUF_IDX;
1629 }
1630 }
1631 ref_stamp--;
1632 }
1633
1634 idx = rockchip_vpu981_get_frame_index(ctx, BWD_BUF_IDX);
1635 if (rockchip_vpu981_av1_dec_get_dist(ctx, bwd_frame_offset, cur_frame_offset) > 0) {
1636 int bwd_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1637 int bwd_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1638 bool bwd_intra_only =
1639 IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1640
1641 if (bwd_mi_cols == cur_mi_cols && bwd_mi_rows == cur_mi_rows &&
1642 !bwd_intra_only) {
1643 mf_types[ref_ind] = V4L2_AV1_REF_BWDREF_FRAME;
1644 refs_selected[ref_ind++] = BWD_BUF_IDX;
1645 ref_stamp--;
1646 }
1647 }
1648
1649 idx = rockchip_vpu981_get_frame_index(ctx, ALT2_BUF_IDX);
1650 if (rockchip_vpu981_av1_dec_get_dist(ctx, alt2_frame_offset, cur_frame_offset) > 0) {
1651 int alt2_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1652 int alt2_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1653 bool alt2_intra_only =
1654 IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1655
1656 if (alt2_mi_cols == cur_mi_cols && alt2_mi_rows == cur_mi_rows &&
1657 !alt2_intra_only) {
1658 mf_types[ref_ind] = V4L2_AV1_REF_ALTREF2_FRAME;
1659 refs_selected[ref_ind++] = ALT2_BUF_IDX;
1660 ref_stamp--;
1661 }
1662 }
1663
1664 idx = rockchip_vpu981_get_frame_index(ctx, ALT_BUF_IDX);
1665 if (rockchip_vpu981_av1_dec_get_dist(ctx, alt_frame_offset, cur_frame_offset) > 0 &&
1666 ref_stamp >= 0) {
1667 int alt_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1668 int alt_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1669 bool alt_intra_only =
1670 IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1671
1672 if (alt_mi_cols == cur_mi_cols && alt_mi_rows == cur_mi_rows &&
1673 !alt_intra_only) {
1674 mf_types[ref_ind] = V4L2_AV1_REF_ALTREF_FRAME;
1675 refs_selected[ref_ind++] = ALT_BUF_IDX;
1676 ref_stamp--;
1677 }
1678 }
1679
1680 idx = rockchip_vpu981_get_frame_index(ctx, LST2_BUF_IDX);
1681 if (idx >= 0 && ref_stamp >= 0) {
1682 int lst2_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1683 int lst2_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1684 bool lst2_intra_only =
1685 IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1686
1687 if (lst2_mi_cols == cur_mi_cols && lst2_mi_rows == cur_mi_rows &&
1688 !lst2_intra_only) {
1689 mf_types[ref_ind] = V4L2_AV1_REF_LAST2_FRAME;
1690 refs_selected[ref_ind++] = LST2_BUF_IDX;
1691 ref_stamp--;
1692 }
1693 }
1694
1695 for (rf = 0; rf < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; ++rf) {
1696 idx = rockchip_vpu981_get_frame_index(ctx, rf);
1697 if (idx >= 0) {
1698 int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, rf);
1699
1700 cur_offset[rf] =
1701 rockchip_vpu981_av1_dec_get_dist(ctx, cur_frame_offset, rf_order_hint);
1702 cur_roffset[rf] =
1703 rockchip_vpu981_av1_dec_get_dist(ctx, rf_order_hint, cur_frame_offset);
1704 } else {
1705 cur_offset[rf] = 0;
1706 cur_roffset[rf] = 0;
1707 }
1708 }
1709
1710 hantro_reg_write(vpu, &av1_use_temporal0_mvs, 0);
1711 hantro_reg_write(vpu, &av1_use_temporal1_mvs, 0);
1712 hantro_reg_write(vpu, &av1_use_temporal2_mvs, 0);
1713 hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
1714
1715 hantro_reg_write(vpu, &av1_mf1_last_offset, 0);
1716 hantro_reg_write(vpu, &av1_mf1_last2_offset, 0);
1717 hantro_reg_write(vpu, &av1_mf1_last3_offset, 0);
1718 hantro_reg_write(vpu, &av1_mf1_golden_offset, 0);
1719 hantro_reg_write(vpu, &av1_mf1_bwdref_offset, 0);
1720 hantro_reg_write(vpu, &av1_mf1_altref2_offset, 0);
1721 hantro_reg_write(vpu, &av1_mf1_altref_offset, 0);
1722
1723 if (use_ref_frame_mvs && ref_ind > 0 &&
1724 cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1725 cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1726 int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[0]);
1727 int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[0]);
1728 u32 *oh = av1_dec->frame_refs[idx].order_hints;
1729 int val;
1730
1731 hantro_reg_write(vpu, &av1_use_temporal0_mvs, 1);
1732
1733 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1734 hantro_reg_write(vpu, &av1_mf1_last_offset, val);
1735
1736 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1737 hantro_reg_write(vpu, &av1_mf1_last2_offset, val);
1738
1739 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1740 hantro_reg_write(vpu, &av1_mf1_last3_offset, val);
1741
1742 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1743 hantro_reg_write(vpu, &av1_mf1_golden_offset, val);
1744
1745 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1746 hantro_reg_write(vpu, &av1_mf1_bwdref_offset, val);
1747
1748 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1749 hantro_reg_write(vpu, &av1_mf1_altref2_offset, val);
1750
1751 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1752 hantro_reg_write(vpu, &av1_mf1_altref_offset, val);
1753 }
1754
1755 hantro_reg_write(vpu, &av1_mf2_last_offset, 0);
1756 hantro_reg_write(vpu, &av1_mf2_last2_offset, 0);
1757 hantro_reg_write(vpu, &av1_mf2_last3_offset, 0);
1758 hantro_reg_write(vpu, &av1_mf2_golden_offset, 0);
1759 hantro_reg_write(vpu, &av1_mf2_bwdref_offset, 0);
1760 hantro_reg_write(vpu, &av1_mf2_altref2_offset, 0);
1761 hantro_reg_write(vpu, &av1_mf2_altref_offset, 0);
1762
1763 if (use_ref_frame_mvs && ref_ind > 1 &&
1764 cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1765 cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1766 int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[1]);
1767 int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[1]);
1768 u32 *oh = av1_dec->frame_refs[idx].order_hints;
1769 int val;
1770
1771 hantro_reg_write(vpu, &av1_use_temporal1_mvs, 1);
1772
1773 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1774 hantro_reg_write(vpu, &av1_mf2_last_offset, val);
1775
1776 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1777 hantro_reg_write(vpu, &av1_mf2_last2_offset, val);
1778
1779 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1780 hantro_reg_write(vpu, &av1_mf2_last3_offset, val);
1781
1782 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1783 hantro_reg_write(vpu, &av1_mf2_golden_offset, val);
1784
1785 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1786 hantro_reg_write(vpu, &av1_mf2_bwdref_offset, val);
1787
1788 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1789 hantro_reg_write(vpu, &av1_mf2_altref2_offset, val);
1790
1791 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1792 hantro_reg_write(vpu, &av1_mf2_altref_offset, val);
1793 }
1794
1795 hantro_reg_write(vpu, &av1_mf3_last_offset, 0);
1796 hantro_reg_write(vpu, &av1_mf3_last2_offset, 0);
1797 hantro_reg_write(vpu, &av1_mf3_last3_offset, 0);
1798 hantro_reg_write(vpu, &av1_mf3_golden_offset, 0);
1799 hantro_reg_write(vpu, &av1_mf3_bwdref_offset, 0);
1800 hantro_reg_write(vpu, &av1_mf3_altref2_offset, 0);
1801 hantro_reg_write(vpu, &av1_mf3_altref_offset, 0);
1802
1803 if (use_ref_frame_mvs && ref_ind > 2 &&
1804 cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1805 cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1806 int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[2]);
1807 int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[2]);
1808 u32 *oh = av1_dec->frame_refs[idx].order_hints;
1809 int val;
1810
1811 hantro_reg_write(vpu, &av1_use_temporal2_mvs, 1);
1812
1813 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1814 hantro_reg_write(vpu, &av1_mf3_last_offset, val);
1815
1816 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1817 hantro_reg_write(vpu, &av1_mf3_last2_offset, val);
1818
1819 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1820 hantro_reg_write(vpu, &av1_mf3_last3_offset, val);
1821
1822 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1823 hantro_reg_write(vpu, &av1_mf3_golden_offset, val);
1824
1825 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1826 hantro_reg_write(vpu, &av1_mf3_bwdref_offset, val);
1827
1828 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1829 hantro_reg_write(vpu, &av1_mf3_altref2_offset, val);
1830
1831 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1832 hantro_reg_write(vpu, &av1_mf3_altref_offset, val);
1833 }
1834
1835 hantro_reg_write(vpu, &av1_cur_last_offset, cur_offset[0]);
1836 hantro_reg_write(vpu, &av1_cur_last2_offset, cur_offset[1]);
1837 hantro_reg_write(vpu, &av1_cur_last3_offset, cur_offset[2]);
1838 hantro_reg_write(vpu, &av1_cur_golden_offset, cur_offset[3]);
1839 hantro_reg_write(vpu, &av1_cur_bwdref_offset, cur_offset[4]);
1840 hantro_reg_write(vpu, &av1_cur_altref2_offset, cur_offset[5]);
1841 hantro_reg_write(vpu, &av1_cur_altref_offset, cur_offset[6]);
1842
1843 hantro_reg_write(vpu, &av1_cur_last_roffset, cur_roffset[0]);
1844 hantro_reg_write(vpu, &av1_cur_last2_roffset, cur_roffset[1]);
1845 hantro_reg_write(vpu, &av1_cur_last3_roffset, cur_roffset[2]);
1846 hantro_reg_write(vpu, &av1_cur_golden_roffset, cur_roffset[3]);
1847 hantro_reg_write(vpu, &av1_cur_bwdref_roffset, cur_roffset[4]);
1848 hantro_reg_write(vpu, &av1_cur_altref2_roffset, cur_roffset[5]);
1849 hantro_reg_write(vpu, &av1_cur_altref_roffset, cur_roffset[6]);
1850
1851 hantro_reg_write(vpu, &av1_mf1_type, mf_types[0] - V4L2_AV1_REF_LAST_FRAME);
1852 hantro_reg_write(vpu, &av1_mf2_type, mf_types[1] - V4L2_AV1_REF_LAST_FRAME);
1853 hantro_reg_write(vpu, &av1_mf3_type, mf_types[2] - V4L2_AV1_REF_LAST_FRAME);
1854 }
1855
/*
 * Program the reference-frame related registers for the current frame:
 * number of distinct reference buffers, per-reference buffer/geometry
 * (via rockchip_vpu981_av1_dec_set_ref()), sign bias and global-motion
 * mode for each of the seven AV1 references.
 */
static void rockchip_vpu981_av1_dec_set_reference_frames(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	int frame_type = frame->frame_type;
	bool allow_intrabc = !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC);
	/* One usage counter per picture buffer, to count distinct refs. */
	int ref_count[AV1DEC_MAX_PIC_BUFFERS] = { 0 };
	struct hantro_dev *vpu = ctx->dev;
	int i, ref_frames = 0;
	bool scale_enable = false;

	/* Intra frames without intra block copy use no references at all. */
	if (IS_INTRA(frame_type) && !allow_intrabc)
		return;

	if (!allow_intrabc) {
		/*
		 * Several reference slots may map to the same picture
		 * buffer; count each buffer only once.
		 */
		for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) {
			int idx = rockchip_vpu981_get_frame_index(ctx, i);

			if (idx >= 0)
				ref_count[idx]++;
		}

		for (i = 0; i < AV1DEC_MAX_PIC_BUFFERS; i++) {
			if (ref_count[i])
				ref_frames++;
		}
	} else {
		/* With intra block copy the only "reference" is the frame itself. */
		ref_frames = 1;
	}
	hantro_reg_write(vpu, &av1_ref_frames, ref_frames);

	rockchip_vpu981_av1_dec_set_frame_sign_bias(ctx);

	/* Program buffer index, dimensions and sign bias for each reference. */
	for (i = V4L2_AV1_REF_LAST_FRAME; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++) {
		u32 ref = i - 1;
		int idx = 0;
		int width, height;

		if (allow_intrabc) {
			/* Reference the frame currently being decoded. */
			idx = av1_dec->current_frame_index;
			width = frame->frame_width_minus_1 + 1;
			height = frame->frame_height_minus_1 + 1;
		} else {
			/*
			 * NOTE(review): '> 0' means an index of 0 is handled
			 * by idx's initializer (same value), but a negative
			 * (invalid) index silently falls back to buffer 0 as
			 * well — confirm that is the intended behavior.
			 */
			if (rockchip_vpu981_get_frame_index(ctx, ref) > 0)
				idx = rockchip_vpu981_get_frame_index(ctx, ref);
			width = av1_dec->frame_refs[idx].width;
			height = av1_dec->frame_refs[idx].height;
		}

		/* set_ref() reports whether this reference needs scaling. */
		scale_enable |=
			rockchip_vpu981_av1_dec_set_ref(ctx, ref, idx, width,
							height);

		rockchip_vpu981_av1_dec_set_sign_bias(ctx, ref,
						      av1_dec->ref_frame_sign_bias[i]);
	}
	hantro_reg_write(vpu, &av1_ref_scaling_enable, scale_enable);

	/* Global motion model type for each of the seven references. */
	hantro_reg_write(vpu, &av1_ref0_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_LAST_FRAME]);
	hantro_reg_write(vpu, &av1_ref1_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_LAST2_FRAME]);
	hantro_reg_write(vpu, &av1_ref2_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_LAST3_FRAME]);
	hantro_reg_write(vpu, &av1_ref3_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_GOLDEN_FRAME]);
	hantro_reg_write(vpu, &av1_ref4_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_BWDREF_FRAME]);
	hantro_reg_write(vpu, &av1_ref5_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_ALTREF2_FRAME]);
	hantro_reg_write(vpu, &av1_ref6_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_ALTREF_FRAME]);

	rockchip_vpu981_av1_dec_set_other_frames(ctx);
}
1932
/*
 * Translate the V4L2 AV1 frame/sequence control payloads into the
 * per-frame decode parameter registers.
 */
static void rockchip_vpu981_av1_dec_set_parameters(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;

	/* Per-frame flags from the frame control. */
	hantro_reg_write(vpu, &av1_skip_mode,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SKIP_MODE_PRESENT));
	hantro_reg_write(vpu, &av1_tempor_mvp_e,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS));
	hantro_reg_write(vpu, &av1_delta_lf_res_log,
			 ctrls->frame->loop_filter.delta_lf_res);
	hantro_reg_write(vpu, &av1_delta_lf_multi,
			 !!(ctrls->frame->loop_filter.flags
			    & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI));
	hantro_reg_write(vpu, &av1_delta_lf_present,
			 !!(ctrls->frame->loop_filter.flags
			    & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT));
	hantro_reg_write(vpu, &av1_disable_cdf_update,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_CDF_UPDATE));
	hantro_reg_write(vpu, &av1_allow_warp,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_WARPED_MOTION));
	hantro_reg_write(vpu, &av1_show_frame,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SHOW_FRAME));
	hantro_reg_write(vpu, &av1_switchable_motion_mode,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_IS_MOTION_MODE_SWITCHABLE));
	/* Sequence-level coding tool enables. */
	hantro_reg_write(vpu, &av1_enable_cdef,
			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF));
	hantro_reg_write(vpu, &av1_allow_masked_compound,
			 !!(ctrls->sequence->flags
			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_MASKED_COMPOUND));
	hantro_reg_write(vpu, &av1_allow_interintra,
			 !!(ctrls->sequence->flags
			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTERINTRA_COMPOUND));
	hantro_reg_write(vpu, &av1_enable_intra_edge_filter,
			 !!(ctrls->sequence->flags
			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTRA_EDGE_FILTER));
	hantro_reg_write(vpu, &av1_allow_filter_intra,
			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_FILTER_INTRA));
	hantro_reg_write(vpu, &av1_enable_jnt_comp,
			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_JNT_COMP));
	hantro_reg_write(vpu, &av1_enable_dual_filter,
			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_DUAL_FILTER));
	hantro_reg_write(vpu, &av1_reduced_tx_set_used,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REDUCED_TX_SET));
	hantro_reg_write(vpu, &av1_allow_screen_content_tools,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS));
	hantro_reg_write(vpu, &av1_allow_intrabc,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC));

	/*
	 * force_integer_mv is only meaningful when screen content tools are
	 * enabled ("interger" typo comes from the register definition).
	 */
	if (!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS))
		hantro_reg_write(vpu, &av1_force_interger_mv, 0);
	else
		hantro_reg_write(vpu, &av1_force_interger_mv,
				 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_FORCE_INTEGER_MV));

	/* Monochrome decoding is never enabled here. */
	hantro_reg_write(vpu, &av1_blackwhite_e, 0);
	hantro_reg_write(vpu, &av1_delta_q_res_log, ctrls->frame->quantization.delta_q_res);
	hantro_reg_write(vpu, &av1_delta_q_present,
			 !!(ctrls->frame->quantization.flags
			    & V4L2_AV1_QUANTIZATION_FLAG_DELTA_Q_PRESENT));

	/* frame_type == 0 is V4L2_AV1_KEY_FRAME, flagged as IDR to the core. */
	hantro_reg_write(vpu, &av1_idr_pic_e, !ctrls->frame->frame_type);
	hantro_reg_write(vpu, &av1_quant_base_qindex, ctrls->frame->quantization.base_q_idx);
	hantro_reg_write(vpu, &av1_bit_depth_y_minus8, ctx->bit_depth - 8);
	hantro_reg_write(vpu, &av1_bit_depth_c_minus8, ctx->bit_depth - 8);

	hantro_reg_write(vpu, &av1_mcomp_filt_type, ctrls->frame->interpolation_filter);
	hantro_reg_write(vpu, &av1_high_prec_mv_e,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_HIGH_PRECISION_MV));
	hantro_reg_write(vpu, &av1_comp_pred_mode,
			 (ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT) ? 2 : 0);
	/* 3/4 are hardware transform-mode encodings — presumably; see regs header. */
	hantro_reg_write(vpu, &av1_transform_mode, (ctrls->frame->tx_mode == 1) ? 3 : 4);
	/* Max coding block size: log2 of 128 or 64 superblocks. */
	hantro_reg_write(vpu, &av1_max_cb_size,
			 (ctrls->sequence->flags
			  & V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK) ? 7 : 6);
	hantro_reg_write(vpu, &av1_min_cb_size, 3);

	/* Fields not driven by the uAPI controls are cleared. */
	hantro_reg_write(vpu, &av1_comp_pred_fixed_ref, 0);
	hantro_reg_write(vpu, &av1_comp_pred_var_ref0_av1, 0);
	hantro_reg_write(vpu, &av1_comp_pred_var_ref1_av1, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg0, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg1, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg2, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg3, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg4, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg5, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg6, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg7, 0);

	hantro_reg_write(vpu, &av1_qp_delta_y_dc_av1, ctrls->frame->quantization.delta_q_y_dc);
	hantro_reg_write(vpu, &av1_qp_delta_ch_dc_av1, ctrls->frame->quantization.delta_q_u_dc);
	hantro_reg_write(vpu, &av1_qp_delta_ch_ac_av1, ctrls->frame->quantization.delta_q_u_ac);
	if (ctrls->frame->quantization.flags & V4L2_AV1_QUANTIZATION_FLAG_USING_QMATRIX) {
		hantro_reg_write(vpu, &av1_qmlevel_y, ctrls->frame->quantization.qm_y);
		hantro_reg_write(vpu, &av1_qmlevel_u, ctrls->frame->quantization.qm_u);
		hantro_reg_write(vpu, &av1_qmlevel_v, ctrls->frame->quantization.qm_v);
	} else {
		/* 0xff disables the quantizer matrix for that plane. */
		hantro_reg_write(vpu, &av1_qmlevel_y, 0xff);
		hantro_reg_write(vpu, &av1_qmlevel_u, 0xff);
		hantro_reg_write(vpu, &av1_qmlevel_v, 0xff);
	}

	hantro_reg_write(vpu, &av1_lossless_e, rockchip_vpu981_av1_dec_is_lossless(ctx));
	hantro_reg_write(vpu, &av1_quant_delta_v_dc, ctrls->frame->quantization.delta_q_v_dc);
	hantro_reg_write(vpu, &av1_quant_delta_v_ac, ctrls->frame->quantization.delta_q_v_ac);

	/* skip_mode_frame == 0 is remapped to 1 before being programmed. */
	hantro_reg_write(vpu, &av1_skip_ref0,
			 (ctrls->frame->skip_mode_frame[0]) ? ctrls->frame->skip_mode_frame[0] : 1);
	hantro_reg_write(vpu, &av1_skip_ref1,
			 (ctrls->frame->skip_mode_frame[1]) ? ctrls->frame->skip_mode_frame[1] : 1);

	/* Both multicore sync pointers use the same tile buffer. */
	hantro_write_addr(vpu, AV1_MC_SYNC_CURR, av1_dec->tile_buf.dma);
	hantro_write_addr(vpu, AV1_MC_SYNC_LEFT, av1_dec->tile_buf.dma);
}
2048
2049 static void
rockchip_vpu981_av1_dec_set_input_buffer(struct hantro_ctx * ctx,struct vb2_v4l2_buffer * vb2_src)2050 rockchip_vpu981_av1_dec_set_input_buffer(struct hantro_ctx *ctx,
2051 struct vb2_v4l2_buffer *vb2_src)
2052 {
2053 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
2054 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
2055 const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
2056 ctrls->tile_group_entry;
2057 struct hantro_dev *vpu = ctx->dev;
2058 dma_addr_t src_dma;
2059 u32 src_len, src_buf_len;
2060 int start_bit, offset;
2061
2062 src_dma = vb2_dma_contig_plane_dma_addr(&vb2_src->vb2_buf, 0);
2063 src_len = vb2_get_plane_payload(&vb2_src->vb2_buf, 0);
2064 src_buf_len = vb2_plane_size(&vb2_src->vb2_buf, 0);
2065
2066 start_bit = (group_entry[0].tile_offset & 0xf) * 8;
2067 offset = group_entry[0].tile_offset & ~0xf;
2068
2069 hantro_reg_write(vpu, &av1_strm_buffer_len, src_buf_len);
2070 hantro_reg_write(vpu, &av1_strm_start_bit, start_bit);
2071 hantro_reg_write(vpu, &av1_stream_len, src_len);
2072 hantro_reg_write(vpu, &av1_strm_start_offset, 0);
2073 hantro_write_addr(vpu, AV1_INPUT_STREAM, src_dma + offset);
2074 }
2075
2076 static void
rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx * ctx)2077 rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx *ctx)
2078 {
2079 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
2080 struct hantro_dev *vpu = ctx->dev;
2081 struct hantro_decoded_buffer *dst;
2082 struct vb2_v4l2_buffer *vb2_dst;
2083 dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
2084 size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
2085 size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
2086
2087 vb2_dst = av1_dec->frame_refs[av1_dec->current_frame_index].vb2_ref;
2088 dst = vb2_to_hantro_decoded_buf(&vb2_dst->vb2_buf);
2089 luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
2090 chroma_addr = luma_addr + cr_offset;
2091 mv_addr = luma_addr + mv_offset;
2092
2093 hantro_write_addr(vpu, AV1_TILE_OUT_LU, luma_addr);
2094 hantro_write_addr(vpu, AV1_TILE_OUT_CH, chroma_addr);
2095 hantro_write_addr(vpu, AV1_TILE_OUT_MV, mv_addr);
2096 }
2097
/*
 * Entry point for decoding one AV1 frame: validate/prepare the run,
 * program every register group, then kick the hardware.
 *
 * Returns 0 on success or a negative errno; on error the run is
 * finished immediately with VB2_BUF_STATE_ERROR.
 */
int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct vb2_v4l2_buffer *vb2_src;
	int ret;

	hantro_start_prepare_run(ctx);

	ret = rockchip_vpu981_av1_dec_prepare_run(ctx);
	if (ret)
		goto prepare_error;

	vb2_src = hantro_get_src_buf(ctx);
	if (!vb2_src) {
		ret = -EINVAL;
		goto prepare_error;
	}

	/* Refresh the reference bookkeeping before programming registers. */
	rockchip_vpu981_av1_dec_clean_refs(ctx);
	rockchip_vpu981_av1_dec_frame_ref(ctx, vb2_src->vb2_buf.timestamp);

	/* Program all register groups for this frame. */
	rockchip_vpu981_av1_dec_set_parameters(ctx);
	rockchip_vpu981_av1_dec_set_global_model(ctx);
	rockchip_vpu981_av1_dec_set_tile_info(ctx);
	rockchip_vpu981_av1_dec_set_reference_frames(ctx);
	rockchip_vpu981_av1_dec_set_segmentation(ctx);
	rockchip_vpu981_av1_dec_set_loopfilter(ctx);
	rockchip_vpu981_av1_dec_set_picture_dimensions(ctx);
	rockchip_vpu981_av1_dec_set_cdef(ctx);
	rockchip_vpu981_av1_dec_set_lr(ctx);
	rockchip_vpu981_av1_dec_set_fgs(ctx);
	rockchip_vpu981_av1_dec_set_prob(ctx);

	/* Core configuration: AV1 mode, MV write-out, clock gating. */
	hantro_reg_write(vpu, &av1_dec_mode, AV1_DEC_MODE);
	hantro_reg_write(vpu, &av1_dec_out_ec_byte_word, 0);
	hantro_reg_write(vpu, &av1_write_mvs_e, 1);
	hantro_reg_write(vpu, &av1_dec_out_ec_bypass, 1);
	hantro_reg_write(vpu, &av1_dec_clk_gate_e, 1);

	hantro_reg_write(vpu, &av1_dec_abort_e, 0);
	hantro_reg_write(vpu, &av1_dec_tile_int_e, 0);

	/* Bus/AXI tuning — values taken as-is from the reference setup. */
	hantro_reg_write(vpu, &av1_dec_alignment, 64);
	hantro_reg_write(vpu, &av1_apf_disable, 0);
	hantro_reg_write(vpu, &av1_apf_threshold, 8);
	hantro_reg_write(vpu, &av1_dec_buswidth, 2);
	hantro_reg_write(vpu, &av1_dec_max_burst, 16);
	hantro_reg_write(vpu, &av1_error_conceal_e, 0);
	hantro_reg_write(vpu, &av1_axi_rd_ostd_threshold, 64);
	hantro_reg_write(vpu, &av1_axi_wr_ostd_threshold, 64);

	/* Hardware watchdog: override timeouts with a large cycle count. */
	hantro_reg_write(vpu, &av1_ext_timeout_cycles, 0xfffffff);
	hantro_reg_write(vpu, &av1_ext_timeout_override_e, 1);
	hantro_reg_write(vpu, &av1_timeout_cycles, 0xfffffff);
	hantro_reg_write(vpu, &av1_timeout_override_e, 1);

	rockchip_vpu981_av1_dec_set_output_buffer(ctx);
	rockchip_vpu981_av1_dec_set_input_buffer(ctx, vb2_src);

	hantro_end_prepare_run(ctx);

	/* Start decoding — must be the last register write. */
	hantro_reg_write(vpu, &av1_dec_e, 1);

	return 0;

prepare_error:
	hantro_end_prepare_run(ctx);
	hantro_irq_done(vpu, VB2_BUF_STATE_ERROR);
	return ret;
}
2168
/*
 * Enable the post-processor to convert the decoder's tiled reference
 * output into the linear destination format (NV12/P010) chosen by
 * userspace, writing into the current destination buffer.
 */
static void rockchip_vpu981_postproc_enable(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	int width = ctx->dst_fmt.width;
	int height = ctx->dst_fmt.height;
	struct vb2_v4l2_buffer *vb2_dst;
	size_t chroma_offset;
	dma_addr_t dst_dma;

	vb2_dst = hantro_get_dst_buf(ctx);

	dst_dma = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0);
	/* Chroma plane follows the luma plane in the single output plane. */
	chroma_offset = ctx->dst_fmt.plane_fmt[0].bytesperline *
			ctx->dst_fmt.height;

	/* enable post processor */
	hantro_reg_write(vpu, &av1_pp_out_e, 1);
	hantro_reg_write(vpu, &av1_pp_in_format, 0);
	hantro_reg_write(vpu, &av1_pp0_dup_hor, 1);
	hantro_reg_write(vpu, &av1_pp0_dup_ver, 1);

	/*
	 * Input is programmed at half size with horizontal/vertical
	 * duplication enabled above — presumably how this PP revision
	 * expects a 1:1 scale; confirm against the hardware manual.
	 */
	hantro_reg_write(vpu, &av1_pp_in_height, height / 2);
	hantro_reg_write(vpu, &av1_pp_in_width, width / 2);
	hantro_reg_write(vpu, &av1_pp_out_height, height);
	hantro_reg_write(vpu, &av1_pp_out_width, width);
	hantro_reg_write(vpu, &av1_pp_out_y_stride,
			 ctx->dst_fmt.plane_fmt[0].bytesperline);
	hantro_reg_write(vpu, &av1_pp_out_c_stride,
			 ctx->dst_fmt.plane_fmt[0].bytesperline);
	/* Hardware output format codes: 1 = P010, 3 = NV12, 0 otherwise. */
	switch (ctx->dst_fmt.pixelformat) {
	case V4L2_PIX_FMT_P010:
		hantro_reg_write(vpu, &av1_pp_out_format, 1);
		break;
	case V4L2_PIX_FMT_NV12:
		hantro_reg_write(vpu, &av1_pp_out_format, 3);
		break;
	default:
		hantro_reg_write(vpu, &av1_pp_out_format, 0);
	}

	/* Optional PP features (blend, dither, crop, scaling) all disabled. */
	hantro_reg_write(vpu, &av1_ppd_blend_exist, 0);
	hantro_reg_write(vpu, &av1_ppd_dith_exist, 0);
	hantro_reg_write(vpu, &av1_ablend_crop_e, 0);
	hantro_reg_write(vpu, &av1_pp_format_customer1_e, 0);
	hantro_reg_write(vpu, &av1_pp_crop_exist, 0);
	hantro_reg_write(vpu, &av1_pp_up_level, 0);
	hantro_reg_write(vpu, &av1_pp_down_level, 0);
	hantro_reg_write(vpu, &av1_pp_exist, 0);

	hantro_write_addr(vpu, AV1_PP_OUT_LU, dst_dma);
	hantro_write_addr(vpu, AV1_PP_OUT_CH, dst_dma + chroma_offset);
}
2221
rockchip_vpu981_postproc_disable(struct hantro_ctx * ctx)2222 static void rockchip_vpu981_postproc_disable(struct hantro_ctx *ctx)
2223 {
2224 struct hantro_dev *vpu = ctx->dev;
2225
2226 /* disable post processor */
2227 hantro_reg_write(vpu, &av1_pp_out_e, 0);
2228 }
2229
/* Post-processor hooks registered with the common Hantro postproc core. */
const struct hantro_postproc_ops rockchip_vpu981_postproc_ops = {
	.enable = rockchip_vpu981_postproc_enable,
	.disable = rockchip_vpu981_postproc_disable,
};
2234