1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (c) 2023 MediaTek Inc.
4 * Author: Xiaoyong Lu <xiaoyong.lu@mediatek.com>
5 */
6
7 #include <linux/module.h>
8 #include <linux/slab.h>
9 #include <media/videobuf2-dma-contig.h>
10
11 #include "../mtk_vcodec_dec.h"
12 #include "../../common/mtk_vcodec_intr.h"
13 #include "../vdec_drv_base.h"
14 #include "../vdec_drv_if.h"
15 #include "../vdec_vpu_if.h"
16
/*
 * Sizing and sentinel constants.
 * AV1_MAX_FRAME_BUF_COUNT sizes the per-slot arrays in this file: one entry
 * per V4L2 reference plus one extra (presumably for the frame being decoded).
 */
#define AV1_MAX_FRAME_BUF_COUNT		(V4L2_AV1_TOTAL_REFS_PER_FRAME + 1)
#define AV1_TILE_BUF_SIZE		64
#define AV1_SCALE_SUBPEL_BITS		10
#define AV1_REF_SCALE_SHIFT		14
/* BIT(AV1_REF_SCALE_SHIFT): the scale value that stands for "unscaled" */
#define AV1_REF_NO_SCALE		BIT(AV1_REF_SCALE_SHIFT)
#define AV1_REF_INVALID_SCALE		-1
#define AV1_CDF_TABLE_BUFFER_SIZE	16384
#define AV1_PRIMARY_REF_NONE		7

/* Marks "no slot / no reference" in index fields */
#define AV1_INVALID_IDX			-1
27
/*
 * Divide @value by 2^@n after adding half of the divisor (BIT(n) >> 1), i.e.
 * the quotient is rounded to nearest rather than strictly up.
 * Both arguments are evaluated exactly once.
 */
#define AV1_DIV_ROUND_UP_POW2(value, n)			\
({							\
	typeof(n) _n = n;				\
	typeof(value) _value = value;			\
	(_value + (BIT(_n) >> 1)) >> _n;		\
})

/*
 * Signed variant: applies AV1_DIV_ROUND_UP_POW2() to the magnitude of @value
 * and restores the sign, so negative inputs round symmetrically to positive
 * ones. The temporaries use distinct names (_n_, _value_) so they do not
 * collide with those declared by the nested macro.
 */
#define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n)				\
({									\
	typeof(n) _n_ = n;						\
	typeof(value) _value_ = value;					\
	(((_value_) < 0) ? -AV1_DIV_ROUND_UP_POW2(-(_value_), (_n_))	\
		: AV1_DIV_ROUND_UP_POW2((_value_), (_n_)));		\
})
42
/*
 * Flag helpers: test a bit in (x)->flags and normalise the result to 0 or 1.
 * The *_FLAG(x, name) variants paste @name onto the matching V4L2 AV1 flag
 * prefix, e.g. FH_FLAG(fh, FOO) tests V4L2_AV1_FRAME_FLAG_FOO.
 */
#define BIT_FLAG(x, bit)		(!!((x)->flags & (bit)))
#define SEGMENTATION_FLAG(x, name)	(!!((x)->flags & V4L2_AV1_SEGMENTATION_FLAG_##name))
#define QUANT_FLAG(x, name)		(!!((x)->flags & V4L2_AV1_QUANTIZATION_FLAG_##name))
#define SEQUENCE_FLAG(x, name)		(!!((x)->flags & V4L2_AV1_SEQUENCE_FLAG_##name))
#define FH_FLAG(x, name)		(!!((x)->flags & V4L2_AV1_FRAME_FLAG_##name))
48
/* Lower and upper bound of a quantizer index */
#define MINQ		0
#define MAXQ		255

/* Geometry of div_lut[]: DIV_LUT_NUM + 1 entries with DIV_LUT_PREC_BITS of precision */
#define DIV_LUT_PREC_BITS	14
#define DIV_LUT_BITS		8
#define DIV_LUT_NUM		BIT(DIV_LUT_BITS)
#define WARP_PARAM_REDUCE_BITS	6
#define WARPEDMODEL_PREC_BITS	16

#define SEG_LVL_ALT_Q		0
#define SECONDARY_FILTER_STRENGTH_NUM_BITS	2
60
/*
 * Reciprocal lookup table with DIV_LUT_NUM + 1 (257) entries.
 * The entries follow div_lut[i] = round(2^(DIV_LUT_PREC_BITS + DIV_LUT_BITS) /
 * (DIV_LUT_NUM + i)), running from 16384 (i = 0) down to 8192 (i = 256).
 */
static const short div_lut[DIV_LUT_NUM + 1] = {
	16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
	15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
	15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
	14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
	13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
	13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
	13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
	12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
	12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
	11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
	11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
	11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
	10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
	10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
	10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
	9963, 9939, 9916, 9892, 9869, 9846, 9823, 9800, 9777, 9754, 9732,
	9709, 9687, 9664, 9642, 9620, 9598, 9576, 9554, 9533, 9511, 9489,
	9468, 9447, 9425, 9404, 9383, 9362, 9341, 9321, 9300, 9279, 9259,
	9239, 9218, 9198, 9178, 9158, 9138, 9118, 9098, 9079, 9059, 9039,
	9020, 9001, 8981, 8962, 8943, 8924, 8905, 8886, 8867, 8849, 8830,
	8812, 8793, 8775, 8756, 8738, 8720, 8702, 8684, 8666, 8648, 8630,
	8613, 8595, 8577, 8560, 8542, 8525, 8508, 8490, 8473, 8456, 8439,
	8422, 8405, 8389, 8372, 8355, 8339, 8322, 8306, 8289, 8273, 8257,
	8240, 8224, 8208, 8192,
};
87
/**
 * struct vdec_av1_slice_init_vsi - VSI used to initialize instance
 * @architecture:	architecture type
 * @reserved:		reserved
 * @core_vsi:		for core vsi
 * @cdf_table_addr:	cdf table addr
 * @cdf_table_size:	cdf table size
 * @iq_table_addr:	iq table addr
 * @iq_table_size:	iq table size
 * @vsi_size:		share vsi structure size
 *
 * NOTE(review): @cdf_table_size (u32) is followed directly by a u64 member,
 * so ABIs that align u64 to 8 bytes insert 4 bytes of implicit padding there.
 * Presumably the firmware uses the same layout - confirm before reordering.
 */
struct vdec_av1_slice_init_vsi {
	u32 architecture;
	u32 reserved;
	u64 core_vsi;
	u64 cdf_table_addr;
	u32 cdf_table_size;
	u64 iq_table_addr;
	u32 iq_table_size;
	u32 vsi_size;
};
109
/**
 * struct vdec_av1_slice_mem - memory address and size
 * @buf:		64-bit view of @dma_addr; keeps the first union 8 bytes wide
 * @dma_addr:		buffer address
 * @size:		buffer size
 * @dma_addr_end:	buffer end address
 * @padding:		64-bit member that keeps the second union 8 bytes wide
 *
 * Each union overlays its members, so @size and @dma_addr_end share storage
 * and only one of them is meaningful for a given buffer.
 */
struct vdec_av1_slice_mem {
	union {
		u64 buf;
		dma_addr_t dma_addr;
	};
	union {
		size_t size;
		dma_addr_t dma_addr_end;
		u64 padding;
	};
};
129
/**
 * struct vdec_av1_slice_state - decoding state
 * @err:	error type reported for the decode
 * @full:	transcoded buffer is full or not
 * @timeout:	decode timed out or not
 * @perf:	performance measurement enabled
 * @crc:	hw checksum
 * @out_size:	hw output size
 */
struct vdec_av1_slice_state {
	int err;
	u32 full;
	u32 timeout;
	u32 perf;
	u32 crc[16];
	u32 out_size;
};
147
/*
 * enum vdec_av1_slice_resolution_level - resolution level
 *
 * Levels are listed in increasing order, starting from AV1_RES_NONE (0).
 */
enum vdec_av1_slice_resolution_level {
	AV1_RES_NONE,
	AV1_RES_FHD,
	AV1_RES_4K,
	AV1_RES_8K,
};
157
/*
 * enum vdec_av1_slice_frame_type - av1 frame type
 *
 * AV1_FRAME_TYPES is not a frame type; it follows the last real value and so
 * equals the number of frame types.
 */
enum vdec_av1_slice_frame_type {
	AV1_KEY_FRAME = 0,
	AV1_INTER_FRAME,
	AV1_INTRA_ONLY_FRAME,
	AV1_SWITCH_FRAME,
	AV1_FRAME_TYPES,
};
168
/*
 * enum vdec_av1_slice_reference_mode - reference mode type
 *
 * AV1_REFERENCE_MODES is not a mode; it follows the last real value and so
 * equals the number of reference modes.
 */
enum vdec_av1_slice_reference_mode {
	AV1_SINGLE_REFERENCE = 0,
	AV1_COMPOUND_REFERENCE,
	AV1_REFERENCE_MODE_SELECT,
	AV1_REFERENCE_MODES,
};
178
/**
 * struct vdec_av1_slice_tile_group - info for each tile
 * @num_tiles:		number of tiles
 * @tile_size:		input size of each tile
 * @tile_start_offset:	offset of each tile from the start of the input buffer
 */
struct vdec_av1_slice_tile_group {
	u32 num_tiles;
	u32 tile_size[V4L2_AV1_MAX_TILE_COUNT];
	u32 tile_start_offset[V4L2_AV1_MAX_TILE_COUNT];
};
190
/**
 * struct vdec_av1_slice_scale_factors - scale info for each ref frame
 * @is_scaled:	frame is scaled or not
 * @x_scale:	frame width scale coefficient
 * @y_scale:	frame height scale coefficient
 * @x_step:	width step for x_scale
 * @y_step:	height step for y_scale
 */
struct vdec_av1_slice_scale_factors {
	u8 is_scaled;
	int x_scale;
	int y_scale;
	int x_step;
	int y_step;
};
206
/**
 * struct vdec_av1_slice_frame_refs - ref frame info
 * @ref_fb_idx:		ref slot index
 * @ref_map_idx:	ref frame index
 * @scale_factors:	scale factors for this ref frame
 */
struct vdec_av1_slice_frame_refs {
	int ref_fb_idx;
	int ref_map_idx;
	struct vdec_av1_slice_scale_factors scale_factors;
};
218
/**
 * struct vdec_av1_slice_gm - AV1 Global Motion parameters
 * @wmtype:	the type of global motion transform used
 * @wmmat:	global motion parameters (gm_params)
 * @alpha:	alpha info
 * @beta:	beta info
 * @gamma:	gamma info
 * @delta:	delta info
 * @invalid:	is invalid or not
 */
struct vdec_av1_slice_gm {
	int wmtype;
	int wmmat[8];
	short alpha;
	short beta;
	short gamma;
	short delta;
	char invalid;
};
238
/**
 * struct vdec_av1_slice_sm - AV1 Skip Mode parameters
 * @skip_mode_allowed:	skip mode is allowed or not
 * @skip_mode_present:	specifies whether skip_mode will be present or not
 * @skip_mode_frame:	specifies the frames to use for compound prediction
 */
struct vdec_av1_slice_sm {
	u8 skip_mode_allowed;
	u8 skip_mode_present;
	int skip_mode_frame[2];
};
250
/**
 * struct vdec_av1_slice_seg - AV1 Segmentation params
 * @segmentation_enabled:	this frame makes use of the segmentation tool or not
 * @segmentation_update_map:	segmentation map is updated while decoding this frame
 * @segmentation_temporal_update: segmentation map is coded relative to the
 *				existing segmentation map
 * @segmentation_update_data:	new parameters are about to be specified for each segment
 * @feature_data:		specifies the feature data for a segment feature
 * @feature_enabled_mask:	the corresponding feature value is coded or not
 * @segid_preskip:		segment id will be read before the skip syntax element
 * @last_active_segid:		the highest numbered segment id that has some enabled feature
 */
struct vdec_av1_slice_seg {
	u8 segmentation_enabled;
	u8 segmentation_update_map;
	u8 segmentation_temporal_update;
	u8 segmentation_update_data;
	int feature_data[V4L2_AV1_MAX_SEGMENTS][V4L2_AV1_SEG_LVL_MAX];
	u16 feature_enabled_mask[V4L2_AV1_MAX_SEGMENTS];
	int segid_preskip;
	int last_active_segid;
};
272
/**
 * struct vdec_av1_slice_delta_q_lf - AV1 quantizer and loop filter delta parameters
 * @delta_q_present:	specifies whether quantizer index delta values are present
 * @delta_q_res:	specifies the left shift which should be applied to decoded
 *			quantizer index delta values
 * @delta_lf_present:	specifies whether loop filter delta values are present
 * @delta_lf_res:	specifies the left shift which should be applied to decoded
 *			loop filter delta values
 * @delta_lf_multi:	specifies that separate loop filter deltas are sent for horizontal
 *			luma edges, vertical luma edges, the u edges, and the v edges
 */
struct vdec_av1_slice_delta_q_lf {
	u8 delta_q_present;
	u8 delta_q_res;
	u8 delta_lf_present;
	u8 delta_lf_res;
	u8 delta_lf_multi;
};
290
/**
 * struct vdec_av1_slice_quantization - AV1 Quantization params
 * @base_q_idx:		indicates the base frame qindex. This is used for Y AC
 *			coefficients and as the base value for the other quantizers.
 * @qindex:		qindex, one entry per segment
 * @delta_qydc:		indicates the Y DC quantizer relative to base_q_idx
 * @delta_qudc:		indicates the U DC quantizer relative to base_q_idx
 * @delta_quac:		indicates the U AC quantizer relative to base_q_idx
 * @delta_qvdc:		indicates the V DC quantizer relative to base_q_idx
 * @delta_qvac:		indicates the V AC quantizer relative to base_q_idx
 * @using_qmatrix:	specifies that the quantizer matrix will be used to
 *			compute quantizers
 * @qm_y:		specifies the level in the quantizer matrix that should
 *			be used for luma plane decoding
 * @qm_u:		specifies the level in the quantizer matrix that should
 *			be used for chroma U plane decoding
 * @qm_v:		specifies the level in the quantizer matrix that should be
 *			used for chroma V plane decoding
 */
struct vdec_av1_slice_quantization {
	int base_q_idx;
	int qindex[V4L2_AV1_MAX_SEGMENTS];
	int delta_qydc;
	int delta_qudc;
	int delta_quac;
	int delta_qvdc;
	int delta_qvac;
	u8 using_qmatrix;
	u8 qm_y;
	u8 qm_u;
	u8 qm_v;
};
323
/**
 * struct vdec_av1_slice_lr - AV1 Loop Restoration parameters
 * @use_lr:			whether to use loop restoration
 * @use_chroma_lr:		whether to use chroma loop restoration
 * @frame_restoration_type:	specifies the type of restoration used for each plane
 * @loop_restoration_size:	specifies the size of loop restoration units in units
 *				of samples in the current plane
 */
struct vdec_av1_slice_lr {
	u8 use_lr;
	u8 use_chroma_lr;
	u8 frame_restoration_type[V4L2_AV1_NUM_PLANES_MAX];
	u32 loop_restoration_size[V4L2_AV1_NUM_PLANES_MAX];
};
338
/**
 * struct vdec_av1_slice_loop_filter - AV1 Loop filter parameters
 * @loop_filter_level:		an array containing loop filter strength values
 * @loop_filter_ref_deltas:	contains the adjustment needed for the filter
 *				level based on the chosen reference frame
 * @loop_filter_mode_deltas:	contains the adjustment needed for the filter
 *				level based on the chosen mode
 * @loop_filter_sharpness:	indicates the sharpness level. The loop_filter_level
 *				and loop_filter_sharpness together determine when
 *				a block edge is filtered, and by how much the
 *				filtering can change the sample values
 * @loop_filter_delta_enabled:	filter level depends on the mode and reference
 *				frame used to predict a block
 */
struct vdec_av1_slice_loop_filter {
	u8 loop_filter_level[4];
	int loop_filter_ref_deltas[V4L2_AV1_TOTAL_REFS_PER_FRAME];
	int loop_filter_mode_deltas[2];
	u8 loop_filter_sharpness;
	u8 loop_filter_delta_enabled;
};
360
/**
 * struct vdec_av1_slice_cdef - AV1 CDEF parameters
 * @cdef_damping:	controls the amount of damping in the deringing filter
 * @cdef_y_strength:	specifies the strength of the primary filter and secondary
 *			filter for luma
 * @cdef_uv_strength:	specifies the strength of the primary filter and secondary
 *			filter for chroma
 * @cdef_bits:		specifies the number of bits needed to specify which
 *			CDEF filter to apply
 */
struct vdec_av1_slice_cdef {
	u8 cdef_damping;
	u8 cdef_y_strength[8];
	u8 cdef_uv_strength[8];
	u8 cdef_bits;
};
375
/**
 * struct vdec_av1_slice_mfmv - AV1 mfmv parameters
 * @mfmv_valid_ref:	mfmv_valid_ref
 * @mfmv_dir:		mfmv_dir
 * @mfmv_ref_to_cur:	mfmv_ref_to_cur
 * @mfmv_ref_frame_idx:	mfmv_ref_frame_idx
 * @mfmv_count:		mfmv_count
 *
 * NOTE(review): the per-field descriptions only repeat the field names;
 * "mfmv" presumably stands for motion field motion vectors - confirm and
 * expand. Each array holds three entries.
 */
struct vdec_av1_slice_mfmv {
	u32 mfmv_valid_ref[3];
	u32 mfmv_dir[3];
	int mfmv_ref_to_cur[3];
	int mfmv_ref_frame_idx[3];
	int mfmv_count;
};
391
/**
 * struct vdec_av1_slice_tile - AV1 Tile info
 * @tile_cols:			specifies the number of tiles across the frame
 * @tile_rows:			specifies the number of tiles down the frame
 * @mi_col_starts:		an array specifying the start column of each tile
 * @mi_row_starts:		an array specifying the start row of each tile
 * @context_update_tile_id:	specifies which tile to use for the CDF update
 * @uniform_tile_spacing_flag:	tiles are uniformly spaced across the frame
 *				or the tile sizes are coded
 */
struct vdec_av1_slice_tile {
	u8 tile_cols;
	u8 tile_rows;
	int mi_col_starts[V4L2_AV1_MAX_TILE_COLS + 1];
	int mi_row_starts[V4L2_AV1_MAX_TILE_ROWS + 1];
	u8 context_update_tile_id;
	u8 uniform_tile_spacing_flag;
};
410
/**
 * struct vdec_av1_slice_uncompressed_header - Represents an AV1 Frame Header OBU
 * @use_ref_frame_mvs:		use_ref_frame_mvs flag
 * @order_hint:			specifies OrderHintBits least significant bits of the
 *				expected output order for this frame
 * @gm:				global motion params, one entry per reference
 * @upscaled_width:		the upscaled width
 * @frame_width:		frame's width
 * @frame_height:		frame's height
 * @reduced_tx_set:		frame is restricted to a reduced subset of the full
 *				set of transform types
 * @tx_mode:			specifies how the transform size is determined
 * @uniform_tile_spacing_flag:	tiles are uniformly spaced across the frame
 *				or the tile sizes are coded
 * @interpolation_filter:	specifies the filter selection used for performing
 *				inter prediction
 * @allow_warped_motion:	motion_mode may be present or not
 * @is_motion_mode_switchable:	equal to 0 specifies that only the SIMPLE motion
 *				mode will be used
 * @reference_mode:		frame reference mode selected
 * @allow_high_precision_mv:	specifies that motion vectors are specified to
 *				quarter pel precision or to eighth pel precision
 * @allow_intra_bc:		indicates that intra block copy may be used in this frame
 * @force_integer_mv:		specifies motion vectors will always be integers or
 *				can contain fractional bits
 * @allow_screen_content_tools:	intra blocks may use palette encoding
 * @error_resilient_mode:	error resilient mode is enabled/disabled
 * @frame_type:			specifies the AV1 frame type
 * @primary_ref_frame:		specifies which reference frame contains the CDF values
 *				and other state that should be loaded at the start of
 *				the frame
 * @disable_frame_end_update_cdf: indicates whether the end of frame CDF update
 *				is disabled or enabled
 * @disable_cdf_update:		specifies whether the CDF update in the symbol
 *				decoding process should be disabled
 * @skip_mode:			av1 skip mode parameters
 * @seg:			av1 segmentation parameters
 * @delta_q_lf:			av1 quantizer and loop filter delta parameters
 * @quant:			av1 Quantization params
 * @lr:				av1 Loop Restoration parameters
 * @superres_denom:		the denominator for the upscaling ratio
 * @loop_filter:		av1 Loop filter parameters
 * @cdef:			av1 CDEF parameters
 * @mfmv:			av1 mfmv parameters
 * @tile:			av1 Tile info
 * @frame_is_intra:		intra frame
 * @loss_less_array:		lossless array, one entry per segment
 * @coded_loss_less:		coded lossless
 * @mi_rows:			size of mi unit in rows
 * @mi_cols:			size of mi unit in cols
 */
struct vdec_av1_slice_uncompressed_header {
	u8 use_ref_frame_mvs;
	int order_hint;
	struct vdec_av1_slice_gm gm[V4L2_AV1_TOTAL_REFS_PER_FRAME];
	u32 upscaled_width;
	u32 frame_width;
	u32 frame_height;
	u8 reduced_tx_set;
	u8 tx_mode;
	u8 uniform_tile_spacing_flag;
	u8 interpolation_filter;
	u8 allow_warped_motion;
	u8 is_motion_mode_switchable;
	u8 reference_mode;
	u8 allow_high_precision_mv;
	u8 allow_intra_bc;
	u8 force_integer_mv;
	u8 allow_screen_content_tools;
	u8 error_resilient_mode;
	u8 frame_type;
	u8 primary_ref_frame;
	u8 disable_frame_end_update_cdf;
	u32 disable_cdf_update;
	struct vdec_av1_slice_sm skip_mode;
	struct vdec_av1_slice_seg seg;
	struct vdec_av1_slice_delta_q_lf delta_q_lf;
	struct vdec_av1_slice_quantization quant;
	struct vdec_av1_slice_lr lr;
	u32 superres_denom;
	struct vdec_av1_slice_loop_filter loop_filter;
	struct vdec_av1_slice_cdef cdef;
	struct vdec_av1_slice_mfmv mfmv;
	struct vdec_av1_slice_tile tile;
	u8 frame_is_intra;
	u8 loss_less_array[V4L2_AV1_MAX_SEGMENTS];
	u8 coded_loss_less;
	u32 mi_rows;
	u32 mi_cols;
};
497
/**
 * struct vdec_av1_slice_seq_header - Represents an AV1 Sequence OBU
 * @bitdepth:			the bitdepth to use for the sequence
 * @enable_superres:		specifies whether the use_superres syntax element may
 *				be present
 * @enable_filter_intra:	specifies whether the use_filter_intra syntax element
 *				may be present
 * @enable_intra_edge_filter:	whether the intra edge filtering process should be enabled
 * @enable_interintra_compound:	specifies that the mode info for inter blocks may
 *				contain the syntax element interintra
 * @enable_masked_compound:	specifies that the mode info for inter blocks may
 *				contain the syntax element compound_type
 * @enable_dual_filter:		the inter prediction filter type may be specified
 *				independently
 * @enable_jnt_comp:		distance weights process may be used for inter prediction
 * @mono_chrome:		indicates the video does not contain U and V color planes
 * @enable_order_hint:		tools based on the values of order hints may be used
 * @order_hint_bits:		the number of bits used for the order_hint field at
 *				each frame
 * @use_128x128_superblock:	indicates superblocks contain 128*128 luma samples
 * @subsampling_x:		the chroma subsampling format
 * @subsampling_y:		the chroma subsampling format
 * @max_frame_width:		the maximum frame width for the frames represented
 *				by the sequence
 * @max_frame_height:		the maximum frame height for the frames represented
 *				by the sequence
 */
struct vdec_av1_slice_seq_header {
	u8 bitdepth;
	u8 enable_superres;
	u8 enable_filter_intra;
	u8 enable_intra_edge_filter;
	u8 enable_interintra_compound;
	u8 enable_masked_compound;
	u8 enable_dual_filter;
	u8 enable_jnt_comp;
	u8 mono_chrome;
	u8 enable_order_hint;
	u8 order_hint_bits;
	u8 use_128x128_superblock;
	u8 subsampling_x;
	u8 subsampling_y;
	u32 max_frame_width;
	u32 max_frame_height;
};
537
/**
 * struct vdec_av1_slice_frame - Represents current Frame info
 * @uh:			uncompressed header info
 * @seq:		sequence header info
 * @large_scale_tile:	is large scale mode
 * @cur_ts:		current frame timestamp
 * @prev_fb_idx:	prev slot id
 * @ref_frame_sign_bias: array of ref_frame sign bias
 * @order_hints:	array of ref_frame order hints
 * @ref_frame_valid:	array marking which ref_frames are valid
 * @ref_frame_map:	maps each reference to the slot that holds its frame info,
 *			or AV1_INVALID_IDX when no slot matches
 * @frame_refs:		ref_frame info
 */
struct vdec_av1_slice_frame {
	struct vdec_av1_slice_uncompressed_header uh;
	struct vdec_av1_slice_seq_header seq;
	u8 large_scale_tile;
	u64 cur_ts;
	int prev_fb_idx;
	u8 ref_frame_sign_bias[V4L2_AV1_TOTAL_REFS_PER_FRAME];
	u32 order_hints[V4L2_AV1_REFS_PER_FRAME];
	u32 ref_frame_valid[V4L2_AV1_REFS_PER_FRAME];
	int ref_frame_map[V4L2_AV1_TOTAL_REFS_PER_FRAME];
	struct vdec_av1_slice_frame_refs frame_refs[V4L2_AV1_REFS_PER_FRAME];
};
563
/**
 * struct vdec_av1_slice_work_buffer - work buffer for lat
 * @mv_addr:	mv buffer memory info
 * @cdf_addr:	cdf buffer memory info
 * @segid_addr:	segid buffer memory info
 */
struct vdec_av1_slice_work_buffer {
	struct vdec_av1_slice_mem mv_addr;
	struct vdec_av1_slice_mem cdf_addr;
	struct vdec_av1_slice_mem segid_addr;
};
575
/**
 * struct vdec_av1_slice_frame_info - frame info for each slot
 * @frame_type:		frame type
 * @frame_is_intra:	is intra frame
 * @order_hint:		order hint
 * @order_hints:	reference frame order hints
 * @upscaled_width:	upscaled width
 * @pic_pitch:		buffer pitch
 * @frame_width:	frame width
 * @frame_height:	frame height
 * @mi_rows:		rows in mode info
 * @mi_cols:		cols in mode info
 * @ref_count:		reference count of the slot; 0 means the slot is free
 */
struct vdec_av1_slice_frame_info {
	u8 frame_type;
	u8 frame_is_intra;
	int order_hint;
	u32 order_hints[V4L2_AV1_REFS_PER_FRAME];
	u32 upscaled_width;
	u32 pic_pitch;
	u32 frame_width;
	u32 frame_height;
	u32 mi_rows;
	u32 mi_cols;
	int ref_count;
};
603
/**
 * struct vdec_av1_slice_slot - slot info that needs to be saved in the global instance
 * @frame_info:	frame info for each slot
 * @timestamp:	timestamp of the frame held in each slot; compared against the
 *		reference timestamps of the frame control to find reference slots
 */
struct vdec_av1_slice_slot {
	struct vdec_av1_slice_frame_info frame_info[AV1_MAX_FRAME_BUF_COUNT];
	u64 timestamp[AV1_MAX_FRAME_BUF_COUNT];
};
613
/**
 * struct vdec_av1_slice_fb - frame buffer for decoding
 * @y:	y (luma) buffer address info
 * @c:	c (chroma) buffer address info
 */
struct vdec_av1_slice_fb {
	struct vdec_av1_slice_mem y;
	struct vdec_av1_slice_mem c;
};
623
/**
 * struct vdec_av1_slice_vsi - exchange frame information between Main CPU and MicroP
 * @bs:			input buffer info
 * @work_buffer:	working buffers for hw, one per slot
 * @cdf_table:		cdf_table buffer
 * @cdf_tmp:		cdf temp buffer
 * @rd_mv:		mv buffer for lat output, core input
 * @ube:		ube buffer
 * @trans:		transcoded buffer
 * @err_map:		err map buffer
 * @row_info:		row info buffer
 * @fb:			current y/c buffer
 * @ref:		ref y/c buffers
 * @iq_table:		iq table buffer
 * @tile:		tile buffer
 * @slots:		slots info for each frame
 * @slot_id:		current frame slot id
 * @frame:		current frame info
 * @state:		status after decode done
 * @cur_lst_tile_id:	tile id for large scale
 */
struct vdec_av1_slice_vsi {
	/* lat */
	struct vdec_av1_slice_mem bs;
	struct vdec_av1_slice_work_buffer work_buffer[AV1_MAX_FRAME_BUF_COUNT];
	struct vdec_av1_slice_mem cdf_table;
	struct vdec_av1_slice_mem cdf_tmp;
	/* LAT stage's output, Core stage's input */
	struct vdec_av1_slice_mem rd_mv;
	struct vdec_av1_slice_mem ube;
	struct vdec_av1_slice_mem trans;
	struct vdec_av1_slice_mem err_map;
	struct vdec_av1_slice_mem row_info;
	/* core */
	struct vdec_av1_slice_fb fb;
	struct vdec_av1_slice_fb ref[V4L2_AV1_REFS_PER_FRAME];
	struct vdec_av1_slice_mem iq_table;
	/* shared by lat and core */
	struct vdec_av1_slice_mem tile;
	struct vdec_av1_slice_slot slots;
	s8 slot_id;
	struct vdec_av1_slice_frame frame;
	struct vdec_av1_slice_state state;
	u32 cur_lst_tile_id;
};
669
/**
 * struct vdec_av1_slice_pfc - per-frame context that contains a local vsi.
 *                             pass it from lat to core
 * @vsi:	local vsi. copy to/from remote vsi before/after decoding
 * @ref_idx:	reference buffer timestamps
 * @seq:	picture sequence
 */
struct vdec_av1_slice_pfc {
	struct vdec_av1_slice_vsi vsi;
	u64 ref_idx[V4L2_AV1_REFS_PER_FRAME];
	int seq;
};
682
/**
 * struct vdec_av1_slice_instance - represent one av1 instance
 * @ctx:		pointer to codec's context
 * @vpu:		VPU instance
 * @iq_table:		iq table buffer
 * @cdf_table:		cdf table buffer
 * @mv:			mv working buffers
 * @cdf:		cdf working buffers
 * @seg:		segmentation working buffers
 * @cdf_temp:		cdf temp buffer
 * @tile:		tile buffer
 * @slots:		slots info
 * @tile_group:		tile_group entry
 * @level:		level of current resolution
 * @width:		width of last picture
 * @height:		height of last picture
 * @frame_type:		frame_type of last picture
 * @irq_enabled:	irq to Main CPU or MicroP
 * @inneracing_mode:	is inneracing mode
 * @init_vsi:		vsi used to initialize the AV1 instance
 * @vsi:		vsi used for decoding/flush ...
 * @core_vsi:		vsi used for Core stage
 * @seq:		global picture sequence
 *
 * @init_vsi and @vsi share one pointer through an anonymous union, so only
 * the view matching the current use of the VSI is meaningful.
 */
struct vdec_av1_slice_instance {
	struct mtk_vcodec_dec_ctx *ctx;
	struct vdec_vpu_inst vpu;

	struct mtk_vcodec_mem iq_table;
	struct mtk_vcodec_mem cdf_table;

	struct mtk_vcodec_mem mv[AV1_MAX_FRAME_BUF_COUNT];
	struct mtk_vcodec_mem cdf[AV1_MAX_FRAME_BUF_COUNT];
	struct mtk_vcodec_mem seg[AV1_MAX_FRAME_BUF_COUNT];
	struct mtk_vcodec_mem cdf_temp;
	struct mtk_vcodec_mem tile;
	struct vdec_av1_slice_slot slots;
	struct vdec_av1_slice_tile_group tile_group;

	/* for resolution change and get_pic_info */
	enum vdec_av1_slice_resolution_level level;
	u32 width;
	u32 height;

	u32 frame_type;
	u32 irq_enabled;
	u32 inneracing_mode;

	/* MicroP vsi */
	union {
		struct vdec_av1_slice_init_vsi *init_vsi;
		struct vdec_av1_slice_vsi *vsi;
	};
	struct vdec_av1_slice_vsi *core_vsi;
	int seq;
};
739
740 static int vdec_av1_slice_core_decode(struct vdec_lat_buf *lat_buf);
741
/*
 * Return the zero-based position of the most significant set bit of @n.
 * A value of 0 has no set bit and yields 0, the same result as for 1.
 */
static inline int vdec_av1_slice_get_msb(u32 n)
{
	/* clz is in 0..31 for non-zero input, so 31 - clz equals 31 ^ clz */
	return n ? 31 - __builtin_clz(n) : 0;
}
748
/*
 * Check the size of the current frame against a reference frame.
 *
 * Returns true when, in both dimensions, twice the current size is at least
 * the reference size and the current size is at most 16 times the reference
 * size; false otherwise. All arithmetic is done in u32.
 */
static inline bool vdec_av1_slice_need_scale(u32 ref_width, u32 ref_height,
					     u32 this_width, u32 this_height)
{
	/* current frame smaller than half of the reference */
	if ((this_width << 1) < ref_width || (this_height << 1) < ref_height)
		return false;

	/* current frame more than 16 times larger than the reference */
	if (this_width > (ref_width << 4) || this_height > (ref_height << 4))
		return false;

	return true;
}
757
vdec_av1_get_ctrl_ptr(struct mtk_vcodec_dec_ctx * ctx,int id)758 static void *vdec_av1_get_ctrl_ptr(struct mtk_vcodec_dec_ctx *ctx, int id)
759 {
760 struct v4l2_ctrl *ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, id);
761
762 if (!ctrl)
763 return ERR_PTR(-EINVAL);
764
765 return ctrl->p_cur.p;
766 }
767
vdec_av1_slice_init_cdf_table(struct vdec_av1_slice_instance * instance)768 static int vdec_av1_slice_init_cdf_table(struct vdec_av1_slice_instance *instance)
769 {
770 u8 *remote_cdf_table;
771 struct mtk_vcodec_dec_ctx *ctx;
772 struct vdec_av1_slice_init_vsi *vsi;
773 int ret;
774
775 ctx = instance->ctx;
776 vsi = instance->vpu.vsi;
777 remote_cdf_table = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler,
778 (u32)vsi->cdf_table_addr);
779 if (IS_ERR(remote_cdf_table)) {
780 mtk_vdec_err(ctx, "failed to map cdf table\n");
781 return PTR_ERR(remote_cdf_table);
782 }
783
784 mtk_vdec_debug(ctx, "map cdf table to 0x%p\n", remote_cdf_table);
785
786 if (instance->cdf_table.va)
787 mtk_vcodec_mem_free(ctx, &instance->cdf_table);
788 instance->cdf_table.size = vsi->cdf_table_size;
789
790 ret = mtk_vcodec_mem_alloc(ctx, &instance->cdf_table);
791 if (ret)
792 return ret;
793
794 memcpy(instance->cdf_table.va, remote_cdf_table, vsi->cdf_table_size);
795
796 return 0;
797 }
798
vdec_av1_slice_init_iq_table(struct vdec_av1_slice_instance * instance)799 static int vdec_av1_slice_init_iq_table(struct vdec_av1_slice_instance *instance)
800 {
801 u8 *remote_iq_table;
802 struct mtk_vcodec_dec_ctx *ctx;
803 struct vdec_av1_slice_init_vsi *vsi;
804 int ret;
805
806 ctx = instance->ctx;
807 vsi = instance->vpu.vsi;
808 remote_iq_table = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler,
809 (u32)vsi->iq_table_addr);
810 if (IS_ERR(remote_iq_table)) {
811 mtk_vdec_err(ctx, "failed to map iq table\n");
812 return PTR_ERR(remote_iq_table);
813 }
814
815 mtk_vdec_debug(ctx, "map iq table to 0x%p\n", remote_iq_table);
816
817 if (instance->iq_table.va)
818 mtk_vcodec_mem_free(ctx, &instance->iq_table);
819 instance->iq_table.size = vsi->iq_table_size;
820
821 ret = mtk_vcodec_mem_alloc(ctx, &instance->iq_table);
822 if (ret)
823 return ret;
824
825 memcpy(instance->iq_table.va, remote_iq_table, vsi->iq_table_size);
826
827 return 0;
828 }
829
vdec_av1_slice_get_new_slot(struct vdec_av1_slice_vsi * vsi)830 static int vdec_av1_slice_get_new_slot(struct vdec_av1_slice_vsi *vsi)
831 {
832 struct vdec_av1_slice_slot *slots = &vsi->slots;
833 int new_slot_idx = AV1_INVALID_IDX;
834 int i;
835
836 for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
837 if (slots->frame_info[i].ref_count == 0) {
838 new_slot_idx = i;
839 break;
840 }
841 }
842
843 if (new_slot_idx != AV1_INVALID_IDX) {
844 slots->frame_info[new_slot_idx].ref_count++;
845 slots->timestamp[new_slot_idx] = vsi->frame.cur_ts;
846 }
847
848 return new_slot_idx;
849 }
850
vdec_av1_slice_clear_fb(struct vdec_av1_slice_frame_info * frame_info)851 static inline void vdec_av1_slice_clear_fb(struct vdec_av1_slice_frame_info *frame_info)
852 {
853 memset((void *)frame_info, 0, sizeof(struct vdec_av1_slice_frame_info));
854 }
855
vdec_av1_slice_decrease_ref_count(struct vdec_av1_slice_slot * slots,int fb_idx)856 static void vdec_av1_slice_decrease_ref_count(struct vdec_av1_slice_slot *slots, int fb_idx)
857 {
858 struct vdec_av1_slice_frame_info *frame_info = slots->frame_info;
859
860 frame_info[fb_idx].ref_count--;
861 if (frame_info[fb_idx].ref_count < 0) {
862 frame_info[fb_idx].ref_count = 0;
863 pr_err(MTK_DBG_V4L2_STR "av1_error: %s() fb_idx %d decrease ref_count error\n",
864 __func__, fb_idx);
865 }
866
867 vdec_av1_slice_clear_fb(&frame_info[fb_idx]);
868 }
869
vdec_av1_slice_cleanup_slots(struct vdec_av1_slice_slot * slots,struct vdec_av1_slice_frame * frame,struct v4l2_ctrl_av1_frame * ctrl_fh)870 static void vdec_av1_slice_cleanup_slots(struct vdec_av1_slice_slot *slots,
871 struct vdec_av1_slice_frame *frame,
872 struct v4l2_ctrl_av1_frame *ctrl_fh)
873 {
874 int slot_id, ref_id;
875
876 for (ref_id = 0; ref_id < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref_id++)
877 frame->ref_frame_map[ref_id] = AV1_INVALID_IDX;
878
879 for (slot_id = 0; slot_id < AV1_MAX_FRAME_BUF_COUNT; slot_id++) {
880 u64 timestamp = slots->timestamp[slot_id];
881 bool ref_used = false;
882
883 /* ignored unused slots */
884 if (slots->frame_info[slot_id].ref_count == 0)
885 continue;
886
887 for (ref_id = 0; ref_id < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref_id++) {
888 if (ctrl_fh->reference_frame_ts[ref_id] == timestamp) {
889 frame->ref_frame_map[ref_id] = slot_id;
890 ref_used = true;
891 }
892 }
893
894 if (!ref_used)
895 vdec_av1_slice_decrease_ref_count(slots, slot_id);
896 }
897 }
898
vdec_av1_slice_setup_slot(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_vsi * vsi,struct v4l2_ctrl_av1_frame * ctrl_fh)899 static void vdec_av1_slice_setup_slot(struct vdec_av1_slice_instance *instance,
900 struct vdec_av1_slice_vsi *vsi,
901 struct v4l2_ctrl_av1_frame *ctrl_fh)
902 {
903 struct vdec_av1_slice_frame_info *cur_frame_info;
904 struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh;
905 int ref_id;
906
907 memcpy(&vsi->slots, &instance->slots, sizeof(instance->slots));
908 vdec_av1_slice_cleanup_slots(&vsi->slots, &vsi->frame, ctrl_fh);
909 vsi->slot_id = vdec_av1_slice_get_new_slot(vsi);
910
911 if (vsi->slot_id == AV1_INVALID_IDX) {
912 mtk_v4l2_vdec_err(instance->ctx, "warning:av1 get invalid index slot\n");
913 vsi->slot_id = 0;
914 }
915 cur_frame_info = &vsi->slots.frame_info[vsi->slot_id];
916 cur_frame_info->frame_type = uh->frame_type;
917 cur_frame_info->frame_is_intra = ((uh->frame_type == AV1_INTRA_ONLY_FRAME) ||
918 (uh->frame_type == AV1_KEY_FRAME));
919 cur_frame_info->order_hint = uh->order_hint;
920 cur_frame_info->upscaled_width = uh->upscaled_width;
921 cur_frame_info->pic_pitch = 0;
922 cur_frame_info->frame_width = uh->frame_width;
923 cur_frame_info->frame_height = uh->frame_height;
924 cur_frame_info->mi_cols = ((uh->frame_width + 7) >> 3) << 1;
925 cur_frame_info->mi_rows = ((uh->frame_height + 7) >> 3) << 1;
926
927 /* ensure current frame is properly mapped if referenced */
928 for (ref_id = 0; ref_id < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref_id++) {
929 u64 timestamp = vsi->slots.timestamp[vsi->slot_id];
930
931 if (ctrl_fh->reference_frame_ts[ref_id] == timestamp)
932 vsi->frame.ref_frame_map[ref_id] = vsi->slot_id;
933 }
934 }
935
vdec_av1_slice_alloc_working_buffer(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_vsi * vsi)936 static int vdec_av1_slice_alloc_working_buffer(struct vdec_av1_slice_instance *instance,
937 struct vdec_av1_slice_vsi *vsi)
938 {
939 struct mtk_vcodec_dec_ctx *ctx = instance->ctx;
940 enum vdec_av1_slice_resolution_level level;
941 u32 max_sb_w, max_sb_h, max_w, max_h, w, h;
942 int i, ret;
943
944 w = vsi->frame.uh.frame_width;
945 h = vsi->frame.uh.frame_height;
946
947 if (w > VCODEC_DEC_4K_CODED_WIDTH || h > VCODEC_DEC_4K_CODED_HEIGHT)
948 /* 8K */
949 return -EINVAL;
950
951 if (w > MTK_VDEC_MAX_W || h > MTK_VDEC_MAX_H) {
952 /* 4K */
953 level = AV1_RES_4K;
954 max_w = VCODEC_DEC_4K_CODED_WIDTH;
955 max_h = VCODEC_DEC_4K_CODED_HEIGHT;
956 } else {
957 /* FHD */
958 level = AV1_RES_FHD;
959 max_w = MTK_VDEC_MAX_W;
960 max_h = MTK_VDEC_MAX_H;
961 }
962
963 if (level == instance->level)
964 return 0;
965
966 mtk_vdec_debug(ctx, "resolution level changed from %u to %u, %ux%u",
967 instance->level, level, w, h);
968
969 max_sb_w = DIV_ROUND_UP(max_w, 128);
970 max_sb_h = DIV_ROUND_UP(max_h, 128);
971
972 for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
973 if (instance->mv[i].va)
974 mtk_vcodec_mem_free(ctx, &instance->mv[i]);
975 instance->mv[i].size = max_sb_w * max_sb_h * SZ_1K;
976 ret = mtk_vcodec_mem_alloc(ctx, &instance->mv[i]);
977 if (ret)
978 goto err;
979
980 if (instance->seg[i].va)
981 mtk_vcodec_mem_free(ctx, &instance->seg[i]);
982 instance->seg[i].size = max_sb_w * max_sb_h * 512;
983 ret = mtk_vcodec_mem_alloc(ctx, &instance->seg[i]);
984 if (ret)
985 goto err;
986
987 if (instance->cdf[i].va)
988 mtk_vcodec_mem_free(ctx, &instance->cdf[i]);
989 instance->cdf[i].size = AV1_CDF_TABLE_BUFFER_SIZE;
990 ret = mtk_vcodec_mem_alloc(ctx, &instance->cdf[i]);
991 if (ret)
992 goto err;
993 }
994
995 if (!instance->cdf_temp.va) {
996 instance->cdf_temp.size = (SZ_1K * 16 * 100);
997 ret = mtk_vcodec_mem_alloc(ctx, &instance->cdf_temp);
998 if (ret)
999 goto err;
1000 vsi->cdf_tmp.buf = instance->cdf_temp.dma_addr;
1001 vsi->cdf_tmp.size = instance->cdf_temp.size;
1002 }
1003
1004 if (instance->tile.va)
1005 mtk_vcodec_mem_free(ctx, &instance->tile);
1006
1007 instance->tile.size = AV1_TILE_BUF_SIZE * V4L2_AV1_MAX_TILE_COUNT;
1008 ret = mtk_vcodec_mem_alloc(ctx, &instance->tile);
1009 if (ret)
1010 goto err;
1011
1012 instance->level = level;
1013 return 0;
1014
1015 err:
1016 instance->level = AV1_RES_NONE;
1017 return ret;
1018 }
1019
vdec_av1_slice_free_working_buffer(struct vdec_av1_slice_instance * instance)1020 static void vdec_av1_slice_free_working_buffer(struct vdec_av1_slice_instance *instance)
1021 {
1022 struct mtk_vcodec_dec_ctx *ctx = instance->ctx;
1023 int i;
1024
1025 for (i = 0; i < ARRAY_SIZE(instance->mv); i++)
1026 if (instance->mv[i].va)
1027 mtk_vcodec_mem_free(ctx, &instance->mv[i]);
1028
1029 for (i = 0; i < ARRAY_SIZE(instance->seg); i++)
1030 if (instance->seg[i].va)
1031 mtk_vcodec_mem_free(ctx, &instance->seg[i]);
1032
1033 for (i = 0; i < ARRAY_SIZE(instance->cdf); i++)
1034 if (instance->cdf[i].va)
1035 mtk_vcodec_mem_free(ctx, &instance->cdf[i]);
1036
1037
1038 if (instance->tile.va)
1039 mtk_vcodec_mem_free(ctx, &instance->tile);
1040 if (instance->cdf_temp.va)
1041 mtk_vcodec_mem_free(ctx, &instance->cdf_temp);
1042 if (instance->cdf_table.va)
1043 mtk_vcodec_mem_free(ctx, &instance->cdf_table);
1044 if (instance->iq_table.va)
1045 mtk_vcodec_mem_free(ctx, &instance->iq_table);
1046
1047 instance->level = AV1_RES_NONE;
1048 }
1049
vdec_av1_slice_vsi_from_remote(struct vdec_av1_slice_vsi * vsi,struct vdec_av1_slice_vsi * remote_vsi)1050 static inline void vdec_av1_slice_vsi_from_remote(struct vdec_av1_slice_vsi *vsi,
1051 struct vdec_av1_slice_vsi *remote_vsi)
1052 {
1053 memcpy(&vsi->trans, &remote_vsi->trans, sizeof(vsi->trans));
1054 memcpy(&vsi->state, &remote_vsi->state, sizeof(vsi->state));
1055 }
1056
vdec_av1_slice_vsi_to_remote(struct vdec_av1_slice_vsi * vsi,struct vdec_av1_slice_vsi * remote_vsi)1057 static inline void vdec_av1_slice_vsi_to_remote(struct vdec_av1_slice_vsi *vsi,
1058 struct vdec_av1_slice_vsi *remote_vsi)
1059 {
1060 memcpy(remote_vsi, vsi, sizeof(*vsi));
1061 }
1062
vdec_av1_slice_setup_lat_from_src_buf(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_vsi * vsi,struct vdec_lat_buf * lat_buf)1063 static int vdec_av1_slice_setup_lat_from_src_buf(struct vdec_av1_slice_instance *instance,
1064 struct vdec_av1_slice_vsi *vsi,
1065 struct vdec_lat_buf *lat_buf)
1066 {
1067 struct vb2_v4l2_buffer *src;
1068 struct vb2_v4l2_buffer *dst;
1069
1070 src = v4l2_m2m_next_src_buf(instance->ctx->m2m_ctx);
1071 if (!src)
1072 return -EINVAL;
1073
1074 lat_buf->src_buf_req = src->vb2_buf.req_obj.req;
1075 dst = &lat_buf->ts_info;
1076 v4l2_m2m_buf_copy_metadata(src, dst, true);
1077 vsi->frame.cur_ts = dst->vb2_buf.timestamp;
1078
1079 return 0;
1080 }
1081
/*
 * Look up a fixed-point multiplier for dividing by @D in div_lut[].
 *
 * *@shift is first set to the value vdec_av1_slice_get_msb() returns for @D.
 * The bits of @D below that position are reduced (with rounding) or widened
 * to DIV_LUT_BITS bits and used as the table index.
 *
 * On success the table entry is returned and *@shift has been increased by
 * DIV_LUT_PREC_BITS. If the index falls outside the table, -1 is returned
 * and *@shift is left at the msb value.
 */
static short vdec_av1_slice_resolve_divisor_32(u32 D, short *shift)
{
	int below_msb;
	int lut_idx;

	*shift = vdec_av1_slice_get_msb(D);

	/* D with its most significant 1 bit cleared */
	below_msb = D - ((u32)1 << *shift);

	/* keep the DIV_LUT_BITS (8) most significant bits of that remainder */
	if (*shift > DIV_LUT_BITS)
		lut_idx = AV1_DIV_ROUND_UP_POW2(below_msb, *shift - DIV_LUT_BITS);
	else
		lut_idx = below_msb << (DIV_LUT_BITS - *shift);

	if (lut_idx > DIV_LUT_NUM)
		return -1;

	*shift += DIV_LUT_PREC_BITS;

	/* the index selects the precomputed multiplier */
	return div_lut[lut_idx];
}
1101
vdec_av1_slice_get_shear_params(struct vdec_av1_slice_gm * gm_params)1102 static void vdec_av1_slice_get_shear_params(struct vdec_av1_slice_gm *gm_params)
1103 {
1104 const int *mat = gm_params->wmmat;
1105 short shift;
1106 short y;
1107 long long gv, dv;
1108
1109 if (gm_params->wmmat[2] <= 0)
1110 return;
1111
1112 gm_params->alpha = clamp_val(mat[2] - (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);
1113 gm_params->beta = clamp_val(mat[3], S16_MIN, S16_MAX);
1114
1115 y = vdec_av1_slice_resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);
1116
1117 gv = ((long long)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;
1118 gm_params->gamma = clamp_val((int)AV1_DIV_ROUND_UP_POW2_SIGNED(gv, shift),
1119 S16_MIN, S16_MAX);
1120
1121 dv = ((long long)mat[3] * mat[4]) * y;
1122 gm_params->delta = clamp_val(mat[5] - (int)AV1_DIV_ROUND_UP_POW2_SIGNED(dv, shift) -
1123 (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);
1124
1125 gm_params->alpha = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->alpha, WARP_PARAM_REDUCE_BITS) *
1126 (1 << WARP_PARAM_REDUCE_BITS);
1127 gm_params->beta = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->beta, WARP_PARAM_REDUCE_BITS) *
1128 (1 << WARP_PARAM_REDUCE_BITS);
1129 gm_params->gamma = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->gamma, WARP_PARAM_REDUCE_BITS) *
1130 (1 << WARP_PARAM_REDUCE_BITS);
1131 gm_params->delta = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->delta, WARP_PARAM_REDUCE_BITS) *
1132 (1 << WARP_PARAM_REDUCE_BITS);
1133 }
1134
vdec_av1_slice_setup_gm(struct vdec_av1_slice_gm * gm,struct v4l2_av1_global_motion * ctrl_gm)1135 static void vdec_av1_slice_setup_gm(struct vdec_av1_slice_gm *gm,
1136 struct v4l2_av1_global_motion *ctrl_gm)
1137 {
1138 u32 i, j;
1139
1140 for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++) {
1141 gm[i].wmtype = ctrl_gm->type[i];
1142 for (j = 0; j < 6; j++)
1143 gm[i].wmmat[j] = ctrl_gm->params[i][j];
1144
1145 gm[i].invalid = !!(ctrl_gm->invalid & BIT(i));
1146 gm[i].alpha = 0;
1147 gm[i].beta = 0;
1148 gm[i].gamma = 0;
1149 gm[i].delta = 0;
1150 if (gm[i].wmtype <= V4L2_AV1_WARP_MODEL_AFFINE)
1151 vdec_av1_slice_get_shear_params(&gm[i]);
1152 }
1153 }
1154
vdec_av1_slice_setup_seg(struct vdec_av1_slice_seg * seg,struct v4l2_av1_segmentation * ctrl_seg)1155 static void vdec_av1_slice_setup_seg(struct vdec_av1_slice_seg *seg,
1156 struct v4l2_av1_segmentation *ctrl_seg)
1157 {
1158 u32 i, j;
1159
1160 seg->segmentation_enabled = SEGMENTATION_FLAG(ctrl_seg, ENABLED);
1161 seg->segmentation_update_map = SEGMENTATION_FLAG(ctrl_seg, UPDATE_MAP);
1162 seg->segmentation_temporal_update = SEGMENTATION_FLAG(ctrl_seg, TEMPORAL_UPDATE);
1163 seg->segmentation_update_data = SEGMENTATION_FLAG(ctrl_seg, UPDATE_DATA);
1164 seg->segid_preskip = SEGMENTATION_FLAG(ctrl_seg, SEG_ID_PRE_SKIP);
1165 seg->last_active_segid = ctrl_seg->last_active_seg_id;
1166
1167 for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
1168 seg->feature_enabled_mask[i] = ctrl_seg->feature_enabled[i];
1169 for (j = 0; j < V4L2_AV1_SEG_LVL_MAX; j++)
1170 seg->feature_data[i][j] = ctrl_seg->feature_data[i][j];
1171 }
1172 }
1173
vdec_av1_slice_setup_quant(struct vdec_av1_slice_quantization * quant,struct v4l2_av1_quantization * ctrl_quant)1174 static void vdec_av1_slice_setup_quant(struct vdec_av1_slice_quantization *quant,
1175 struct v4l2_av1_quantization *ctrl_quant)
1176 {
1177 quant->base_q_idx = ctrl_quant->base_q_idx;
1178 quant->delta_qydc = ctrl_quant->delta_q_y_dc;
1179 quant->delta_qudc = ctrl_quant->delta_q_u_dc;
1180 quant->delta_quac = ctrl_quant->delta_q_u_ac;
1181 quant->delta_qvdc = ctrl_quant->delta_q_v_dc;
1182 quant->delta_qvac = ctrl_quant->delta_q_v_ac;
1183 quant->qm_y = ctrl_quant->qm_y;
1184 quant->qm_u = ctrl_quant->qm_u;
1185 quant->qm_v = ctrl_quant->qm_v;
1186 quant->using_qmatrix = QUANT_FLAG(ctrl_quant, USING_QMATRIX);
1187 }
1188
vdec_av1_slice_get_qindex(struct vdec_av1_slice_uncompressed_header * uh,int segmentation_id)1189 static int vdec_av1_slice_get_qindex(struct vdec_av1_slice_uncompressed_header *uh,
1190 int segmentation_id)
1191 {
1192 struct vdec_av1_slice_seg *seg = &uh->seg;
1193 struct vdec_av1_slice_quantization *quant = &uh->quant;
1194 int data = 0, qindex = 0;
1195
1196 if (seg->segmentation_enabled &&
1197 (seg->feature_enabled_mask[segmentation_id] & BIT(SEG_LVL_ALT_Q))) {
1198 data = seg->feature_data[segmentation_id][SEG_LVL_ALT_Q];
1199 qindex = quant->base_q_idx + data;
1200 return clamp_val(qindex, 0, MAXQ);
1201 }
1202
1203 return quant->base_q_idx;
1204 }
1205
vdec_av1_slice_setup_lr(struct vdec_av1_slice_lr * lr,struct v4l2_av1_loop_restoration * ctrl_lr)1206 static void vdec_av1_slice_setup_lr(struct vdec_av1_slice_lr *lr,
1207 struct v4l2_av1_loop_restoration *ctrl_lr)
1208 {
1209 int i;
1210
1211 lr->use_lr = 0;
1212 lr->use_chroma_lr = 0;
1213 for (i = 0; i < V4L2_AV1_NUM_PLANES_MAX; i++) {
1214 lr->frame_restoration_type[i] = ctrl_lr->frame_restoration_type[i];
1215 lr->loop_restoration_size[i] = ctrl_lr->loop_restoration_size[i];
1216 if (lr->frame_restoration_type[i]) {
1217 lr->use_lr = 1;
1218 if (i > 0)
1219 lr->use_chroma_lr = 1;
1220 }
1221 }
1222 }
1223
vdec_av1_slice_setup_lf(struct vdec_av1_slice_loop_filter * lf,struct v4l2_av1_loop_filter * ctrl_lf)1224 static void vdec_av1_slice_setup_lf(struct vdec_av1_slice_loop_filter *lf,
1225 struct v4l2_av1_loop_filter *ctrl_lf)
1226 {
1227 int i;
1228
1229 for (i = 0; i < ARRAY_SIZE(lf->loop_filter_level); i++)
1230 lf->loop_filter_level[i] = ctrl_lf->level[i];
1231
1232 for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++)
1233 lf->loop_filter_ref_deltas[i] = ctrl_lf->ref_deltas[i];
1234
1235 for (i = 0; i < ARRAY_SIZE(lf->loop_filter_mode_deltas); i++)
1236 lf->loop_filter_mode_deltas[i] = ctrl_lf->mode_deltas[i];
1237
1238 lf->loop_filter_sharpness = ctrl_lf->sharpness;
1239 lf->loop_filter_delta_enabled =
1240 BIT_FLAG(ctrl_lf, V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED);
1241 }
1242
vdec_av1_slice_setup_cdef(struct vdec_av1_slice_cdef * cdef,struct v4l2_av1_cdef * ctrl_cdef)1243 static void vdec_av1_slice_setup_cdef(struct vdec_av1_slice_cdef *cdef,
1244 struct v4l2_av1_cdef *ctrl_cdef)
1245 {
1246 int i;
1247
1248 cdef->cdef_damping = ctrl_cdef->damping_minus_3 + 3;
1249 cdef->cdef_bits = ctrl_cdef->bits;
1250
1251 for (i = 0; i < V4L2_AV1_CDEF_MAX; i++) {
1252 if (ctrl_cdef->y_sec_strength[i] == 4)
1253 ctrl_cdef->y_sec_strength[i] -= 1;
1254
1255 if (ctrl_cdef->uv_sec_strength[i] == 4)
1256 ctrl_cdef->uv_sec_strength[i] -= 1;
1257
1258 cdef->cdef_y_strength[i] =
1259 ctrl_cdef->y_pri_strength[i] << SECONDARY_FILTER_STRENGTH_NUM_BITS |
1260 ctrl_cdef->y_sec_strength[i];
1261 cdef->cdef_uv_strength[i] =
1262 ctrl_cdef->uv_pri_strength[i] << SECONDARY_FILTER_STRENGTH_NUM_BITS |
1263 ctrl_cdef->uv_sec_strength[i];
1264 }
1265 }
1266
vdec_av1_slice_setup_seq(struct vdec_av1_slice_seq_header * seq,struct v4l2_ctrl_av1_sequence * ctrl_seq)1267 static void vdec_av1_slice_setup_seq(struct vdec_av1_slice_seq_header *seq,
1268 struct v4l2_ctrl_av1_sequence *ctrl_seq)
1269 {
1270 seq->bitdepth = ctrl_seq->bit_depth;
1271 seq->max_frame_width = ctrl_seq->max_frame_width_minus_1 + 1;
1272 seq->max_frame_height = ctrl_seq->max_frame_height_minus_1 + 1;
1273 seq->enable_superres = SEQUENCE_FLAG(ctrl_seq, ENABLE_SUPERRES);
1274 seq->enable_filter_intra = SEQUENCE_FLAG(ctrl_seq, ENABLE_FILTER_INTRA);
1275 seq->enable_intra_edge_filter = SEQUENCE_FLAG(ctrl_seq, ENABLE_INTRA_EDGE_FILTER);
1276 seq->enable_interintra_compound = SEQUENCE_FLAG(ctrl_seq, ENABLE_INTERINTRA_COMPOUND);
1277 seq->enable_masked_compound = SEQUENCE_FLAG(ctrl_seq, ENABLE_MASKED_COMPOUND);
1278 seq->enable_dual_filter = SEQUENCE_FLAG(ctrl_seq, ENABLE_DUAL_FILTER);
1279 seq->enable_jnt_comp = SEQUENCE_FLAG(ctrl_seq, ENABLE_JNT_COMP);
1280 seq->mono_chrome = SEQUENCE_FLAG(ctrl_seq, MONO_CHROME);
1281 seq->enable_order_hint = SEQUENCE_FLAG(ctrl_seq, ENABLE_ORDER_HINT);
1282 seq->order_hint_bits = ctrl_seq->order_hint_bits;
1283 seq->use_128x128_superblock = SEQUENCE_FLAG(ctrl_seq, USE_128X128_SUPERBLOCK);
1284 seq->subsampling_x = SEQUENCE_FLAG(ctrl_seq, SUBSAMPLING_X);
1285 seq->subsampling_y = SEQUENCE_FLAG(ctrl_seq, SUBSAMPLING_Y);
1286 }
1287
vdec_av1_slice_setup_tile(struct vdec_av1_slice_frame * frame,struct v4l2_av1_tile_info * ctrl_tile)1288 static void vdec_av1_slice_setup_tile(struct vdec_av1_slice_frame *frame,
1289 struct v4l2_av1_tile_info *ctrl_tile)
1290 {
1291 struct vdec_av1_slice_seq_header *seq = &frame->seq;
1292 struct vdec_av1_slice_tile *tile = &frame->uh.tile;
1293 u32 mib_size_log2 = seq->use_128x128_superblock ? 5 : 4;
1294 int i;
1295
1296 tile->tile_cols = ctrl_tile->tile_cols;
1297 tile->tile_rows = ctrl_tile->tile_rows;
1298 tile->context_update_tile_id = ctrl_tile->context_update_tile_id;
1299 tile->uniform_tile_spacing_flag =
1300 BIT_FLAG(ctrl_tile, V4L2_AV1_TILE_INFO_FLAG_UNIFORM_TILE_SPACING);
1301
1302 for (i = 0; i < tile->tile_cols + 1; i++)
1303 tile->mi_col_starts[i] =
1304 ALIGN(ctrl_tile->mi_col_starts[i], BIT(mib_size_log2)) >> mib_size_log2;
1305
1306 for (i = 0; i < tile->tile_rows + 1; i++)
1307 tile->mi_row_starts[i] =
1308 ALIGN(ctrl_tile->mi_row_starts[i], BIT(mib_size_log2)) >> mib_size_log2;
1309 }
1310
vdec_av1_slice_setup_uh(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_frame * frame,struct v4l2_ctrl_av1_frame * ctrl_fh)1311 static void vdec_av1_slice_setup_uh(struct vdec_av1_slice_instance *instance,
1312 struct vdec_av1_slice_frame *frame,
1313 struct v4l2_ctrl_av1_frame *ctrl_fh)
1314 {
1315 struct vdec_av1_slice_uncompressed_header *uh = &frame->uh;
1316 int i;
1317
1318 uh->use_ref_frame_mvs = FH_FLAG(ctrl_fh, USE_REF_FRAME_MVS);
1319 uh->order_hint = ctrl_fh->order_hint;
1320 vdec_av1_slice_setup_gm(uh->gm, &ctrl_fh->global_motion);
1321 uh->upscaled_width = ctrl_fh->upscaled_width;
1322 uh->frame_width = ctrl_fh->frame_width_minus_1 + 1;
1323 uh->frame_height = ctrl_fh->frame_height_minus_1 + 1;
1324 uh->mi_cols = ((uh->frame_width + 7) >> 3) << 1;
1325 uh->mi_rows = ((uh->frame_height + 7) >> 3) << 1;
1326 uh->reduced_tx_set = FH_FLAG(ctrl_fh, REDUCED_TX_SET);
1327 uh->tx_mode = ctrl_fh->tx_mode;
1328 uh->uniform_tile_spacing_flag =
1329 BIT_FLAG(&ctrl_fh->tile_info, V4L2_AV1_TILE_INFO_FLAG_UNIFORM_TILE_SPACING);
1330 uh->interpolation_filter = ctrl_fh->interpolation_filter;
1331 uh->allow_warped_motion = FH_FLAG(ctrl_fh, ALLOW_WARPED_MOTION);
1332 uh->is_motion_mode_switchable = FH_FLAG(ctrl_fh, IS_MOTION_MODE_SWITCHABLE);
1333 uh->frame_type = ctrl_fh->frame_type;
1334 uh->frame_is_intra = (uh->frame_type == V4L2_AV1_INTRA_ONLY_FRAME ||
1335 uh->frame_type == V4L2_AV1_KEY_FRAME);
1336
1337 if (!uh->frame_is_intra && FH_FLAG(ctrl_fh, REFERENCE_SELECT))
1338 uh->reference_mode = AV1_REFERENCE_MODE_SELECT;
1339 else
1340 uh->reference_mode = AV1_SINGLE_REFERENCE;
1341
1342 uh->allow_high_precision_mv = FH_FLAG(ctrl_fh, ALLOW_HIGH_PRECISION_MV);
1343 uh->allow_intra_bc = FH_FLAG(ctrl_fh, ALLOW_INTRABC);
1344 uh->force_integer_mv = FH_FLAG(ctrl_fh, FORCE_INTEGER_MV);
1345 uh->allow_screen_content_tools = FH_FLAG(ctrl_fh, ALLOW_SCREEN_CONTENT_TOOLS);
1346 uh->error_resilient_mode = FH_FLAG(ctrl_fh, ERROR_RESILIENT_MODE);
1347 uh->primary_ref_frame = ctrl_fh->primary_ref_frame;
1348 uh->disable_frame_end_update_cdf =
1349 FH_FLAG(ctrl_fh, DISABLE_FRAME_END_UPDATE_CDF);
1350 uh->disable_cdf_update = FH_FLAG(ctrl_fh, DISABLE_CDF_UPDATE);
1351 uh->skip_mode.skip_mode_present = FH_FLAG(ctrl_fh, SKIP_MODE_PRESENT);
1352 uh->skip_mode.skip_mode_frame[0] =
1353 ctrl_fh->skip_mode_frame[0] - V4L2_AV1_REF_LAST_FRAME;
1354 uh->skip_mode.skip_mode_frame[1] =
1355 ctrl_fh->skip_mode_frame[1] - V4L2_AV1_REF_LAST_FRAME;
1356 uh->skip_mode.skip_mode_allowed = ctrl_fh->skip_mode_frame[0] ? 1 : 0;
1357
1358 vdec_av1_slice_setup_seg(&uh->seg, &ctrl_fh->segmentation);
1359 uh->delta_q_lf.delta_q_present = QUANT_FLAG(&ctrl_fh->quantization, DELTA_Q_PRESENT);
1360 uh->delta_q_lf.delta_q_res = 1 << ctrl_fh->quantization.delta_q_res;
1361 uh->delta_q_lf.delta_lf_present =
1362 BIT_FLAG(&ctrl_fh->loop_filter, V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT);
1363 uh->delta_q_lf.delta_lf_res = ctrl_fh->loop_filter.delta_lf_res;
1364 uh->delta_q_lf.delta_lf_multi =
1365 BIT_FLAG(&ctrl_fh->loop_filter, V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI);
1366 vdec_av1_slice_setup_quant(&uh->quant, &ctrl_fh->quantization);
1367
1368 uh->coded_loss_less = 1;
1369 for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
1370 uh->quant.qindex[i] = vdec_av1_slice_get_qindex(uh, i);
1371 uh->loss_less_array[i] =
1372 (uh->quant.qindex[i] == 0 && uh->quant.delta_qydc == 0 &&
1373 uh->quant.delta_quac == 0 && uh->quant.delta_qudc == 0 &&
1374 uh->quant.delta_qvac == 0 && uh->quant.delta_qvdc == 0);
1375
1376 if (!uh->loss_less_array[i])
1377 uh->coded_loss_less = 0;
1378 }
1379
1380 vdec_av1_slice_setup_lr(&uh->lr, &ctrl_fh->loop_restoration);
1381 uh->superres_denom = ctrl_fh->superres_denom;
1382 vdec_av1_slice_setup_lf(&uh->loop_filter, &ctrl_fh->loop_filter);
1383 vdec_av1_slice_setup_cdef(&uh->cdef, &ctrl_fh->cdef);
1384 vdec_av1_slice_setup_tile(frame, &ctrl_fh->tile_info);
1385 }
1386
vdec_av1_slice_setup_tile_group(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_vsi * vsi)1387 static int vdec_av1_slice_setup_tile_group(struct vdec_av1_slice_instance *instance,
1388 struct vdec_av1_slice_vsi *vsi)
1389 {
1390 struct v4l2_ctrl_av1_tile_group_entry *ctrl_tge;
1391 struct vdec_av1_slice_tile_group *tile_group = &instance->tile_group;
1392 struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh;
1393 struct vdec_av1_slice_tile *tile = &uh->tile;
1394 struct v4l2_ctrl *ctrl;
1395 u32 tge_size;
1396 int i;
1397
1398 ctrl = v4l2_ctrl_find(&instance->ctx->ctrl_hdl, V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY);
1399 if (!ctrl)
1400 return -EINVAL;
1401
1402 tge_size = ctrl->elems;
1403 ctrl_tge = (struct v4l2_ctrl_av1_tile_group_entry *)ctrl->p_cur.p;
1404
1405 tile_group->num_tiles = tile->tile_cols * tile->tile_rows;
1406
1407 if (tile_group->num_tiles != tge_size ||
1408 tile_group->num_tiles > V4L2_AV1_MAX_TILE_COUNT) {
1409 mtk_vdec_err(instance->ctx, "invalid tge_size %d, tile_num:%d\n",
1410 tge_size, tile_group->num_tiles);
1411 return -EINVAL;
1412 }
1413
1414 for (i = 0; i < tge_size; i++) {
1415 if (i != ctrl_tge[i].tile_row * vsi->frame.uh.tile.tile_cols +
1416 ctrl_tge[i].tile_col) {
1417 mtk_vdec_err(instance->ctx, "invalid tge info %d, %d %d %d\n",
1418 i, ctrl_tge[i].tile_row, ctrl_tge[i].tile_col,
1419 vsi->frame.uh.tile.tile_rows);
1420 return -EINVAL;
1421 }
1422 tile_group->tile_size[i] = ctrl_tge[i].tile_size;
1423 tile_group->tile_start_offset[i] = ctrl_tge[i].tile_offset;
1424 }
1425
1426 return 0;
1427 }
1428
vdec_av1_slice_setup_state(struct vdec_av1_slice_vsi * vsi)1429 static inline void vdec_av1_slice_setup_state(struct vdec_av1_slice_vsi *vsi)
1430 {
1431 memset(&vsi->state, 0, sizeof(vsi->state));
1432 }
1433
vdec_av1_slice_setup_scale_factors(struct vdec_av1_slice_frame_refs * frame_ref,struct vdec_av1_slice_frame_info * ref_frame_info,struct vdec_av1_slice_uncompressed_header * uh)1434 static void vdec_av1_slice_setup_scale_factors(struct vdec_av1_slice_frame_refs *frame_ref,
1435 struct vdec_av1_slice_frame_info *ref_frame_info,
1436 struct vdec_av1_slice_uncompressed_header *uh)
1437 {
1438 struct vdec_av1_slice_scale_factors *scale_factors = &frame_ref->scale_factors;
1439 u32 ref_upscaled_width = ref_frame_info->upscaled_width;
1440 u32 ref_frame_height = ref_frame_info->frame_height;
1441 u32 frame_width = uh->frame_width;
1442 u32 frame_height = uh->frame_height;
1443
1444 if (!vdec_av1_slice_need_scale(ref_upscaled_width, ref_frame_height,
1445 frame_width, frame_height)) {
1446 scale_factors->x_scale = -1;
1447 scale_factors->y_scale = -1;
1448 scale_factors->is_scaled = 0;
1449 return;
1450 }
1451
1452 scale_factors->x_scale =
1453 ((ref_upscaled_width << AV1_REF_SCALE_SHIFT) + (frame_width >> 1)) / frame_width;
1454 scale_factors->y_scale =
1455 ((ref_frame_height << AV1_REF_SCALE_SHIFT) + (frame_height >> 1)) / frame_height;
1456 scale_factors->is_scaled =
1457 (scale_factors->x_scale != AV1_REF_INVALID_SCALE) &&
1458 (scale_factors->y_scale != AV1_REF_INVALID_SCALE) &&
1459 (scale_factors->x_scale != AV1_REF_NO_SCALE ||
1460 scale_factors->y_scale != AV1_REF_NO_SCALE);
1461 scale_factors->x_step =
1462 AV1_DIV_ROUND_UP_POW2(scale_factors->x_scale,
1463 AV1_REF_SCALE_SHIFT - AV1_SCALE_SUBPEL_BITS);
1464 scale_factors->y_step =
1465 AV1_DIV_ROUND_UP_POW2(scale_factors->y_scale,
1466 AV1_REF_SCALE_SHIFT - AV1_SCALE_SUBPEL_BITS);
1467 }
1468
/*
 * Tell whether order hint @a lies after order hint @b.
 *
 * The difference a - b is interpreted as a signed value of @order_hint_bits
 * bits, so hints that wrapped around still compare correctly.
 *
 * Returns 1 when that wrapped difference is positive, 0 otherwise. 0 is also
 * returned when order hints are disabled, or when @order_hint_bits is 0 or
 * not smaller than the width of int: the mask shift below would be undefined
 * for those values.
 */
static unsigned char vdec_av1_slice_get_sign_bias(int a,
						  int b,
						  u8 enable_order_hint,
						  u8 order_hint_bits)
{
	int diff;
	int m;

	if (!enable_order_hint)
		return 0;

	/* 1 << (order_hint_bits - 1) must be a valid, non-overflowing shift */
	if (order_hint_bits == 0 || order_hint_bits >= 8 * sizeof(int))
		return 0;

	diff = a - b;
	m = 1 << (order_hint_bits - 1);
	/* sign-extend the low order_hint_bits bits of the difference */
	diff = (diff & (m - 1)) - (diff & m);

	return diff > 0 ? 1 : 0;
}
1490
vdec_av1_slice_setup_ref(struct vdec_av1_slice_pfc * pfc,struct v4l2_ctrl_av1_frame * ctrl_fh)1491 static void vdec_av1_slice_setup_ref(struct vdec_av1_slice_pfc *pfc,
1492 struct v4l2_ctrl_av1_frame *ctrl_fh)
1493 {
1494 struct vdec_av1_slice_vsi *vsi = &pfc->vsi;
1495 struct vdec_av1_slice_frame *frame = &vsi->frame;
1496 struct vdec_av1_slice_slot *slots = &vsi->slots;
1497 struct vdec_av1_slice_uncompressed_header *uh = &frame->uh;
1498 struct vdec_av1_slice_seq_header *seq = &frame->seq;
1499 struct vdec_av1_slice_frame_info *cur_frame_info =
1500 &slots->frame_info[vsi->slot_id];
1501 struct vdec_av1_slice_frame_info *frame_info;
1502 int i, slot_id;
1503
1504 if (uh->frame_is_intra)
1505 return;
1506
1507 for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) {
1508 int ref_idx = ctrl_fh->ref_frame_idx[i];
1509
1510 pfc->ref_idx[i] = ctrl_fh->reference_frame_ts[ref_idx];
1511 slot_id = frame->ref_frame_map[ref_idx];
1512 frame_info = &slots->frame_info[slot_id];
1513 if (slot_id == AV1_INVALID_IDX) {
1514 pr_err(MTK_DBG_V4L2_STR "cannot match reference[%d] 0x%llx\n", i,
1515 ctrl_fh->reference_frame_ts[ref_idx]);
1516 frame->order_hints[i] = 0;
1517 frame->ref_frame_valid[i] = 0;
1518 continue;
1519 }
1520
1521 frame->frame_refs[i].ref_fb_idx = slot_id;
1522 vdec_av1_slice_setup_scale_factors(&frame->frame_refs[i],
1523 frame_info, uh);
1524 if (!seq->enable_order_hint)
1525 frame->ref_frame_sign_bias[i + 1] = 0;
1526 else
1527 frame->ref_frame_sign_bias[i + 1] =
1528 vdec_av1_slice_get_sign_bias(frame_info->order_hint,
1529 uh->order_hint,
1530 seq->enable_order_hint,
1531 seq->order_hint_bits);
1532
1533 frame->order_hints[i] = ctrl_fh->order_hints[i + 1];
1534 cur_frame_info->order_hints[i] = frame->order_hints[i];
1535 frame->ref_frame_valid[i] = 1;
1536 }
1537 }
1538
vdec_av1_slice_get_previous(struct vdec_av1_slice_vsi * vsi)1539 static void vdec_av1_slice_get_previous(struct vdec_av1_slice_vsi *vsi)
1540 {
1541 struct vdec_av1_slice_frame *frame = &vsi->frame;
1542
1543 if (frame->uh.primary_ref_frame == AV1_PRIMARY_REF_NONE)
1544 frame->prev_fb_idx = AV1_INVALID_IDX;
1545 else
1546 frame->prev_fb_idx = frame->frame_refs[frame->uh.primary_ref_frame].ref_fb_idx;
1547 }
1548
vdec_av1_slice_setup_operating_mode(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_frame * frame)1549 static inline void vdec_av1_slice_setup_operating_mode(struct vdec_av1_slice_instance *instance,
1550 struct vdec_av1_slice_frame *frame)
1551 {
1552 frame->large_scale_tile = 0;
1553 }
1554
vdec_av1_slice_setup_pfc(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_pfc * pfc)1555 static int vdec_av1_slice_setup_pfc(struct vdec_av1_slice_instance *instance,
1556 struct vdec_av1_slice_pfc *pfc)
1557 {
1558 struct v4l2_ctrl_av1_frame *ctrl_fh;
1559 struct v4l2_ctrl_av1_sequence *ctrl_seq;
1560 struct vdec_av1_slice_vsi *vsi = &pfc->vsi;
1561 int ret = 0;
1562
1563 /* frame header */
1564 ctrl_fh = (struct v4l2_ctrl_av1_frame *)
1565 vdec_av1_get_ctrl_ptr(instance->ctx,
1566 V4L2_CID_STATELESS_AV1_FRAME);
1567 if (IS_ERR(ctrl_fh))
1568 return PTR_ERR(ctrl_fh);
1569
1570 ctrl_seq = (struct v4l2_ctrl_av1_sequence *)
1571 vdec_av1_get_ctrl_ptr(instance->ctx,
1572 V4L2_CID_STATELESS_AV1_SEQUENCE);
1573 if (IS_ERR(ctrl_seq))
1574 return PTR_ERR(ctrl_seq);
1575
1576 /* setup vsi information */
1577 vdec_av1_slice_setup_seq(&vsi->frame.seq, ctrl_seq);
1578 vdec_av1_slice_setup_uh(instance, &vsi->frame, ctrl_fh);
1579 vdec_av1_slice_setup_operating_mode(instance, &vsi->frame);
1580
1581 vdec_av1_slice_setup_state(vsi);
1582 vdec_av1_slice_setup_slot(instance, vsi, ctrl_fh);
1583 vdec_av1_slice_setup_ref(pfc, ctrl_fh);
1584 vdec_av1_slice_get_previous(vsi);
1585
1586 pfc->seq = instance->seq;
1587 instance->seq++;
1588
1589 return ret;
1590 }
1591
vdec_av1_slice_setup_lat_buffer(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_vsi * vsi,struct mtk_vcodec_mem * bs,struct vdec_lat_buf * lat_buf)1592 static void vdec_av1_slice_setup_lat_buffer(struct vdec_av1_slice_instance *instance,
1593 struct vdec_av1_slice_vsi *vsi,
1594 struct mtk_vcodec_mem *bs,
1595 struct vdec_lat_buf *lat_buf)
1596 {
1597 struct vdec_av1_slice_work_buffer *work_buffer;
1598 int i;
1599
1600 vsi->bs.dma_addr = bs->dma_addr;
1601 vsi->bs.size = bs->size;
1602
1603 vsi->ube.dma_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr;
1604 vsi->ube.size = lat_buf->ctx->msg_queue.wdma_addr.size;
1605 vsi->trans.dma_addr = lat_buf->ctx->msg_queue.wdma_wptr_addr;
1606 /* used to store trans end */
1607 vsi->trans.dma_addr_end = lat_buf->ctx->msg_queue.wdma_rptr_addr;
1608 vsi->err_map.dma_addr = lat_buf->wdma_err_addr.dma_addr;
1609 vsi->err_map.size = lat_buf->wdma_err_addr.size;
1610 vsi->rd_mv.dma_addr = lat_buf->rd_mv_addr.dma_addr;
1611 vsi->rd_mv.size = lat_buf->rd_mv_addr.size;
1612
1613 vsi->row_info.buf = 0;
1614 vsi->row_info.size = 0;
1615
1616 work_buffer = vsi->work_buffer;
1617
1618 for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
1619 work_buffer[i].mv_addr.buf = instance->mv[i].dma_addr;
1620 work_buffer[i].mv_addr.size = instance->mv[i].size;
1621 work_buffer[i].segid_addr.buf = instance->seg[i].dma_addr;
1622 work_buffer[i].segid_addr.size = instance->seg[i].size;
1623 work_buffer[i].cdf_addr.buf = instance->cdf[i].dma_addr;
1624 work_buffer[i].cdf_addr.size = instance->cdf[i].size;
1625 }
1626
1627 vsi->cdf_tmp.buf = instance->cdf_temp.dma_addr;
1628 vsi->cdf_tmp.size = instance->cdf_temp.size;
1629
1630 vsi->tile.buf = instance->tile.dma_addr;
1631 vsi->tile.size = instance->tile.size;
1632 memcpy(lat_buf->tile_addr.va, instance->tile.va, 64 * instance->tile_group.num_tiles);
1633
1634 vsi->cdf_table.buf = instance->cdf_table.dma_addr;
1635 vsi->cdf_table.size = instance->cdf_table.size;
1636 vsi->iq_table.buf = instance->iq_table.dma_addr;
1637 vsi->iq_table.size = instance->iq_table.size;
1638 }
1639
vdec_av1_slice_setup_seg_buffer(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_vsi * vsi)1640 static void vdec_av1_slice_setup_seg_buffer(struct vdec_av1_slice_instance *instance,
1641 struct vdec_av1_slice_vsi *vsi)
1642 {
1643 struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh;
1644 struct mtk_vcodec_mem *buf;
1645
1646 /* reset segment buffer */
1647 if (uh->primary_ref_frame == AV1_PRIMARY_REF_NONE || !uh->seg.segmentation_enabled) {
1648 mtk_vdec_debug(instance->ctx, "reset seg %d\n", vsi->slot_id);
1649 if (vsi->slot_id != AV1_INVALID_IDX) {
1650 buf = &instance->seg[vsi->slot_id];
1651 memset(buf->va, 0, buf->size);
1652 }
1653 }
1654 }
1655
vdec_av1_slice_setup_tile_buffer(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_vsi * vsi,struct mtk_vcodec_mem * bs)1656 static void vdec_av1_slice_setup_tile_buffer(struct vdec_av1_slice_instance *instance,
1657 struct vdec_av1_slice_vsi *vsi,
1658 struct mtk_vcodec_mem *bs)
1659 {
1660 struct vdec_av1_slice_tile_group *tile_group = &instance->tile_group;
1661 struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh;
1662 struct vdec_av1_slice_tile *tile = &uh->tile;
1663 u32 tile_num, tile_row, tile_col;
1664 u32 allow_update_cdf = 0;
1665 u32 sb_boundary_x_m1 = 0, sb_boundary_y_m1 = 0;
1666 int tile_info_base;
1667 u64 tile_buf_pa;
1668 u32 *tile_info_buf = instance->tile.va;
1669 u64 pa = (u64)bs->dma_addr;
1670
1671 if (uh->disable_cdf_update == 0)
1672 allow_update_cdf = 1;
1673
1674 for (tile_num = 0; tile_num < tile_group->num_tiles; tile_num++) {
1675 /* each uint32 takes place of 4 bytes */
1676 tile_info_base = (AV1_TILE_BUF_SIZE * tile_num) >> 2;
1677 tile_row = tile_num / tile->tile_cols;
1678 tile_col = tile_num % tile->tile_cols;
1679 tile_info_buf[tile_info_base + 0] = (tile_group->tile_size[tile_num] << 3);
1680 tile_buf_pa = pa + tile_group->tile_start_offset[tile_num];
1681
1682 /* save av1 tile high 4bits(bit 32-35) address in lower 4 bits position
1683 * and clear original for hw requirement.
1684 */
1685 tile_info_buf[tile_info_base + 1] = (tile_buf_pa & 0xFFFFFFF0ull) |
1686 ((tile_buf_pa & 0xF00000000ull) >> 32);
1687 tile_info_buf[tile_info_base + 2] = (tile_buf_pa & 0xFull) << 3;
1688
1689 sb_boundary_x_m1 =
1690 (tile->mi_col_starts[tile_col + 1] - tile->mi_col_starts[tile_col] - 1) &
1691 0x3f;
1692 sb_boundary_y_m1 =
1693 (tile->mi_row_starts[tile_row + 1] - tile->mi_row_starts[tile_row] - 1) &
1694 0x1ff;
1695
1696 tile_info_buf[tile_info_base + 3] = (sb_boundary_y_m1 << 7) | sb_boundary_x_m1;
1697 tile_info_buf[tile_info_base + 4] = ((allow_update_cdf << 18) | (1 << 16));
1698
1699 if (tile_num == tile->context_update_tile_id &&
1700 uh->disable_frame_end_update_cdf == 0)
1701 tile_info_buf[tile_info_base + 4] |= (1 << 17);
1702
1703 mtk_vdec_debug(instance->ctx, "// tile buf %d pos(%dx%d) offset 0x%x\n",
1704 tile_num, tile_row, tile_col, tile_info_base);
1705 mtk_vdec_debug(instance->ctx, "// %08x %08x %08x %08x\n",
1706 tile_info_buf[tile_info_base + 0],
1707 tile_info_buf[tile_info_base + 1],
1708 tile_info_buf[tile_info_base + 2],
1709 tile_info_buf[tile_info_base + 3]);
1710 mtk_vdec_debug(instance->ctx, "// %08x %08x %08x %08x\n",
1711 tile_info_buf[tile_info_base + 4],
1712 tile_info_buf[tile_info_base + 5],
1713 tile_info_buf[tile_info_base + 6],
1714 tile_info_buf[tile_info_base + 7]);
1715 }
1716 }
1717
vdec_av1_slice_setup_lat(struct vdec_av1_slice_instance * instance,struct mtk_vcodec_mem * bs,struct vdec_lat_buf * lat_buf,struct vdec_av1_slice_pfc * pfc)1718 static int vdec_av1_slice_setup_lat(struct vdec_av1_slice_instance *instance,
1719 struct mtk_vcodec_mem *bs,
1720 struct vdec_lat_buf *lat_buf,
1721 struct vdec_av1_slice_pfc *pfc)
1722 {
1723 struct vdec_av1_slice_vsi *vsi = &pfc->vsi;
1724 int ret;
1725
1726 ret = vdec_av1_slice_setup_lat_from_src_buf(instance, vsi, lat_buf);
1727 if (ret)
1728 return ret;
1729
1730 ret = vdec_av1_slice_setup_pfc(instance, pfc);
1731 if (ret)
1732 return ret;
1733
1734 ret = vdec_av1_slice_setup_tile_group(instance, vsi);
1735 if (ret)
1736 return ret;
1737
1738 ret = vdec_av1_slice_alloc_working_buffer(instance, vsi);
1739 if (ret)
1740 return ret;
1741
1742 vdec_av1_slice_setup_seg_buffer(instance, vsi);
1743 vdec_av1_slice_setup_tile_buffer(instance, vsi, bs);
1744 vdec_av1_slice_setup_lat_buffer(instance, vsi, bs, lat_buf);
1745
1746 return 0;
1747 }
1748
vdec_av1_slice_update_lat(struct vdec_av1_slice_instance * instance,struct vdec_lat_buf * lat_buf,struct vdec_av1_slice_pfc * pfc)1749 static int vdec_av1_slice_update_lat(struct vdec_av1_slice_instance *instance,
1750 struct vdec_lat_buf *lat_buf,
1751 struct vdec_av1_slice_pfc *pfc)
1752 {
1753 struct vdec_av1_slice_vsi *vsi;
1754
1755 vsi = &pfc->vsi;
1756 mtk_vdec_debug(instance->ctx, "frame %u LAT CRC 0x%08x, output size is %d\n",
1757 pfc->seq, vsi->state.crc[0], vsi->state.out_size);
1758
1759 /* buffer full, need to re-decode */
1760 if (vsi->state.full) {
1761 /* buffer not enough */
1762 if (vsi->trans.dma_addr_end - vsi->trans.dma_addr == vsi->ube.size)
1763 return -ENOMEM;
1764 return -EAGAIN;
1765 }
1766
1767 instance->width = vsi->frame.uh.upscaled_width;
1768 instance->height = vsi->frame.uh.frame_height;
1769 instance->frame_type = vsi->frame.uh.frame_type;
1770
1771 return 0;
1772 }
1773
vdec_av1_slice_setup_core_to_dst_buf(struct vdec_av1_slice_instance * instance,struct vdec_lat_buf * lat_buf)1774 static int vdec_av1_slice_setup_core_to_dst_buf(struct vdec_av1_slice_instance *instance,
1775 struct vdec_lat_buf *lat_buf)
1776 {
1777 struct vb2_v4l2_buffer *dst;
1778
1779 dst = v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx);
1780 if (!dst)
1781 return -EINVAL;
1782
1783 v4l2_m2m_buf_copy_metadata(&lat_buf->ts_info, dst, true);
1784
1785 return 0;
1786 }
1787
vdec_av1_slice_setup_core_buffer(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_pfc * pfc,struct vdec_av1_slice_vsi * vsi,struct vdec_fb * fb,struct vdec_lat_buf * lat_buf)1788 static int vdec_av1_slice_setup_core_buffer(struct vdec_av1_slice_instance *instance,
1789 struct vdec_av1_slice_pfc *pfc,
1790 struct vdec_av1_slice_vsi *vsi,
1791 struct vdec_fb *fb,
1792 struct vdec_lat_buf *lat_buf)
1793 {
1794 struct vb2_buffer *vb;
1795 struct vb2_queue *vq;
1796 int w, h, plane, size;
1797 int i;
1798
1799 plane = instance->ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes;
1800 w = vsi->frame.uh.upscaled_width;
1801 h = vsi->frame.uh.frame_height;
1802 size = ALIGN(w, VCODEC_DEC_ALIGNED_64) * ALIGN(h, VCODEC_DEC_ALIGNED_64);
1803
1804 /* frame buffer */
1805 vsi->fb.y.dma_addr = fb->base_y.dma_addr;
1806 if (plane == 1)
1807 vsi->fb.c.dma_addr = fb->base_y.dma_addr + size;
1808 else
1809 vsi->fb.c.dma_addr = fb->base_c.dma_addr;
1810
1811 /* reference buffers */
1812 vq = v4l2_m2m_get_vq(instance->ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE);
1813 if (!vq)
1814 return -EINVAL;
1815
1816 /* get current output buffer */
1817 vb = &v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx)->vb2_buf;
1818 if (!vb)
1819 return -EINVAL;
1820
1821 /* get buffer address from vb2buf */
1822 for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) {
1823 struct vdec_av1_slice_fb *vref = &vsi->ref[i];
1824
1825 vb = vb2_find_buffer(vq, pfc->ref_idx[i]);
1826 if (!vb) {
1827 memset(vref, 0, sizeof(*vref));
1828 continue;
1829 }
1830
1831 vref->y.dma_addr = vb2_dma_contig_plane_dma_addr(vb, 0);
1832 if (plane == 1)
1833 vref->c.dma_addr = vref->y.dma_addr + size;
1834 else
1835 vref->c.dma_addr = vb2_dma_contig_plane_dma_addr(vb, 1);
1836 }
1837 vsi->tile.dma_addr = lat_buf->tile_addr.dma_addr;
1838 vsi->tile.size = lat_buf->tile_addr.size;
1839
1840 return 0;
1841 }
1842
vdec_av1_slice_setup_core(struct vdec_av1_slice_instance * instance,struct vdec_fb * fb,struct vdec_lat_buf * lat_buf,struct vdec_av1_slice_pfc * pfc)1843 static int vdec_av1_slice_setup_core(struct vdec_av1_slice_instance *instance,
1844 struct vdec_fb *fb,
1845 struct vdec_lat_buf *lat_buf,
1846 struct vdec_av1_slice_pfc *pfc)
1847 {
1848 struct vdec_av1_slice_vsi *vsi = &pfc->vsi;
1849 int ret;
1850
1851 ret = vdec_av1_slice_setup_core_to_dst_buf(instance, lat_buf);
1852 if (ret)
1853 return ret;
1854
1855 ret = vdec_av1_slice_setup_core_buffer(instance, pfc, vsi, fb, lat_buf);
1856 if (ret)
1857 return ret;
1858
1859 return 0;
1860 }
1861
vdec_av1_slice_update_core(struct vdec_av1_slice_instance * instance,struct vdec_lat_buf * lat_buf,struct vdec_av1_slice_pfc * pfc)1862 static int vdec_av1_slice_update_core(struct vdec_av1_slice_instance *instance,
1863 struct vdec_lat_buf *lat_buf,
1864 struct vdec_av1_slice_pfc *pfc)
1865 {
1866 struct vdec_av1_slice_vsi *vsi = instance->core_vsi;
1867
1868 mtk_vdec_debug(instance->ctx, "frame %u Y_CRC %08x %08x %08x %08x\n",
1869 pfc->seq, vsi->state.crc[0], vsi->state.crc[1],
1870 vsi->state.crc[2], vsi->state.crc[3]);
1871 mtk_vdec_debug(instance->ctx, "frame %u C_CRC %08x %08x %08x %08x\n",
1872 pfc->seq, vsi->state.crc[8], vsi->state.crc[9],
1873 vsi->state.crc[10], vsi->state.crc[11]);
1874
1875 return 0;
1876 }
1877
vdec_av1_slice_init(struct mtk_vcodec_dec_ctx * ctx)1878 static int vdec_av1_slice_init(struct mtk_vcodec_dec_ctx *ctx)
1879 {
1880 struct vdec_av1_slice_instance *instance;
1881 struct vdec_av1_slice_init_vsi *vsi;
1882 int ret;
1883
1884 instance = kzalloc(sizeof(*instance), GFP_KERNEL);
1885 if (!instance)
1886 return -ENOMEM;
1887
1888 instance->ctx = ctx;
1889 instance->vpu.id = SCP_IPI_VDEC_LAT;
1890 instance->vpu.core_id = SCP_IPI_VDEC_CORE;
1891 instance->vpu.ctx = ctx;
1892 instance->vpu.codec_type = ctx->current_codec;
1893
1894 ret = vpu_dec_init(&instance->vpu);
1895 if (ret) {
1896 mtk_vdec_err(ctx, "failed to init vpu dec, ret %d\n", ret);
1897 goto error_vpu_init;
1898 }
1899
1900 /* init vsi and global flags */
1901 vsi = instance->vpu.vsi;
1902 if (!vsi) {
1903 mtk_vdec_err(ctx, "failed to get AV1 vsi\n");
1904 ret = -EINVAL;
1905 goto error_vsi;
1906 }
1907 instance->init_vsi = vsi;
1908 instance->core_vsi = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler, (u32)vsi->core_vsi);
1909
1910 if (!instance->core_vsi) {
1911 mtk_vdec_err(ctx, "failed to get AV1 core vsi\n");
1912 ret = -EINVAL;
1913 goto error_vsi;
1914 }
1915
1916 if (vsi->vsi_size != sizeof(struct vdec_av1_slice_vsi))
1917 mtk_vdec_err(ctx, "remote vsi size 0x%x mismatch! expected: 0x%zx\n",
1918 vsi->vsi_size, sizeof(struct vdec_av1_slice_vsi));
1919
1920 instance->irq_enabled = 1;
1921 instance->inneracing_mode = IS_VDEC_INNER_RACING(instance->ctx->dev->dec_capability);
1922
1923 mtk_vdec_debug(ctx, "vsi 0x%p core_vsi 0x%llx 0x%p, inneracing_mode %d\n",
1924 vsi, vsi->core_vsi, instance->core_vsi, instance->inneracing_mode);
1925
1926 ret = vdec_av1_slice_init_cdf_table(instance);
1927 if (ret)
1928 goto error_vsi;
1929
1930 ret = vdec_av1_slice_init_iq_table(instance);
1931 if (ret)
1932 goto error_vsi;
1933
1934 ctx->drv_handle = instance;
1935
1936 return 0;
1937 error_vsi:
1938 vpu_dec_deinit(&instance->vpu);
1939 error_vpu_init:
1940 kfree(instance);
1941
1942 return ret;
1943 }
1944
vdec_av1_slice_deinit(void * h_vdec)1945 static void vdec_av1_slice_deinit(void *h_vdec)
1946 {
1947 struct vdec_av1_slice_instance *instance = h_vdec;
1948
1949 if (!instance)
1950 return;
1951 mtk_vdec_debug(instance->ctx, "h_vdec 0x%p\n", h_vdec);
1952 vpu_dec_deinit(&instance->vpu);
1953 vdec_av1_slice_free_working_buffer(instance);
1954 vdec_msg_queue_deinit(&instance->ctx->msg_queue, instance->ctx);
1955 kfree(instance);
1956 }
1957
/*
 * Flush the decoder: wait on the message queue via
 * vdec_msg_queue_wait_lat_buf_full(), clear every frame slot and reset the
 * VPU decoder. @bs, @fb and @res_chg are not used.
 *
 * Return: the result of vpu_dec_reset().
 */
static int vdec_av1_slice_flush(void *h_vdec, struct mtk_vcodec_mem *bs,
				struct vdec_fb *fb, bool *res_chg)
{
	struct vdec_av1_slice_instance *inst = h_vdec;
	int slot = 0;

	mtk_vdec_debug(inst->ctx, "flush ...\n");

	vdec_msg_queue_wait_lat_buf_full(&inst->ctx->msg_queue);

	while (slot < AV1_MAX_FRAME_BUF_COUNT) {
		vdec_av1_slice_clear_fb(&inst->slots.frame_info[slot]);
		slot++;
	}

	return vpu_dec_reset(&inst->vpu);
}
1973
vdec_av1_slice_get_pic_info(struct vdec_av1_slice_instance * instance)1974 static void vdec_av1_slice_get_pic_info(struct vdec_av1_slice_instance *instance)
1975 {
1976 struct mtk_vcodec_dec_ctx *ctx = instance->ctx;
1977 u32 data[3];
1978
1979 mtk_vdec_debug(ctx, "w %u h %u\n", ctx->picinfo.pic_w, ctx->picinfo.pic_h);
1980
1981 data[0] = ctx->picinfo.pic_w;
1982 data[1] = ctx->picinfo.pic_h;
1983 data[2] = ctx->capture_fourcc;
1984 vpu_dec_get_param(&instance->vpu, data, 3, GET_PARAM_PIC_INFO);
1985
1986 ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, VCODEC_DEC_ALIGNED_64);
1987 ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, VCODEC_DEC_ALIGNED_64);
1988 ctx->picinfo.fb_sz[0] = instance->vpu.fb_sz[0];
1989 ctx->picinfo.fb_sz[1] = instance->vpu.fb_sz[1];
1990 }
1991
/*
 * Report the DPB size: one buffer more than V4L2_AV1_TOTAL_REFS_PER_FRAME.
 * @instance is not used.
 */
static inline void vdec_av1_slice_get_dpb_size(struct vdec_av1_slice_instance *instance,
					       u32 *dpb_sz)
{
	/* refer av1 specification */
	dpb_sz[0] = 1 + V4L2_AV1_TOTAL_REFS_PER_FRAME;
}
1998
vdec_av1_slice_get_crop_info(struct vdec_av1_slice_instance * instance,struct v4l2_rect * cr)1999 static void vdec_av1_slice_get_crop_info(struct vdec_av1_slice_instance *instance,
2000 struct v4l2_rect *cr)
2001 {
2002 struct mtk_vcodec_dec_ctx *ctx = instance->ctx;
2003
2004 cr->left = 0;
2005 cr->top = 0;
2006 cr->width = ctx->picinfo.pic_w;
2007 cr->height = ctx->picinfo.pic_h;
2008
2009 mtk_vdec_debug(ctx, "l=%d, t=%d, w=%d, h=%d\n",
2010 cr->left, cr->top, cr->width, cr->height);
2011 }
2012
vdec_av1_slice_get_param(void * h_vdec,enum vdec_get_param_type type,void * out)2013 static int vdec_av1_slice_get_param(void *h_vdec, enum vdec_get_param_type type, void *out)
2014 {
2015 struct vdec_av1_slice_instance *instance = h_vdec;
2016
2017 switch (type) {
2018 case GET_PARAM_PIC_INFO:
2019 vdec_av1_slice_get_pic_info(instance);
2020 break;
2021 case GET_PARAM_DPB_SIZE:
2022 vdec_av1_slice_get_dpb_size(instance, out);
2023 break;
2024 case GET_PARAM_CROP_INFO:
2025 vdec_av1_slice_get_crop_info(instance, out);
2026 break;
2027 default:
2028 mtk_vdec_err(instance->ctx, "invalid get parameter type=%d\n", type);
2029 return -EINVAL;
2030 }
2031
2032 return 0;
2033 }
2034
/*
 * LAT stage entry point (the .decode callback of vdec_av1_slice_lat_if).
 *
 * Takes a LAT buffer from the message queue, sets up the frame, starts the
 * VPU decode and, when interrupts are enabled, waits for the LAT hardware.
 * On success the LAT buffer is handed to the core queue (before the wait in
 * inner racing mode, after it otherwise) and the slot bookkeeping is copied
 * back into the instance.
 *
 * A NULL @bs requests a flush.
 *
 * Return:
 *   0        on success, and also when the trans buffer was full and the
 *            frame has to be decoded again
 *   -EINVAL  invalid handle or missing private data
 *   -ENOMEM  the message queue could not be initialised
 *   -EAGAIN  no LAT buffer available
 *   -EBUSY   insufficient UBE buffer or decode timeout
 *   other    error code from the setup step or vpu_dec_start()
 */
static int vdec_av1_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
				     struct vdec_fb *fb, bool *res_chg)
{
	struct vdec_av1_slice_instance *instance = h_vdec;
	struct vdec_lat_buf *lat_buf;
	struct vdec_av1_slice_pfc *pfc;
	struct vdec_av1_slice_vsi *vsi;
	struct mtk_vcodec_dec_ctx *ctx;
	int ret;

	if (!instance || !instance->ctx)
		return -EINVAL;

	ctx = instance->ctx;
	/* init msgQ for the first time */
	if (vdec_msg_queue_init(&ctx->msg_queue, ctx,
				vdec_av1_slice_core_decode, sizeof(*pfc))) {
		mtk_vdec_err(ctx, "failed to init AV1 msg queue\n");
		return -ENOMEM;
	}

	/* bs NULL means flush decoder */
	if (!bs)
		return vdec_av1_slice_flush(h_vdec, bs, fb, res_chg);

	lat_buf = vdec_msg_queue_dqbuf(&ctx->msg_queue.lat_ctx);
	if (!lat_buf) {
		mtk_vdec_err(ctx, "failed to get AV1 lat buf\n");
		return -EAGAIN;
	}
	pfc = (struct vdec_av1_slice_pfc *)lat_buf->private_data;
	if (!pfc) {
		ret = -EINVAL;
		goto err_free_fb_out;
	}
	vsi = &pfc->vsi;

	ret = vdec_av1_slice_setup_lat(instance, bs, lat_buf, pfc);
	if (ret) {
		mtk_vdec_err(ctx, "failed to setup AV1 lat ret %d\n", ret);
		goto err_free_fb_out;
	}

	vdec_av1_slice_vsi_to_remote(vsi, instance->vsi);
	ret = vpu_dec_start(&instance->vpu, NULL, 0);
	if (ret) {
		mtk_vdec_err(ctx, "failed to dec AV1 ret %d\n", ret);
		goto err_free_fb_out;
	}
	/* inner racing: the core stage gets the buffer before LAT has finished */
	if (instance->inneracing_mode)
		vdec_msg_queue_qbuf(&ctx->msg_queue.core_ctx, lat_buf);

	if (instance->irq_enabled) {
		ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED,
						   WAIT_INTR_TIMEOUT_MS,
						   MTK_VDEC_LAT0);
		/* update remote vsi if decode timeout */
		if (ret) {
			mtk_vdec_err(ctx, "AV1 Frame %d decode timeout %d\n", pfc->seq, ret);
			WRITE_ONCE(instance->vsi->state.timeout, 1);
		}
		vpu_dec_end(&instance->vpu);
	}

	vdec_av1_slice_vsi_from_remote(vsi, instance->vsi);
	ret = vdec_av1_slice_update_lat(instance, lat_buf, pfc);

	/* LAT trans full, re-decode */
	if (ret == -EAGAIN) {
		mtk_vdec_err(ctx, "AV1 Frame %d trans full\n", pfc->seq);
		/* in inner racing mode the buffer already sits on the core queue */
		if (!instance->inneracing_mode)
			vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf);
		return 0;
	}

	/* LAT trans full, no more UBE or decode timeout */
	if (ret == -ENOMEM || vsi->state.timeout) {
		mtk_vdec_err(ctx, "AV1 Frame %d insufficient buffer or timeout\n", pfc->seq);
		if (!instance->inneracing_mode)
			vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf);
		return -EBUSY;
	}
	vsi->trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr;
	mtk_vdec_debug(ctx, "lat dma 1 0x%pad 0x%pad\n",
		       &pfc->vsi.trans.dma_addr, &pfc->vsi.trans.dma_addr_end);

	vdec_msg_queue_update_ube_wptr(&ctx->msg_queue, vsi->trans.dma_addr_end);

	if (!instance->inneracing_mode)
		vdec_msg_queue_qbuf(&ctx->msg_queue.core_ctx, lat_buf);
	memcpy(&instance->slots, &vsi->slots, sizeof(instance->slots));

	return 0;

err_free_fb_out:
	/* give the unused LAT buffer back to the LAT queue */
	vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf);

	/* terminate the message with a newline like every other log line here */
	if (pfc)
		mtk_vdec_err(ctx, "slice dec number: %d err: %d\n", pfc->seq, ret);

	return ret;
}
2137
vdec_av1_slice_core_decode(struct vdec_lat_buf * lat_buf)2138 static int vdec_av1_slice_core_decode(struct vdec_lat_buf *lat_buf)
2139 {
2140 struct vdec_av1_slice_instance *instance;
2141 struct vdec_av1_slice_pfc *pfc;
2142 struct mtk_vcodec_dec_ctx *ctx = NULL;
2143 struct vdec_fb *fb = NULL;
2144 int ret = -EINVAL;
2145
2146 if (!lat_buf)
2147 return -EINVAL;
2148
2149 pfc = lat_buf->private_data;
2150 ctx = lat_buf->ctx;
2151 if (!pfc || !ctx)
2152 return -EINVAL;
2153
2154 instance = ctx->drv_handle;
2155 if (!instance)
2156 goto err;
2157
2158 fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx);
2159 if (!fb) {
2160 ret = -EBUSY;
2161 goto err;
2162 }
2163
2164 ret = vdec_av1_slice_setup_core(instance, fb, lat_buf, pfc);
2165 if (ret) {
2166 mtk_vdec_err(ctx, "vdec_av1_slice_setup_core\n");
2167 goto err;
2168 }
2169 vdec_av1_slice_vsi_to_remote(&pfc->vsi, instance->core_vsi);
2170 ret = vpu_dec_core(&instance->vpu);
2171 if (ret) {
2172 mtk_vdec_err(ctx, "vpu_dec_core\n");
2173 goto err;
2174 }
2175
2176 if (instance->irq_enabled) {
2177 ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED,
2178 WAIT_INTR_TIMEOUT_MS,
2179 MTK_VDEC_CORE);
2180 /* update remote vsi if decode timeout */
2181 if (ret) {
2182 mtk_vdec_err(ctx, "AV1 frame %d core timeout\n", pfc->seq);
2183 WRITE_ONCE(instance->vsi->state.timeout, 1);
2184 }
2185 vpu_dec_core_end(&instance->vpu);
2186 }
2187
2188 ret = vdec_av1_slice_update_core(instance, lat_buf, pfc);
2189 if (ret) {
2190 mtk_vdec_err(ctx, "vdec_av1_slice_update_core\n");
2191 goto err;
2192 }
2193
2194 mtk_vdec_debug(ctx, "core dma_addr_end 0x%pad\n",
2195 &instance->core_vsi->trans.dma_addr_end);
2196 vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, instance->core_vsi->trans.dma_addr_end);
2197
2198 ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf->src_buf_req);
2199
2200 return 0;
2201
2202 err:
2203 /* always update read pointer */
2204 vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end);
2205
2206 if (fb)
2207 ctx->dev->vdec_pdata->cap_to_disp(ctx, 1, lat_buf->src_buf_req);
2208
2209 return ret;
2210 }
2211
2212 const struct vdec_common_if vdec_av1_slice_lat_if = {
2213 .init = vdec_av1_slice_init,
2214 .decode = vdec_av1_slice_lat_decode,
2215 .get_param = vdec_av1_slice_get_param,
2216 .deinit = vdec_av1_slice_deinit,
2217 };
2218