1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * Copyright (C) 2018 Maxime Jourdan <mjourdan@baylibre.com>
4  * Copyright (C) 2015 Amlogic, Inc. All rights reserved.
5  */
6 
7 #include <media/v4l2-mem2mem.h>
8 #include <media/videobuf2-dma-contig.h>
9 
10 #include "dos_regs.h"
11 #include "hevc_regs.h"
12 #include "codec_vp9.h"
13 #include "vdec_helpers.h"
14 #include "codec_hevc_common.h"
15 
16 /* HEVC reg mapping */
17 #define VP9_DEC_STATUS_REG	HEVC_ASSIST_SCRATCH_0
18 	#define VP9_10B_DECODE_SLICE	5
19 	#define VP9_HEAD_PARSER_DONE	0xf0
20 #define VP9_RPM_BUFFER		HEVC_ASSIST_SCRATCH_1
21 #define VP9_SHORT_TERM_RPS	HEVC_ASSIST_SCRATCH_2
22 #define VP9_ADAPT_PROB_REG	HEVC_ASSIST_SCRATCH_3
23 #define VP9_MMU_MAP_BUFFER	HEVC_ASSIST_SCRATCH_4
24 #define VP9_PPS_BUFFER		HEVC_ASSIST_SCRATCH_5
25 #define VP9_SAO_UP		HEVC_ASSIST_SCRATCH_6
26 #define VP9_STREAM_SWAP_BUFFER	HEVC_ASSIST_SCRATCH_7
27 #define VP9_STREAM_SWAP_BUFFER2 HEVC_ASSIST_SCRATCH_8
28 #define VP9_PROB_SWAP_BUFFER	HEVC_ASSIST_SCRATCH_9
29 #define VP9_COUNT_SWAP_BUFFER	HEVC_ASSIST_SCRATCH_A
30 #define VP9_SEG_MAP_BUFFER	HEVC_ASSIST_SCRATCH_B
31 #define VP9_SCALELUT		HEVC_ASSIST_SCRATCH_D
32 #define VP9_WAIT_FLAG		HEVC_ASSIST_SCRATCH_E
33 #define LMEM_DUMP_ADR		HEVC_ASSIST_SCRATCH_F
34 #define NAL_SEARCH_CTL		HEVC_ASSIST_SCRATCH_I
35 #define VP9_DECODE_MODE		HEVC_ASSIST_SCRATCH_J
36 	#define DECODE_MODE_SINGLE 0
37 #define DECODE_STOP_POS		HEVC_ASSIST_SCRATCH_K
38 #define HEVC_DECODE_COUNT	HEVC_ASSIST_SCRATCH_M
39 #define HEVC_DECODE_SIZE	HEVC_ASSIST_SCRATCH_N
40 
41 /* VP9 Constants */
42 #define LCU_SIZE		64
43 #define MAX_REF_PIC_NUM		24
44 #define REFS_PER_FRAME		3
45 #define REF_FRAMES		8
46 #define MV_MEM_UNIT		0x240
47 #define ADAPT_PROB_SIZE		0xf80
48 
49 enum FRAME_TYPE {
50 	KEY_FRAME = 0,
51 	INTER_FRAME = 1,
52 	FRAME_TYPES,
53 };
54 
55 /* VP9 Workspace layout */
56 #define MPRED_MV_BUF_SIZE 0x120000
57 
58 #define IPP_SIZE	0x4000
59 #define SAO_ABV_SIZE	0x30000
60 #define SAO_VB_SIZE	0x30000
61 #define SH_TM_RPS_SIZE	0x800
62 #define VPS_SIZE	0x800
63 #define SPS_SIZE	0x800
64 #define PPS_SIZE	0x2000
65 #define SAO_UP_SIZE	0x2800
66 #define SWAP_BUF_SIZE	0x800
67 #define SWAP_BUF2_SIZE	0x800
68 #define SCALELUT_SIZE	0x8000
69 #define DBLK_PARA_SIZE	0x80000
70 #define DBLK_DATA_SIZE	0x80000
71 #define SEG_MAP_SIZE	0xd800
72 #define PROB_SIZE	0x5000
73 #define COUNT_SIZE	0x3000
74 #define MMU_VBH_SIZE	0x5000
75 #define MPRED_ABV_SIZE	0x10000
76 #define MPRED_MV_SIZE	(MPRED_MV_BUF_SIZE * MAX_REF_PIC_NUM)
77 #define RPM_BUF_SIZE	0x100
78 #define LMEM_SIZE	0x800
79 
80 #define IPP_OFFSET       0x00
81 #define SAO_ABV_OFFSET   (IPP_OFFSET + IPP_SIZE)
82 #define SAO_VB_OFFSET    (SAO_ABV_OFFSET + SAO_ABV_SIZE)
83 #define SH_TM_RPS_OFFSET (SAO_VB_OFFSET + SAO_VB_SIZE)
84 #define VPS_OFFSET       (SH_TM_RPS_OFFSET + SH_TM_RPS_SIZE)
85 #define SPS_OFFSET       (VPS_OFFSET + VPS_SIZE)
86 #define PPS_OFFSET       (SPS_OFFSET + SPS_SIZE)
87 #define SAO_UP_OFFSET    (PPS_OFFSET + PPS_SIZE)
88 #define SWAP_BUF_OFFSET  (SAO_UP_OFFSET + SAO_UP_SIZE)
89 #define SWAP_BUF2_OFFSET (SWAP_BUF_OFFSET + SWAP_BUF_SIZE)
90 #define SCALELUT_OFFSET  (SWAP_BUF2_OFFSET + SWAP_BUF2_SIZE)
91 #define DBLK_PARA_OFFSET (SCALELUT_OFFSET + SCALELUT_SIZE)
92 #define DBLK_DATA_OFFSET (DBLK_PARA_OFFSET + DBLK_PARA_SIZE)
93 #define SEG_MAP_OFFSET   (DBLK_DATA_OFFSET + DBLK_DATA_SIZE)
94 #define PROB_OFFSET      (SEG_MAP_OFFSET + SEG_MAP_SIZE)
95 #define COUNT_OFFSET     (PROB_OFFSET + PROB_SIZE)
96 #define MMU_VBH_OFFSET   (COUNT_OFFSET + COUNT_SIZE)
97 #define MPRED_ABV_OFFSET (MMU_VBH_OFFSET + MMU_VBH_SIZE)
98 #define MPRED_MV_OFFSET  (MPRED_ABV_OFFSET + MPRED_ABV_SIZE)
99 #define RPM_OFFSET       (MPRED_MV_OFFSET + MPRED_MV_SIZE)
100 #define LMEM_OFFSET      (RPM_OFFSET + RPM_BUF_SIZE)
101 
102 #define SIZE_WORKSPACE	ALIGN(LMEM_OFFSET + LMEM_SIZE, 64 * SZ_1K)
103 
104 #define NONE           -1
105 #define INTRA_FRAME     0
106 #define LAST_FRAME      1
107 #define GOLDEN_FRAME    2
108 #define ALTREF_FRAME    3
109 #define MAX_REF_FRAMES  4
110 
111 /*
112  * Defines, declarations and sub-functions for the VP9 de-block loop
113  * filter Thr/Lvl table update
114  * - struct segmentation is used for the loop filter only (trimmed down)
115  * - vp9_loop_filter_init() and vp9_loop_filter_frame_init() are
116  *   instantiated in C_Entry
117  * - vp9_loop_filter_init() runs once before decoding starts
118  * - vp9_loop_filter_frame_init() runs before every frame is decoded
119  * - the video format is set to VP9 in vp9_loop_filter_init()
120  */
121 #define MAX_LOOP_FILTER		63
122 #define MAX_REF_LF_DELTAS	4
123 #define MAX_MODE_LF_DELTAS	2
124 #define SEGMENT_DELTADATA	0
125 #define SEGMENT_ABSDATA		1
126 #define MAX_SEGMENTS		8
127 
128 /* VP9 PROB processing defines */
129 #define VP9_PARTITION_START      0
130 #define VP9_PARTITION_SIZE_STEP  (3 * 4)
131 #define VP9_PARTITION_ONE_SIZE   (4 * VP9_PARTITION_SIZE_STEP)
132 #define VP9_PARTITION_KEY_START  0
133 #define VP9_PARTITION_P_START    VP9_PARTITION_ONE_SIZE
134 #define VP9_PARTITION_SIZE       (2 * VP9_PARTITION_ONE_SIZE)
135 #define VP9_SKIP_START           (VP9_PARTITION_START + VP9_PARTITION_SIZE)
136 #define VP9_SKIP_SIZE            4 /* only use 3*/
137 #define VP9_TX_MODE_START        (VP9_SKIP_START + VP9_SKIP_SIZE)
138 #define VP9_TX_MODE_8_0_OFFSET   0
139 #define VP9_TX_MODE_8_1_OFFSET   1
140 #define VP9_TX_MODE_16_0_OFFSET  2
141 #define VP9_TX_MODE_16_1_OFFSET  4
142 #define VP9_TX_MODE_32_0_OFFSET  6
143 #define VP9_TX_MODE_32_1_OFFSET  9
144 #define VP9_TX_MODE_SIZE         12
145 #define VP9_COEF_START           (VP9_TX_MODE_START + VP9_TX_MODE_SIZE)
146 #define VP9_COEF_BAND_0_OFFSET   0
147 #define VP9_COEF_BAND_1_OFFSET   (VP9_COEF_BAND_0_OFFSET + 3 * 3 + 1)
148 #define VP9_COEF_BAND_2_OFFSET   (VP9_COEF_BAND_1_OFFSET + 6 * 3)
149 #define VP9_COEF_BAND_3_OFFSET   (VP9_COEF_BAND_2_OFFSET + 6 * 3)
150 #define VP9_COEF_BAND_4_OFFSET   (VP9_COEF_BAND_3_OFFSET + 6 * 3)
151 #define VP9_COEF_BAND_5_OFFSET   (VP9_COEF_BAND_4_OFFSET + 6 * 3)
152 #define VP9_COEF_SIZE_ONE_SET    100 /* ((3 + 5 * 6) * 3 + 1 padding)*/
153 #define VP9_COEF_4X4_START       (VP9_COEF_START + 0 * VP9_COEF_SIZE_ONE_SET)
154 #define VP9_COEF_8X8_START       (VP9_COEF_START + 4 * VP9_COEF_SIZE_ONE_SET)
155 #define VP9_COEF_16X16_START     (VP9_COEF_START + 8 * VP9_COEF_SIZE_ONE_SET)
156 #define VP9_COEF_32X32_START     (VP9_COEF_START + 12 * VP9_COEF_SIZE_ONE_SET)
157 #define VP9_COEF_SIZE_PLANE      (2 * VP9_COEF_SIZE_ONE_SET)
158 #define VP9_COEF_SIZE            (4 * 2 * 2 * VP9_COEF_SIZE_ONE_SET)
159 #define VP9_INTER_MODE_START     (VP9_COEF_START + VP9_COEF_SIZE)
160 #define VP9_INTER_MODE_SIZE      24 /* only use 21 (# * 7)*/
161 #define VP9_INTERP_START         (VP9_INTER_MODE_START + VP9_INTER_MODE_SIZE)
162 #define VP9_INTERP_SIZE          8
163 #define VP9_INTRA_INTER_START    (VP9_INTERP_START + VP9_INTERP_SIZE)
164 #define VP9_INTRA_INTER_SIZE     4
165 #define VP9_INTERP_INTRA_INTER_START  VP9_INTERP_START
166 #define VP9_INTERP_INTRA_INTER_SIZE   (VP9_INTERP_SIZE + VP9_INTRA_INTER_SIZE)
167 #define VP9_COMP_INTER_START     \
168 		(VP9_INTERP_INTRA_INTER_START + VP9_INTERP_INTRA_INTER_SIZE)
169 #define VP9_COMP_INTER_SIZE      5
170 #define VP9_COMP_REF_START       (VP9_COMP_INTER_START + VP9_COMP_INTER_SIZE)
171 #define VP9_COMP_REF_SIZE        5
172 #define VP9_SINGLE_REF_START     (VP9_COMP_REF_START + VP9_COMP_REF_SIZE)
173 #define VP9_SINGLE_REF_SIZE      10
174 #define VP9_REF_MODE_START       VP9_COMP_INTER_START
175 #define VP9_REF_MODE_SIZE        \
176 		(VP9_COMP_INTER_SIZE + VP9_COMP_REF_SIZE + VP9_SINGLE_REF_SIZE)
177 #define VP9_IF_Y_MODE_START      (VP9_REF_MODE_START + VP9_REF_MODE_SIZE)
178 #define VP9_IF_Y_MODE_SIZE       36
179 #define VP9_IF_UV_MODE_START     (VP9_IF_Y_MODE_START + VP9_IF_Y_MODE_SIZE)
180 #define VP9_IF_UV_MODE_SIZE      92 /* only use 90*/
181 #define VP9_MV_JOINTS_START      (VP9_IF_UV_MODE_START + VP9_IF_UV_MODE_SIZE)
182 #define VP9_MV_JOINTS_SIZE       3
183 #define VP9_MV_SIGN_0_START      (VP9_MV_JOINTS_START + VP9_MV_JOINTS_SIZE)
184 #define VP9_MV_SIGN_0_SIZE       1
185 #define VP9_MV_CLASSES_0_START   (VP9_MV_SIGN_0_START + VP9_MV_SIGN_0_SIZE)
186 #define VP9_MV_CLASSES_0_SIZE    10
187 #define VP9_MV_CLASS0_0_START    \
188 		(VP9_MV_CLASSES_0_START + VP9_MV_CLASSES_0_SIZE)
189 #define VP9_MV_CLASS0_0_SIZE     1
190 #define VP9_MV_BITS_0_START      (VP9_MV_CLASS0_0_START + VP9_MV_CLASS0_0_SIZE)
191 #define VP9_MV_BITS_0_SIZE       10
192 #define VP9_MV_SIGN_1_START      (VP9_MV_BITS_0_START + VP9_MV_BITS_0_SIZE)
193 #define VP9_MV_SIGN_1_SIZE       1
194 #define VP9_MV_CLASSES_1_START   \
195 			(VP9_MV_SIGN_1_START + VP9_MV_SIGN_1_SIZE)
196 #define VP9_MV_CLASSES_1_SIZE    10
197 #define VP9_MV_CLASS0_1_START    \
198 			(VP9_MV_CLASSES_1_START + VP9_MV_CLASSES_1_SIZE)
199 #define VP9_MV_CLASS0_1_SIZE     1
200 #define VP9_MV_BITS_1_START      \
201 			(VP9_MV_CLASS0_1_START + VP9_MV_CLASS0_1_SIZE)
202 #define VP9_MV_BITS_1_SIZE       10
203 #define VP9_MV_CLASS0_FP_0_START \
204 			(VP9_MV_BITS_1_START + VP9_MV_BITS_1_SIZE)
205 #define VP9_MV_CLASS0_FP_0_SIZE  9
206 #define VP9_MV_CLASS0_FP_1_START \
207 			(VP9_MV_CLASS0_FP_0_START + VP9_MV_CLASS0_FP_0_SIZE)
208 #define VP9_MV_CLASS0_FP_1_SIZE  9
209 #define VP9_MV_CLASS0_HP_0_START \
210 			(VP9_MV_CLASS0_FP_1_START + VP9_MV_CLASS0_FP_1_SIZE)
211 #define VP9_MV_CLASS0_HP_0_SIZE  2
212 #define VP9_MV_CLASS0_HP_1_START \
213 			(VP9_MV_CLASS0_HP_0_START + VP9_MV_CLASS0_HP_0_SIZE)
214 #define VP9_MV_CLASS0_HP_1_SIZE  2
215 #define VP9_MV_START             VP9_MV_JOINTS_START
216 #define VP9_MV_SIZE              72 /*only use 69*/
217 
218 #define VP9_TOTAL_SIZE           (VP9_MV_START + VP9_MV_SIZE)
219 
220 /* VP9 COUNT mem processing defines */
221 #define VP9_COEF_COUNT_START           0
222 #define VP9_COEF_COUNT_BAND_0_OFFSET   0
223 #define VP9_COEF_COUNT_BAND_1_OFFSET   \
224 			(VP9_COEF_COUNT_BAND_0_OFFSET + 3 * 5)
225 #define VP9_COEF_COUNT_BAND_2_OFFSET   \
226 			(VP9_COEF_COUNT_BAND_1_OFFSET + 6 * 5)
227 #define VP9_COEF_COUNT_BAND_3_OFFSET   \
228 			(VP9_COEF_COUNT_BAND_2_OFFSET + 6 * 5)
229 #define VP9_COEF_COUNT_BAND_4_OFFSET   \
230 			(VP9_COEF_COUNT_BAND_3_OFFSET + 6 * 5)
231 #define VP9_COEF_COUNT_BAND_5_OFFSET   \
232 			(VP9_COEF_COUNT_BAND_4_OFFSET + 6 * 5)
233 #define VP9_COEF_COUNT_SIZE_ONE_SET    165 /* (3 + 5 * 6) * 5 */
234 #define VP9_COEF_COUNT_4X4_START       \
235 		(VP9_COEF_COUNT_START + 0 * VP9_COEF_COUNT_SIZE_ONE_SET)
236 #define VP9_COEF_COUNT_8X8_START       \
237 		(VP9_COEF_COUNT_START + 4 * VP9_COEF_COUNT_SIZE_ONE_SET)
238 #define VP9_COEF_COUNT_16X16_START     \
239 		(VP9_COEF_COUNT_START + 8 * VP9_COEF_COUNT_SIZE_ONE_SET)
240 #define VP9_COEF_COUNT_32X32_START     \
241 		(VP9_COEF_COUNT_START + 12 * VP9_COEF_COUNT_SIZE_ONE_SET)
242 #define VP9_COEF_COUNT_SIZE_PLANE      (2 * VP9_COEF_COUNT_SIZE_ONE_SET)
243 #define VP9_COEF_COUNT_SIZE            (4 * 2 * 2 * VP9_COEF_COUNT_SIZE_ONE_SET)
244 
245 #define VP9_INTRA_INTER_COUNT_START    \
246 		(VP9_COEF_COUNT_START + VP9_COEF_COUNT_SIZE)
247 #define VP9_INTRA_INTER_COUNT_SIZE     (4 * 2)
248 #define VP9_COMP_INTER_COUNT_START     \
249 		(VP9_INTRA_INTER_COUNT_START + VP9_INTRA_INTER_COUNT_SIZE)
250 #define VP9_COMP_INTER_COUNT_SIZE      (5 * 2)
251 #define VP9_COMP_REF_COUNT_START       \
252 		(VP9_COMP_INTER_COUNT_START + VP9_COMP_INTER_COUNT_SIZE)
253 #define VP9_COMP_REF_COUNT_SIZE        (5 * 2)
254 #define VP9_SINGLE_REF_COUNT_START     \
255 		(VP9_COMP_REF_COUNT_START + VP9_COMP_REF_COUNT_SIZE)
256 #define VP9_SINGLE_REF_COUNT_SIZE      (10 * 2)
257 #define VP9_TX_MODE_COUNT_START        \
258 		(VP9_SINGLE_REF_COUNT_START + VP9_SINGLE_REF_COUNT_SIZE)
259 #define VP9_TX_MODE_COUNT_SIZE         (12 * 2)
260 #define VP9_SKIP_COUNT_START           \
261 		(VP9_TX_MODE_COUNT_START + VP9_TX_MODE_COUNT_SIZE)
262 #define VP9_SKIP_COUNT_SIZE            (3 * 2)
263 #define VP9_MV_SIGN_0_COUNT_START      \
264 		(VP9_SKIP_COUNT_START + VP9_SKIP_COUNT_SIZE)
265 #define VP9_MV_SIGN_0_COUNT_SIZE       (1 * 2)
266 #define VP9_MV_SIGN_1_COUNT_START      \
267 		(VP9_MV_SIGN_0_COUNT_START + VP9_MV_SIGN_0_COUNT_SIZE)
268 #define VP9_MV_SIGN_1_COUNT_SIZE       (1 * 2)
269 #define VP9_MV_BITS_0_COUNT_START      \
270 		(VP9_MV_SIGN_1_COUNT_START + VP9_MV_SIGN_1_COUNT_SIZE)
271 #define VP9_MV_BITS_0_COUNT_SIZE       (10 * 2)
272 #define VP9_MV_BITS_1_COUNT_START      \
273 		(VP9_MV_BITS_0_COUNT_START + VP9_MV_BITS_0_COUNT_SIZE)
274 #define VP9_MV_BITS_1_COUNT_SIZE       (10 * 2)
275 #define VP9_MV_CLASS0_HP_0_COUNT_START \
276 		(VP9_MV_BITS_1_COUNT_START + VP9_MV_BITS_1_COUNT_SIZE)
277 #define VP9_MV_CLASS0_HP_0_COUNT_SIZE  (2 * 2)
278 #define VP9_MV_CLASS0_HP_1_COUNT_START \
279 		(VP9_MV_CLASS0_HP_0_COUNT_START + VP9_MV_CLASS0_HP_0_COUNT_SIZE)
280 #define VP9_MV_CLASS0_HP_1_COUNT_SIZE  (2 * 2)
281 
282 /* Start merge_tree */
283 #define VP9_INTER_MODE_COUNT_START     \
284 		(VP9_MV_CLASS0_HP_1_COUNT_START + VP9_MV_CLASS0_HP_1_COUNT_SIZE)
285 #define VP9_INTER_MODE_COUNT_SIZE      (7 * 4)
286 #define VP9_IF_Y_MODE_COUNT_START      \
287 		(VP9_INTER_MODE_COUNT_START + VP9_INTER_MODE_COUNT_SIZE)
288 #define VP9_IF_Y_MODE_COUNT_SIZE       (10 * 4)
289 #define VP9_IF_UV_MODE_COUNT_START     \
290 		(VP9_IF_Y_MODE_COUNT_START + VP9_IF_Y_MODE_COUNT_SIZE)
291 #define VP9_IF_UV_MODE_COUNT_SIZE      (10 * 10)
292 #define VP9_PARTITION_P_COUNT_START    \
293 		(VP9_IF_UV_MODE_COUNT_START + VP9_IF_UV_MODE_COUNT_SIZE)
294 #define VP9_PARTITION_P_COUNT_SIZE     (4 * 4 * 4)
295 #define VP9_INTERP_COUNT_START         \
296 		(VP9_PARTITION_P_COUNT_START + VP9_PARTITION_P_COUNT_SIZE)
297 #define VP9_INTERP_COUNT_SIZE          (4 * 3)
298 #define VP9_MV_JOINTS_COUNT_START      \
299 		(VP9_INTERP_COUNT_START + VP9_INTERP_COUNT_SIZE)
300 #define VP9_MV_JOINTS_COUNT_SIZE       (1 * 4)
301 #define VP9_MV_CLASSES_0_COUNT_START   \
302 		(VP9_MV_JOINTS_COUNT_START + VP9_MV_JOINTS_COUNT_SIZE)
303 #define VP9_MV_CLASSES_0_COUNT_SIZE    (1 * 11)
304 #define VP9_MV_CLASS0_0_COUNT_START    \
305 		(VP9_MV_CLASSES_0_COUNT_START + VP9_MV_CLASSES_0_COUNT_SIZE)
306 #define VP9_MV_CLASS0_0_COUNT_SIZE     (1 * 2)
307 #define VP9_MV_CLASSES_1_COUNT_START   \
308 		(VP9_MV_CLASS0_0_COUNT_START + VP9_MV_CLASS0_0_COUNT_SIZE)
309 #define VP9_MV_CLASSES_1_COUNT_SIZE    (1 * 11)
310 #define VP9_MV_CLASS0_1_COUNT_START    \
311 		(VP9_MV_CLASSES_1_COUNT_START + VP9_MV_CLASSES_1_COUNT_SIZE)
312 #define VP9_MV_CLASS0_1_COUNT_SIZE     (1 * 2)
313 #define VP9_MV_CLASS0_FP_0_COUNT_START \
314 		(VP9_MV_CLASS0_1_COUNT_START + VP9_MV_CLASS0_1_COUNT_SIZE)
315 #define VP9_MV_CLASS0_FP_0_COUNT_SIZE  (3 * 4)
316 #define VP9_MV_CLASS0_FP_1_COUNT_START \
317 		(VP9_MV_CLASS0_FP_0_COUNT_START + VP9_MV_CLASS0_FP_0_COUNT_SIZE)
318 #define VP9_MV_CLASS0_FP_1_COUNT_SIZE  (3 * 4)
319 
320 #define DC_PRED    0	/* Average of above and left pixels */
321 #define V_PRED     1	/* Vertical */
322 #define H_PRED     2	/* Horizontal */
323 #define D45_PRED   3	/* Directional 45 deg = round(arctan(1/1) * 180/pi) */
324 #define D135_PRED  4	/* Directional 135 deg = 180 - 45 */
325 #define D117_PRED  5	/* Directional 117 deg = 180 - 63 */
326 #define D153_PRED  6	/* Directional 153 deg = 180 - 27 */
327 #define D207_PRED  7	/* Directional 207 deg = 180 + 27 */
328 #define D63_PRED   8	/* Directional 63 deg = round(arctan(2/1) * 180/pi) */
329 #define TM_PRED    9	/* True-motion */
330 
331 /* Use a static inline rather than a macro so num is only evaluated once */
332 static inline int round_power_of_two(int value, int num)
333 {
334 	return (value + (1 << (num - 1))) >> num;
335 }
336 
337 #define MODE_MV_COUNT_SAT 20
338 static const int count_to_update_factor[MODE_MV_COUNT_SAT + 1] = {
339 	0, 6, 12, 19, 25, 32, 38, 44, 51, 57, 64,
340 	70, 76, 83, 89, 96, 102, 108, 115, 121, 128
341 };
342 
343 union rpm_param {
344 	struct {
345 		u16 data[RPM_BUF_SIZE];
346 	} l;
347 	struct {
348 		u16 profile;
349 		u16 show_existing_frame;
350 		u16 frame_to_show_idx;
351 		u16 frame_type; /*1 bit*/
352 		u16 show_frame; /*1 bit*/
353 		u16 error_resilient_mode; /*1 bit*/
354 		u16 intra_only; /*1 bit*/
355 		u16 display_size_present; /*1 bit*/
356 		u16 reset_frame_context;
357 		u16 refresh_frame_flags;
358 		u16 width;
359 		u16 height;
360 		u16 display_width;
361 		u16 display_height;
362 		u16 ref_info;
363 		u16 same_frame_size;
364 		u16 mode_ref_delta_enabled;
365 		u16 ref_deltas[4];
366 		u16 mode_deltas[2];
367 		u16 filter_level;
368 		u16 sharpness_level;
369 		u16 bit_depth;
370 		u16 seg_quant_info[8];
371 		u16 seg_enabled;
372 		u16 seg_abs_delta;
373 		/* bit 15: feature enabled; bit 8, sign; bit[5:0], data */
374 		u16 seg_lf_info[8];
375 	} p;
376 };
377 
378 enum SEG_LVL_FEATURES {
379 	SEG_LVL_ALT_Q = 0,	/* Use alternate Quantizer */
380 	SEG_LVL_ALT_LF = 1,	/* Use alternate loop filter value */
381 	SEG_LVL_REF_FRAME = 2,	/* Optional Segment reference frame */
382 	SEG_LVL_SKIP = 3,	/* Optional Segment (0,0) + skip mode */
383 	SEG_LVL_MAX = 4		/* Number of features supported */
384 };
385 
386 struct segmentation {
387 	u8 enabled;
388 	u8 update_map;
389 	u8 update_data;
390 	u8 abs_delta;
391 	u8 temporal_update;
392 	s16 feature_data[MAX_SEGMENTS][SEG_LVL_MAX];
393 	unsigned int feature_mask[MAX_SEGMENTS];
394 };
395 
396 struct loop_filter_thresh {
397 	u8 mblim;
398 	u8 lim;
399 	u8 hev_thr;
400 };
401 
402 struct loop_filter_info_n {
403 	struct loop_filter_thresh lfthr[MAX_LOOP_FILTER + 1];
404 	u8 lvl[MAX_SEGMENTS][MAX_REF_FRAMES][MAX_MODE_LF_DELTAS];
405 };
406 
407 struct loopfilter {
408 	int filter_level;
409 
410 	int sharpness_level;
411 	int last_sharpness_level;
412 
413 	u8 mode_ref_delta_enabled;
414 	u8 mode_ref_delta_update;
415 
416 	/*0 = Intra, Last, GF, ARF*/
417 	signed char ref_deltas[MAX_REF_LF_DELTAS];
418 	signed char last_ref_deltas[MAX_REF_LF_DELTAS];
419 
420 	/*0 = ZERO_MV, MV*/
421 	signed char mode_deltas[MAX_MODE_LF_DELTAS];
422 	signed char last_mode_deltas[MAX_MODE_LF_DELTAS];
423 };
424 
425 struct vp9_frame {
426 	struct list_head list;
427 	struct vb2_v4l2_buffer *vbuf;
428 	int index;
429 	int intra_only;
430 	int show;
431 	int type;
432 	int done;
433 	unsigned int width;
434 	unsigned int height;
435 };
436 
437 struct codec_vp9 {
438 	/* VP9 context lock */
439 	struct mutex lock;
440 
441 	/* Common part with the HEVC decoder */
442 	struct codec_hevc_common common;
443 
444 	/* Buffer for the VP9 Workspace */
445 	void      *workspace_vaddr;
446 	dma_addr_t workspace_paddr;
447 
448 	/* Holds the information parsed from the bitstream */
449 	union rpm_param rpm_param;
450 
451 	/* Whether we detected the bitstream as 10-bit */
452 	int is_10bit;
453 
454 	/* Coded resolution reported by the hardware */
455 	u32 width, height;
456 
457 	/* All ref frames used by the HW at a given time */
458 	struct list_head ref_frames_list;
459 	u32 frames_num;
460 
461 	/* In case of downsampling (decoding with FBC but outputting in NV12M),
462 	 * we need to allocate additional buffers for FBC.
463 	 */
464 	void      *fbc_buffer_vaddr[MAX_REF_PIC_NUM];
465 	dma_addr_t fbc_buffer_paddr[MAX_REF_PIC_NUM];
466 
467 	int ref_frame_map[REF_FRAMES];
468 	int next_ref_frame_map[REF_FRAMES];
469 	struct vp9_frame *frame_refs[REFS_PER_FRAME];
470 
471 	u32 lcu_total;
472 
473 	/* loop filter */
474 	int default_filt_lvl;
475 	struct loop_filter_info_n lfi;
476 	struct loopfilter lf;
477 	struct segmentation seg_4lf;
478 
479 	struct vp9_frame *cur_frame;
480 	struct vp9_frame *prev_frame;
481 };
482 
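/* Signed 64-bit by 32-bit division, truncated to a 32-bit result */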
483 static int div_r32(s64 m, int n)
484 {
485 	s64 qu = div_s64(m, n);
486 
487 	return (int)qu;
488 }
489 
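/* Clamp a probability value to the valid VP9 range [1, 255] */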
490 static int clip_prob(int p)
491 {
492 	return clamp_val(p, 1, 255);
493 }
494 
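/* Return non-zero if the given feature is enabled for this segment */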
495 static int segfeature_active(struct segmentation *seg, int segment_id,
496 			     enum SEG_LVL_FEATURES feature_id)
497 {
498 	return seg->enabled &&
499 		(seg->feature_mask[segment_id] & (1 << feature_id));
500 }
501 
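/* Fetch the data value of a segment feature */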
502 static int get_segdata(struct segmentation *seg, int segment_id,
503 		       enum SEG_LVL_FEATURES feature_id)
504 {
505 	return seg->feature_data[segment_id][feature_id];
506 }
507 
508 static void vp9_update_sharpness(struct loop_filter_info_n *lfi,
509 				 int sharpness_lvl)
510 {
511 	int lvl;
512 
513 	/* For each possible loop filter value, fill out the limits */
514 	for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) {
515 		/* Set loop filter parameters that control sharpness.*/
516 		int block_inside_limit = lvl >> ((sharpness_lvl > 0) +
517 					(sharpness_lvl > 4));
518 
519 		if (sharpness_lvl > 0) {
520 			if (block_inside_limit > (9 - sharpness_lvl))
521 				block_inside_limit = (9 - sharpness_lvl);
522 		}
523 
524 		if (block_inside_limit < 1)
525 			block_inside_limit = 1;
526 
527 		lfi->lfthr[lvl].lim = (u8)block_inside_limit;
528 		lfi->lfthr[lvl].mblim = (u8)(2 * (lvl + 2) +
529 				block_inside_limit);
530 	}
531 }
532 
533 /* Called once when decoding starts */
534 static void
535 vp9_loop_filter_init(struct amvdec_core *core, struct codec_vp9 *vp9)
536 {
537 	struct loop_filter_info_n *lfi = &vp9->lfi;
538 	struct loopfilter *lf = &vp9->lf;
539 	struct segmentation *seg_4lf = &vp9->seg_4lf;
540 	int i;
541 
542 	memset(lfi, 0, sizeof(struct loop_filter_info_n));
543 	memset(lf, 0, sizeof(struct loopfilter));
544 	memset(seg_4lf, 0, sizeof(struct segmentation));
545 	lf->sharpness_level = 0;
546 	vp9_update_sharpness(lfi, lf->sharpness_level);
547 	lf->last_sharpness_level = lf->sharpness_level;
548 
549 	for (i = 0; i < 32; i++) {
550 		unsigned int thr;
551 
552 		thr = ((lfi->lfthr[i * 2 + 1].lim & 0x3f) << 8) |
553 			(lfi->lfthr[i * 2 + 1].mblim & 0xff);
554 		thr = (thr << 16) | ((lfi->lfthr[i * 2].lim & 0x3f) << 8) |
555 			(lfi->lfthr[i * 2].mblim & 0xff);
556 
557 		amvdec_write_dos(core, HEVC_DBLK_CFG9, thr);
558 	}
559 
560 	if (core->platform->revision >= VDEC_REVISION_SM1)
561 		amvdec_write_dos(core, HEVC_DBLK_CFGB,
562 				 (0x3 << 14) | /* dw fifo thres r and b */
563 				 (0x3 << 12) | /* dw fifo thres r or b */
564 				 (0x3 << 10) | /* dw fifo thres not r/b */
565 				 BIT(0)); /* VP9 video format */
566 	else if (core->platform->revision >= VDEC_REVISION_G12A)
567 		/* VP9 video format */
568 		amvdec_write_dos(core, HEVC_DBLK_CFGB, (0x54 << 8) | BIT(0));
569 	else
570 		amvdec_write_dos(core, HEVC_DBLK_CFGB, 0x40400001);
571 }
572 
573 static void
574 vp9_loop_filter_frame_init(struct amvdec_core *core, struct segmentation *seg,
575 			   struct loop_filter_info_n *lfi,
576 			   struct loopfilter *lf, int default_filt_lvl)
577 {
578 	int i;
579 	int seg_id;
580 
581 	/*
582 	 * scale is the multiplier for lf_deltas
583 	 * the multiplier is:
584 	 * - 1 when filter_lvl is between 0 and 31
585 	 * - 2 when filter_lvl is between 32 and 63
586 	 */
587 	const int scale = 1 << (default_filt_lvl >> 5);
588 
589 	/* update limits if sharpness has changed */
590 	if (lf->last_sharpness_level != lf->sharpness_level) {
591 		vp9_update_sharpness(lfi, lf->sharpness_level);
592 		lf->last_sharpness_level = lf->sharpness_level;
593 
594 		/* Write to register */
595 		for (i = 0; i < 32; i++) {
596 			unsigned int thr;
597 
598 			thr = ((lfi->lfthr[i * 2 + 1].lim & 0x3f) << 8) |
599 			      (lfi->lfthr[i * 2 + 1].mblim & 0xff);
600 			thr = (thr << 16) |
601 			      ((lfi->lfthr[i * 2].lim & 0x3f) << 8) |
602 			      (lfi->lfthr[i * 2].mblim & 0xff);
603 
604 			amvdec_write_dos(core, HEVC_DBLK_CFG9, thr);
605 		}
606 	}
607 
608 	for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
609 		int lvl_seg = default_filt_lvl;
610 
611 		if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) {
612 			const int data = get_segdata(seg, seg_id,
613 						SEG_LVL_ALT_LF);
614 			lvl_seg = clamp_t(int,
615 					  seg->abs_delta == SEGMENT_ABSDATA ?
616 						data : default_filt_lvl + data,
617 					  0, MAX_LOOP_FILTER);
618 		}
619 
620 		if (!lf->mode_ref_delta_enabled) {
621 			/*
622 			 * We could get rid of this if we assume that deltas
623 			 * are set to zero when not in use.
624 			 * The encoder always uses deltas.
625 			 */
626 			memset(lfi->lvl[seg_id], lvl_seg,
627 			       sizeof(lfi->lvl[seg_id]));
628 		} else {
629 			int ref, mode;
630 			const int intra_lvl =
631 				lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
632 			lfi->lvl[seg_id][INTRA_FRAME][0] =
633 				clamp_val(intra_lvl, 0, MAX_LOOP_FILTER);
634 
635 			for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) {
636 				for (mode = 0; mode < MAX_MODE_LF_DELTAS;
637 				     ++mode) {
638 					const int inter_lvl =
639 						lvl_seg +
640 						lf->ref_deltas[ref] * scale +
641 						lf->mode_deltas[mode] * scale;
642 					lfi->lvl[seg_id][ref][mode] =
643 						clamp_val(inter_lvl, 0,
644 							  MAX_LOOP_FILTER);
645 				}
646 			}
647 		}
648 	}
649 
650 	for (i = 0; i < 16; i++) {
651 		unsigned int level;
652 
653 		level = ((lfi->lvl[i >> 1][3][i & 1] & 0x3f) << 24) |
654 			((lfi->lvl[i >> 1][2][i & 1] & 0x3f) << 16) |
655 			((lfi->lvl[i >> 1][1][i & 1] & 0x3f) << 8) |
656 			(lfi->lvl[i >> 1][0][i & 1] & 0x3f);
657 		if (!default_filt_lvl)
658 			level = 0;
659 
660 		amvdec_write_dos(core, HEVC_DBLK_CFGA, level);
661 	}
662 }
663 
664 static void codec_vp9_flush_output(struct amvdec_session *sess)
665 {
666 	struct codec_vp9 *vp9 = sess->priv;
667 	struct vp9_frame *tmp, *n;
668 
669 	mutex_lock(&vp9->lock);
670 	list_for_each_entry_safe(tmp, n, &vp9->ref_frames_list, list) {
671 		if (!tmp->done) {
672 			if (tmp->show)
673 				amvdec_dst_buf_done(sess, tmp->vbuf,
674 						    V4L2_FIELD_NONE);
675 			else
676 				v4l2_m2m_buf_queue(sess->m2m_ctx, tmp->vbuf);
677 
678 			vp9->frames_num--;
679 		}
680 
681 		list_del(&tmp->list);
682 		kfree(tmp);
683 	}
684 	mutex_unlock(&vp9->lock);
685 }
686 
687 static u32 codec_vp9_num_pending_bufs(struct amvdec_session *sess)
688 {
689 	struct codec_vp9 *vp9 = sess->priv;
690 
691 	if (!vp9)
692 		return 0;
693 
694 	return vp9->frames_num;
695 }
696 
697 static int codec_vp9_alloc_workspace(struct amvdec_core *core,
698 				     struct codec_vp9 *vp9)
699 {
700 	/* Allocate some memory for the VP9 decoder's state */
701 	vp9->workspace_vaddr = dma_alloc_coherent(core->dev, SIZE_WORKSPACE,
702 						  &vp9->workspace_paddr,
703 						  GFP_KERNEL);
704 	if (!vp9->workspace_vaddr) {
705 		dev_err(core->dev, "Failed to allocate VP9 Workspace\n");
706 		return -ENOMEM;
707 	}
708 
709 	return 0;
710 }
711 
712 static void codec_vp9_setup_workspace(struct amvdec_session *sess,
713 				      struct codec_vp9 *vp9)
714 {
715 	struct amvdec_core *core = sess->core;
716 	u32 revision = core->platform->revision;
717 	dma_addr_t wkaddr = vp9->workspace_paddr;
718 
719 	amvdec_write_dos(core, HEVCD_IPP_LINEBUFF_BASE, wkaddr + IPP_OFFSET);
720 	amvdec_write_dos(core, VP9_RPM_BUFFER, wkaddr + RPM_OFFSET);
721 	amvdec_write_dos(core, VP9_SHORT_TERM_RPS, wkaddr + SH_TM_RPS_OFFSET);
722 	amvdec_write_dos(core, VP9_PPS_BUFFER, wkaddr + PPS_OFFSET);
723 	amvdec_write_dos(core, VP9_SAO_UP, wkaddr + SAO_UP_OFFSET);
724 
725 	amvdec_write_dos(core, VP9_STREAM_SWAP_BUFFER,
726 			 wkaddr + SWAP_BUF_OFFSET);
727 	amvdec_write_dos(core, VP9_STREAM_SWAP_BUFFER2,
728 			 wkaddr + SWAP_BUF2_OFFSET);
729 	amvdec_write_dos(core, VP9_SCALELUT, wkaddr + SCALELUT_OFFSET);
730 
731 	if (core->platform->revision >= VDEC_REVISION_G12A)
732 		amvdec_write_dos(core, HEVC_DBLK_CFGE,
733 				 wkaddr + DBLK_PARA_OFFSET);
734 
735 	amvdec_write_dos(core, HEVC_DBLK_CFG4, wkaddr + DBLK_PARA_OFFSET);
736 	amvdec_write_dos(core, HEVC_DBLK_CFG5, wkaddr + DBLK_DATA_OFFSET);
737 	amvdec_write_dos(core, VP9_SEG_MAP_BUFFER, wkaddr + SEG_MAP_OFFSET);
738 	amvdec_write_dos(core, VP9_PROB_SWAP_BUFFER, wkaddr + PROB_OFFSET);
739 	amvdec_write_dos(core, VP9_COUNT_SWAP_BUFFER, wkaddr + COUNT_OFFSET);
740 	amvdec_write_dos(core, LMEM_DUMP_ADR, wkaddr + LMEM_OFFSET);
741 
742 	if (codec_hevc_use_mmu(revision, sess->pixfmt_cap, vp9->is_10bit)) {
743 		amvdec_write_dos(core, HEVC_SAO_MMU_VH0_ADDR,
744 				 wkaddr + MMU_VBH_OFFSET);
745 		amvdec_write_dos(core, HEVC_SAO_MMU_VH1_ADDR,
746 				 wkaddr + MMU_VBH_OFFSET + (MMU_VBH_SIZE / 2));
747 
748 		if (revision >= VDEC_REVISION_G12A)
749 			amvdec_write_dos(core, HEVC_ASSIST_MMU_MAP_ADDR,
750 					 vp9->common.mmu_map_paddr);
751 		else
752 			amvdec_write_dos(core, VP9_MMU_MAP_BUFFER,
753 					 vp9->common.mmu_map_paddr);
754 	}
755 }
756 
757 static int codec_vp9_start(struct amvdec_session *sess)
758 {
759 	struct amvdec_core *core = sess->core;
760 	struct codec_vp9 *vp9;
761 	u32 val;
762 	int i;
763 	int ret;
764 
765 	vp9 = kzalloc(sizeof(*vp9), GFP_KERNEL);
766 	if (!vp9)
767 		return -ENOMEM;
768 
769 	ret = codec_vp9_alloc_workspace(core, vp9);
770 	if (ret)
771 		goto free_vp9;
772 
773 	codec_vp9_setup_workspace(sess, vp9);
774 	amvdec_write_dos_bits(core, HEVC_STREAM_CONTROL, BIT(0));
775 	/* stream_fifo_hole */
776 	if (core->platform->revision >= VDEC_REVISION_G12A)
777 		amvdec_write_dos_bits(core, HEVC_STREAM_FIFO_CTL, BIT(29));
778 
779 	val = amvdec_read_dos(core, HEVC_PARSER_INT_CONTROL) & 0x7fffffff;
780 	val |= (3 << 29) | BIT(24) | BIT(22) | BIT(7) | BIT(4) | BIT(0);
781 	amvdec_write_dos(core, HEVC_PARSER_INT_CONTROL, val);
782 	amvdec_write_dos_bits(core, HEVC_SHIFT_STATUS, BIT(0));
783 	amvdec_write_dos(core, HEVC_SHIFT_CONTROL, BIT(10) | BIT(9) |
784 			 (3 << 6) | BIT(5) | BIT(2) | BIT(1) | BIT(0));
785 	amvdec_write_dos(core, HEVC_CABAC_CONTROL, BIT(0));
786 	amvdec_write_dos(core, HEVC_PARSER_CORE_CONTROL, BIT(0));
787 	amvdec_write_dos(core, HEVC_SHIFT_STARTCODE, 0x00000001);
788 
789 	amvdec_write_dos(core, VP9_DEC_STATUS_REG, 0);
790 
791 	amvdec_write_dos(core, HEVC_PARSER_CMD_WRITE, BIT(16));
792 	for (i = 0; i < ARRAY_SIZE(vdec_hevc_parser_cmd); ++i)
793 		amvdec_write_dos(core, HEVC_PARSER_CMD_WRITE,
794 				 vdec_hevc_parser_cmd[i]);
795 
796 	amvdec_write_dos(core, HEVC_PARSER_CMD_SKIP_0, PARSER_CMD_SKIP_CFG_0);
797 	amvdec_write_dos(core, HEVC_PARSER_CMD_SKIP_1, PARSER_CMD_SKIP_CFG_1);
798 	amvdec_write_dos(core, HEVC_PARSER_CMD_SKIP_2, PARSER_CMD_SKIP_CFG_2);
799 	amvdec_write_dos(core, HEVC_PARSER_IF_CONTROL,
800 			 BIT(5) | BIT(2) | BIT(0));
801 
802 	amvdec_write_dos(core, HEVCD_IPP_TOP_CNTL, BIT(0));
803 	amvdec_write_dos(core, HEVCD_IPP_TOP_CNTL, BIT(1));
804 
805 	amvdec_write_dos(core, VP9_WAIT_FLAG, 1);
806 
807 	/* clear mailbox interrupt */
808 	amvdec_write_dos(core, HEVC_ASSIST_MBOX1_CLR_REG, 1);
809 	/* enable mailbox interrupt */
810 	amvdec_write_dos(core, HEVC_ASSIST_MBOX1_MASK, 1);
811 	/* disable PSCALE for hardware sharing */
812 	amvdec_write_dos(core, HEVC_PSCALE_CTRL, 0);
813 	/* Let the uCode do all the parsing */
814 	amvdec_write_dos(core, NAL_SEARCH_CTL, 0x8);
815 
816 	amvdec_write_dos(core, DECODE_STOP_POS, 0);
817 	amvdec_write_dos(core, VP9_DECODE_MODE, DECODE_MODE_SINGLE);
818 
819 	pr_debug("decode_count: %u; decode_size: %u\n",
820 		 amvdec_read_dos(core, HEVC_DECODE_COUNT),
821 		 amvdec_read_dos(core, HEVC_DECODE_SIZE));
822 
823 	vp9_loop_filter_init(core, vp9);
824 
825 	INIT_LIST_HEAD(&vp9->ref_frames_list);
826 	mutex_init(&vp9->lock);
827 	memset(&vp9->ref_frame_map, -1, sizeof(vp9->ref_frame_map));
828 	memset(&vp9->next_ref_frame_map, -1, sizeof(vp9->next_ref_frame_map));
829 	for (i = 0; i < REFS_PER_FRAME; ++i)
830 		vp9->frame_refs[i] = NULL;
831 	sess->priv = vp9;
832 
833 	return 0;
834 
835 free_vp9:
836 	kfree(vp9);
837 	return ret;
838 }
839 
840 static int codec_vp9_stop(struct amvdec_session *sess)
841 {
842 	struct amvdec_core *core = sess->core;
843 	struct codec_vp9 *vp9 = sess->priv;
844 
845 	mutex_lock(&vp9->lock);
846 	if (vp9->workspace_vaddr)
847 		dma_free_coherent(core->dev, SIZE_WORKSPACE,
848 				  vp9->workspace_vaddr,
849 				  vp9->workspace_paddr);
850 
851 	codec_hevc_free_fbc_buffers(sess, &vp9->common);
852 	mutex_unlock(&vp9->lock);
853 
854 	return 0;
855 }
856 
857 /*
858  * Program LAST & GOLDEN frames into the motion compensation reference cache
859  * controller
860  */
861 static void codec_vp9_set_mcrcc(struct amvdec_session *sess)
862 {
863 	struct amvdec_core *core = sess->core;
864 	struct codec_vp9 *vp9 = sess->priv;
865 	u32 val;
866 
867 	/* Reset mcrcc */
868 	amvdec_write_dos(core, HEVCD_MCRCC_CTL1, 0x2);
869 	/* Disable on I-frame */
870 	if (vp9->cur_frame->type == KEY_FRAME || vp9->cur_frame->intra_only) {
871 		amvdec_write_dos(core, HEVCD_MCRCC_CTL1, 0x0);
872 		return;
873 	}
874 
875 	amvdec_write_dos(core, HEVCD_MPP_ANC_CANVAS_ACCCONFIG_ADDR, BIT(1));
876 	val = amvdec_read_dos(core, HEVCD_MPP_ANC_CANVAS_DATA_ADDR) & 0xffff;
877 	val |= (val << 16);
878 	amvdec_write_dos(core, HEVCD_MCRCC_CTL2, val);
879 	val = amvdec_read_dos(core, HEVCD_MPP_ANC_CANVAS_DATA_ADDR) & 0xffff;
880 	val |= (val << 16);
881 	amvdec_write_dos(core, HEVCD_MCRCC_CTL3, val);
882 
883 	/* Enable mcrcc progressive-mode */
884 	amvdec_write_dos(core, HEVCD_MCRCC_CTL1, 0xff0);
885 }
886 
887 static void codec_vp9_set_sao(struct amvdec_session *sess,
888 			      struct vb2_buffer *vb)
889 {
890 	struct amvdec_core *core = sess->core;
891 	struct codec_vp9 *vp9 = sess->priv;
892 
893 	dma_addr_t buf_y_paddr;
894 	dma_addr_t buf_u_v_paddr;
895 	u32 val;
896 
897 	if (codec_hevc_use_downsample(sess->pixfmt_cap, vp9->is_10bit))
898 		buf_y_paddr =
899 			vp9->common.fbc_buffer_paddr[vb->index];
900 	else
901 		buf_y_paddr =
902 		       vb2_dma_contig_plane_dma_addr(vb, 0);
903 
904 	if (codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit)) {
905 		val = amvdec_read_dos(core, HEVC_SAO_CTRL5) & ~0xff0200;
906 		amvdec_write_dos(core, HEVC_SAO_CTRL5, val);
907 		amvdec_write_dos(core, HEVC_CM_BODY_START_ADDR, buf_y_paddr);
908 	}
909 
910 	if (sess->pixfmt_cap == V4L2_PIX_FMT_NV12M) {
911 		buf_y_paddr =
912 		       vb2_dma_contig_plane_dma_addr(vb, 0);
913 		buf_u_v_paddr =
914 		       vb2_dma_contig_plane_dma_addr(vb, 1);
915 		amvdec_write_dos(core, HEVC_SAO_Y_START_ADDR, buf_y_paddr);
916 		amvdec_write_dos(core, HEVC_SAO_C_START_ADDR, buf_u_v_paddr);
917 		amvdec_write_dos(core, HEVC_SAO_Y_WPTR, buf_y_paddr);
918 		amvdec_write_dos(core, HEVC_SAO_C_WPTR, buf_u_v_paddr);
919 	}
920 
921 	if (codec_hevc_use_mmu(core->platform->revision, sess->pixfmt_cap,
922 			       vp9->is_10bit)) {
923 		amvdec_write_dos(core, HEVC_CM_HEADER_START_ADDR,
924 				 vp9->common.mmu_header_paddr[vb->index]);
925 		/* use HEVC_CM_HEADER_START_ADDR */
926 		amvdec_write_dos_bits(core, HEVC_SAO_CTRL5, BIT(10));
927 	}
928 
929 	amvdec_write_dos(core, HEVC_SAO_Y_LENGTH,
930 			 amvdec_get_output_size(sess));
931 	amvdec_write_dos(core, HEVC_SAO_C_LENGTH,
932 			 (amvdec_get_output_size(sess) / 2));
933 
934 	if (core->platform->revision >= VDEC_REVISION_G12A) {
935 		amvdec_clear_dos_bits(core, HEVC_DBLK_CFGB,
936 				      BIT(4) | BIT(5) | BIT(8) | BIT(9));
937 		/* enable first, compressed write */
938 		if (codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit))
939 			amvdec_write_dos_bits(core, HEVC_DBLK_CFGB, BIT(8));
940 
941 		/* enable second, uncompressed write */
942 		if (sess->pixfmt_cap == V4L2_PIX_FMT_NV12M)
943 			amvdec_write_dos_bits(core, HEVC_DBLK_CFGB, BIT(9));
944 
945 		/* dblk pipeline mode=1 for performance */
946 		if (sess->width >= 1280)
947 			amvdec_write_dos_bits(core, HEVC_DBLK_CFGB, BIT(4));
948 
949 		pr_debug("HEVC_DBLK_CFGB: %08X\n",
950 			 amvdec_read_dos(core, HEVC_DBLK_CFGB));
951 	}
952 
953 	val = amvdec_read_dos(core, HEVC_SAO_CTRL1) & ~0x3ff0;
954 	val |= 0xff0; /* Set endianness for 2-byte swaps (NV12) */
955 	if (core->platform->revision < VDEC_REVISION_G12A) {
956 		val &= ~0x3;
957 		if (!codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit))
958 			val |= BIT(0); /* disable cm compression */
959 		/* TOFIX: Handle Amlogic Framebuffer compression */
960 	}
961 
962 	amvdec_write_dos(core, HEVC_SAO_CTRL1, val);
963 	pr_debug("HEVC_SAO_CTRL1: %08X\n", val);
964 
965 	/* no downscale for NV12 */
966 	val = amvdec_read_dos(core, HEVC_SAO_CTRL5) & ~0xff0000;
967 	amvdec_write_dos(core, HEVC_SAO_CTRL5, val);
968 
969 	val = amvdec_read_dos(core, HEVCD_IPP_AXIIF_CONFIG) & ~0x30;
970 	val |= 0xf;
971 	val &= ~BIT(12); /* NV12 */
972 	amvdec_write_dos(core, HEVCD_IPP_AXIIF_CONFIG, val);
973 }
974 
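/* DMA address of the motion vector buffer assigned to a frame in the workspace */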
975 static dma_addr_t codec_vp9_get_frame_mv_paddr(struct codec_vp9 *vp9,
976 					       struct vp9_frame *frame)
977 {
978 	return vp9->workspace_paddr + MPRED_MV_OFFSET +
979 	       (frame->index * MPRED_MV_BUF_SIZE);
980 }
981 
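/*
 * Configure the motion prediction MV buffers. MVs from the previous frame
 * are only reused when it is a shown, non-intra frame with the same
 * dimensions as the current one.
 */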
982 static void codec_vp9_set_mpred_mv(struct amvdec_core *core,
983 				   struct codec_vp9 *vp9)
984 {
985 	int mpred_mv_rd_end_addr;
986 	int use_prev_frame_mvs = vp9->prev_frame->width ==
987 					vp9->cur_frame->width &&
988 				 vp9->prev_frame->height ==
989 					vp9->cur_frame->height &&
990 				 !vp9->prev_frame->intra_only &&
991 				 vp9->prev_frame->show &&
992 				 vp9->prev_frame->type != KEY_FRAME;
993 
994 	amvdec_write_dos(core, HEVC_MPRED_CTRL3, 0x24122412);
995 	amvdec_write_dos(core, HEVC_MPRED_ABV_START_ADDR,
996 			 vp9->workspace_paddr + MPRED_ABV_OFFSET);
997 
998 	amvdec_clear_dos_bits(core, HEVC_MPRED_CTRL4, BIT(6));
999 	if (use_prev_frame_mvs)
1000 		amvdec_write_dos_bits(core, HEVC_MPRED_CTRL4, BIT(6));
1001 
1002 	amvdec_write_dos(core, HEVC_MPRED_MV_WR_START_ADDR,
1003 			 codec_vp9_get_frame_mv_paddr(vp9, vp9->cur_frame));
1004 	amvdec_write_dos(core, HEVC_MPRED_MV_WPTR,
1005 			 codec_vp9_get_frame_mv_paddr(vp9, vp9->cur_frame));
1006 
1007 	amvdec_write_dos(core, HEVC_MPRED_MV_RD_START_ADDR,
1008 			 codec_vp9_get_frame_mv_paddr(vp9, vp9->prev_frame));
1009 	amvdec_write_dos(core, HEVC_MPRED_MV_RPTR,
1010 			 codec_vp9_get_frame_mv_paddr(vp9, vp9->prev_frame));
1011 
1012 	mpred_mv_rd_end_addr =
1013 			codec_vp9_get_frame_mv_paddr(vp9, vp9->prev_frame) +
1014 			(vp9->lcu_total * MV_MEM_UNIT);
1015 	amvdec_write_dos(core, HEVC_MPRED_MV_RD_END_ADDR, mpred_mv_rd_end_addr);
1016 }
1017 
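/*
 * Build the next reference frame map: slots selected by refresh_frame_flags
 * (all slots for a keyframe) point to the current frame, the other slots
 * keep their previous mapping.
 */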
1018 static void codec_vp9_update_next_ref(struct codec_vp9 *vp9)
1019 {
1020 	union rpm_param *param = &vp9->rpm_param;
1021 	u32 buf_idx = vp9->cur_frame->index;
1022 	int ref_index = 0;
1023 	int refresh_frame_flags;
1024 	int mask;
1025 
1026 	refresh_frame_flags = vp9->cur_frame->type == KEY_FRAME ?
1027 				0xff : param->p.refresh_frame_flags;
1028 
1029 	for (mask = refresh_frame_flags; mask; mask >>= 1) {
1030 		pr_debug("mask=%08X; ref_index=%d\n", mask, ref_index);
1031 		if (mask & 1)
1032 			vp9->next_ref_frame_map[ref_index] = buf_idx;
1033 		else
1034 			vp9->next_ref_frame_map[ref_index] =
1035 				vp9->ref_frame_map[ref_index];
1036 
1037 		++ref_index;
1038 	}
1039 
1040 	for (; ref_index < REF_FRAMES; ++ref_index)
1041 		vp9->next_ref_frame_map[ref_index] =
1042 			vp9->ref_frame_map[ref_index];
1043 }
1044 
1045 static void codec_vp9_save_refs(struct codec_vp9 *vp9)
1046 {
1047 	union rpm_param *param = &vp9->rpm_param;
1048 	int i;
1049 
1050 	for (i = 0; i < REFS_PER_FRAME; ++i) {
1051 		const int ref = (param->p.ref_info >>
1052 				 (((REFS_PER_FRAME - i - 1) * 4) + 1)) & 0x7;
1053 
1054 		if (vp9->ref_frame_map[ref] < 0)
1055 			continue;
1056 
1057 		pr_warn("%s: FIXME, would need to save ref %d\n",
1058 			__func__, vp9->ref_frame_map[ref]);
1059 	}
1060 }
1061 
1062 static void codec_vp9_update_ref(struct codec_vp9 *vp9)
1063 {
1064 	union rpm_param *param = &vp9->rpm_param;
1065 	int ref_index = 0;
1066 	int mask;
1067 	int refresh_frame_flags;
1068 
1069 	if (!vp9->cur_frame)
1070 		return;
1071 
1072 	refresh_frame_flags = vp9->cur_frame->type == KEY_FRAME ?
1073 				0xff : param->p.refresh_frame_flags;
1074 
1075 	for (mask = refresh_frame_flags; mask; mask >>= 1) {
1076 		vp9->ref_frame_map[ref_index] =
1077 			vp9->next_ref_frame_map[ref_index];
1078 		++ref_index;
1079 	}
1080 
1081 	if (param->p.show_existing_frame)
1082 		return;
1083 
1084 	for (; ref_index < REF_FRAMES; ++ref_index)
1085 		vp9->ref_frame_map[ref_index] =
1086 			vp9->next_ref_frame_map[ref_index];
1087 }
1088 
1089 static struct vp9_frame *codec_vp9_get_frame_by_idx(struct codec_vp9 *vp9,
1090 						    int idx)
1091 {
1092 	struct vp9_frame *frame;
1093 
1094 	list_for_each_entry(frame, &vp9->ref_frames_list, list) {
1095 		if (frame->index == idx)
1096 			return frame;
1097 	}
1098 
1099 	return NULL;
1100 }
1101 
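/* Resolve the three active reference slots from ref_info into vp9_frame pointers */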
1102 static void codec_vp9_sync_ref(struct codec_vp9 *vp9)
1103 {
1104 	union rpm_param *param = &vp9->rpm_param;
1105 	int i;
1106 
1107 	for (i = 0; i < REFS_PER_FRAME; ++i) {
1108 		const int ref = (param->p.ref_info >>
1109 				 (((REFS_PER_FRAME - i - 1) * 4) + 1)) & 0x7;
1110 		const int idx = vp9->ref_frame_map[ref];
1111 
1112 		vp9->frame_refs[i] = codec_vp9_get_frame_by_idx(vp9, idx);
1113 		if (!vp9->frame_refs[i])
1114 			pr_warn("%s: couldn't find VP9 ref %d\n", __func__,
1115 				idx);
1116 	}
1117 }
1118 
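/* Write the canvas indices of the three reference frames into the MPP canvas table */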
1119 static void codec_vp9_set_refs(struct amvdec_session *sess,
1120 			       struct codec_vp9 *vp9)
1121 {
1122 	struct amvdec_core *core = sess->core;
1123 	int i;
1124 
1125 	for (i = 0; i < REFS_PER_FRAME; ++i) {
1126 		struct vp9_frame *frame = vp9->frame_refs[i];
1127 		int id_y;
1128 		int id_u_v;
1129 
1130 		if (!frame)
1131 			continue;
1132 
1133 		if (codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit)) {
1134 			id_y = frame->index;
1135 			id_u_v = id_y;
1136 		} else {
1137 			id_y = frame->index * 2;
1138 			id_u_v = id_y + 1;
1139 		}
1140 
1141 		amvdec_write_dos(core, HEVCD_MPP_ANC_CANVAS_DATA_ADDR,
1142 				 (id_u_v << 16) | (id_u_v << 8) | id_y);
1143 	}
1144 }
1145 
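/*
 * Program the motion compensation reference info: canvas indices, then
 * per-reference width, height, scaling ratios and body size.
 */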
1146 static void codec_vp9_set_mc(struct amvdec_session *sess,
1147 			     struct codec_vp9 *vp9)
1148 {
1149 	struct amvdec_core *core = sess->core;
1150 	u32 scale = 0;
1151 	u32 sz;
1152 	int i;
1153 
1154 	amvdec_write_dos(core, HEVCD_MPP_ANC_CANVAS_ACCCONFIG_ADDR, 1);
1155 	codec_vp9_set_refs(sess, vp9);
1156 	amvdec_write_dos(core, HEVCD_MPP_ANC_CANVAS_ACCCONFIG_ADDR,
1157 			 (16 << 8) | 1);
1158 	codec_vp9_set_refs(sess, vp9);
1159 
1160 	amvdec_write_dos(core, VP9D_MPP_REFINFO_TBL_ACCCONFIG, BIT(2));
1161 	for (i = 0; i < REFS_PER_FRAME; ++i) {
1162 		if (!vp9->frame_refs[i])
1163 			continue;
1164 
1165 		if (vp9->frame_refs[i]->width != vp9->width ||
1166 		    vp9->frame_refs[i]->height != vp9->height)
1167 			scale = 1;
1168 
1169 		sz = amvdec_am21c_body_size(vp9->frame_refs[i]->width,
1170 					    vp9->frame_refs[i]->height);
1171 
1172 		amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA,
1173 				 vp9->frame_refs[i]->width);
1174 		amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA,
1175 				 vp9->frame_refs[i]->height);
1176 		amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA,
1177 				 (vp9->frame_refs[i]->width << 14) /
1178 				 vp9->width);
1179 		amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA,
1180 				 (vp9->frame_refs[i]->height << 14) /
1181 				 vp9->height);
1182 		amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA, sz >> 5);
1183 	}
1184 
1185 	amvdec_write_dos(core, VP9D_MPP_REF_SCALE_ENBL, scale);
1186 }
1187 
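/*
 * Pick a destination buffer whose index is not already tracked in the
 * reference frames list and wrap it in a new vp9_frame.
 */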
1188 static struct vp9_frame *codec_vp9_get_new_frame(struct amvdec_session *sess)
1189 {
1190 	struct codec_vp9 *vp9 = sess->priv;
1191 	union rpm_param *param = &vp9->rpm_param;
1192 	struct vb2_v4l2_buffer *vbuf;
1193 	struct vp9_frame *new_frame;
1194 
1195 	new_frame = kzalloc(sizeof(*new_frame), GFP_KERNEL);
1196 	if (!new_frame)
1197 		return NULL;
1198 
1199 	vbuf = v4l2_m2m_dst_buf_remove(sess->m2m_ctx);
1200 	if (!vbuf) {
1201 		dev_err(sess->core->dev, "No dst buffer available\n");
1202 		kfree(new_frame);
1203 		return NULL;
1204 	}
1205 
1206 	while (codec_vp9_get_frame_by_idx(vp9, vbuf->vb2_buf.index)) {
1207 		struct vb2_v4l2_buffer *old_vbuf = vbuf;
1208 
1209 		vbuf = v4l2_m2m_dst_buf_remove(sess->m2m_ctx);
1210 		v4l2_m2m_buf_queue(sess->m2m_ctx, old_vbuf);
1211 		if (!vbuf) {
1212 			dev_err(sess->core->dev, "No dst buffer available\n");
1213 			kfree(new_frame);
1214 			return NULL;
1215 		}
1216 	}
1217 
1218 	new_frame->vbuf = vbuf;
1219 	new_frame->index = vbuf->vb2_buf.index;
1220 	new_frame->intra_only = param->p.intra_only;
1221 	new_frame->show = param->p.show_frame;
1222 	new_frame->type = param->p.frame_type;
1223 	new_frame->width = vp9->width;
1224 	new_frame->height = vp9->height;
1225 	list_add_tail(&new_frame->list, &vp9->ref_frames_list);
1226 	vp9->frames_num++;
1227 
1228 	return new_frame;
1229 }
1230 
1231 static void codec_vp9_show_existing_frame(struct codec_vp9 *vp9)
1232 {
1233 	union rpm_param *param = &vp9->rpm_param;
1234 
1235 	if (!param->p.show_existing_frame)
1236 		return;
1237 
1238 	pr_debug("showing frame %u\n", param->p.frame_to_show_idx);
1239 }
1240 
1241 static void codec_vp9_rm_noshow_frame(struct amvdec_session *sess)
1242 {
1243 	struct codec_vp9 *vp9 = sess->priv;
1244 	struct vp9_frame *tmp;
1245 
1246 	list_for_each_entry(tmp, &vp9->ref_frames_list, list) {
1247 		if (tmp->show)
1248 			continue;
1249 
1250 		pr_debug("rm noshow: %u\n", tmp->index);
1251 		v4l2_m2m_buf_queue(sess->m2m_ctx, tmp->vbuf);
1252 		list_del(&tmp->list);
1253 		kfree(tmp);
1254 		vp9->frames_num--;
1255 		return;
1256 	}
1257 }
1258 
1259 static void codec_vp9_process_frame(struct amvdec_session *sess)
1260 {
1261 	struct amvdec_core *core = sess->core;
1262 	struct codec_vp9 *vp9 = sess->priv;
1263 	union rpm_param *param = &vp9->rpm_param;
1264 	int intra_only;
1265 
1266 	if (!param->p.show_frame)
1267 		codec_vp9_rm_noshow_frame(sess);
1268 
1269 	vp9->cur_frame = codec_vp9_get_new_frame(sess);
1270 	if (!vp9->cur_frame)
1271 		return;
1272 
1273 	pr_debug("frame %d: type: %08X; show_exist: %u; show: %u, intra_only: %u\n",
1274 		 vp9->cur_frame->index,
1275 		 param->p.frame_type, param->p.show_existing_frame,
1276 		 param->p.show_frame, param->p.intra_only);
1277 
1278 	if (param->p.frame_type != KEY_FRAME)
1279 		codec_vp9_sync_ref(vp9);
1280 	codec_vp9_update_next_ref(vp9);
1281 	codec_vp9_show_existing_frame(vp9);
1282 
1283 	if (codec_hevc_use_mmu(core->platform->revision, sess->pixfmt_cap,
1284 			       vp9->is_10bit))
1285 		codec_hevc_fill_mmu_map(sess, &vp9->common,
1286 					&vp9->cur_frame->vbuf->vb2_buf);
1287 
1288 	intra_only = param->p.show_frame ? 0 : param->p.intra_only;
1289 
1290 	/* Set up motion prediction; clear it for keyframes and intra-only frames */
1291 	if (param->p.frame_type != KEY_FRAME && !intra_only) {
1292 		codec_vp9_set_mc(sess, vp9);
1293 		codec_vp9_set_mpred_mv(core, vp9);
1294 	} else {
1295 		amvdec_clear_dos_bits(core, HEVC_MPRED_CTRL4, BIT(6));
1296 	}
1297 
1298 	amvdec_write_dos(core, HEVC_PARSER_PICTURE_SIZE,
1299 			 (vp9->height << 16) | vp9->width);
1300 	codec_vp9_set_mcrcc(sess);
1301 	codec_vp9_set_sao(sess, &vp9->cur_frame->vbuf->vb2_buf);
1302 
1303 	vp9_loop_filter_frame_init(core, &vp9->seg_4lf,
1304 				   &vp9->lfi, &vp9->lf,
1305 				   vp9->default_filt_lvl);
1306 
1307 	/* ask uCode to start decoding */
1308 	amvdec_write_dos(core, VP9_DEC_STATUS_REG, VP9_10B_DECODE_SLICE);
1309 }
1310 
1311 static void codec_vp9_process_lf(struct codec_vp9 *vp9)
1312 {
1313 	union rpm_param *param = &vp9->rpm_param;
1314 	int i;
1315 
1316 	vp9->lf.mode_ref_delta_enabled = param->p.mode_ref_delta_enabled;
1317 	vp9->lf.sharpness_level = param->p.sharpness_level;
1318 	vp9->default_filt_lvl = param->p.filter_level;
1319 	vp9->seg_4lf.enabled = param->p.seg_enabled;
1320 	vp9->seg_4lf.abs_delta = param->p.seg_abs_delta;
1321 
1322 	for (i = 0; i < 4; i++)
1323 		vp9->lf.ref_deltas[i] = param->p.ref_deltas[i];
1324 
1325 	for (i = 0; i < 2; i++)
1326 		vp9->lf.mode_deltas[i] = param->p.mode_deltas[i];
1327 
1328 	for (i = 0; i < MAX_SEGMENTS; i++)
1329 		vp9->seg_4lf.feature_mask[i] =
1330 			(param->p.seg_lf_info[i] & 0x8000) ?
1331 				(1 << SEG_LVL_ALT_LF) : 0;
1332 
1333 	for (i = 0; i < MAX_SEGMENTS; i++)
1334 		vp9->seg_4lf.feature_data[i][SEG_LVL_ALT_LF] =
1335 			(param->p.seg_lf_info[i] & 0x100) ?
1336 				-(param->p.seg_lf_info[i] & 0x3f)
1337 				: (param->p.seg_lf_info[i] & 0x3f);
1338 }
1339 
1340 static void codec_vp9_resume(struct amvdec_session *sess)
1341 {
1342 	struct codec_vp9 *vp9 = sess->priv;
1343 
1344 	mutex_lock(&vp9->lock);
1345 	if (codec_hevc_setup_buffers(sess, &vp9->common, vp9->is_10bit)) {
1346 		mutex_unlock(&vp9->lock);
1347 		amvdec_abort(sess);
1348 		return;
1349 	}
1350 
1351 	codec_vp9_setup_workspace(sess, vp9);
1352 	codec_hevc_setup_decode_head(sess, vp9->is_10bit);
1353 	codec_vp9_process_lf(vp9);
1354 	codec_vp9_process_frame(sess);
1355 
1356 	mutex_unlock(&vp9->lock);
1357 }
1358 
1359 /*
1360  * The RPM section within the workspace contains
1361  * information about the parsed bitstream
1362  */
1363 static void codec_vp9_fetch_rpm(struct amvdec_session *sess)
1364 {
1365 	struct codec_vp9 *vp9 = sess->priv;
1366 	u16 *rpm_vaddr = vp9->workspace_vaddr + RPM_OFFSET;
1367 	int i, j;
1368 
1369 	for (i = 0; i < RPM_BUF_SIZE; i += 4)
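	/*
	 * The RPM data is stored with each group of four u16 values in
	 * reverse order; swap them back while copying.
	 */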
1370 		for (j = 0; j < 4; j++)
1371 			vp9->rpm_param.l.data[i + j] = rpm_vaddr[i + 3 - j];
1372 }
1373 
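/*
 * Read the coded width, height and bit depth from the RPM parameters,
 * update the LCU count and report whether the source format changed.
 */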
1374 static int codec_vp9_process_rpm(struct codec_vp9 *vp9)
1375 {
1376 	union rpm_param *param = &vp9->rpm_param;
1377 	int src_changed = 0;
1378 	int is_10bit = 0;
1379 	int pic_width_64 = ALIGN(param->p.width, 64);
1380 	int pic_height_32 = ALIGN(param->p.height, 32);
1381 	int pic_width_lcu  = (pic_width_64 % LCU_SIZE) ?
1382 				pic_width_64 / LCU_SIZE  + 1
1383 				: pic_width_64 / LCU_SIZE;
1384 	int pic_height_lcu = (pic_height_32 % LCU_SIZE) ?
1385 				pic_height_32 / LCU_SIZE + 1
1386 				: pic_height_32 / LCU_SIZE;
1387 	vp9->lcu_total = pic_width_lcu * pic_height_lcu;
1388 
1389 	if (param->p.bit_depth == 10)
1390 		is_10bit = 1;
1391 
1392 	if (vp9->width != param->p.width || vp9->height != param->p.height ||
1393 	    vp9->is_10bit != is_10bit)
1394 		src_changed = 1;
1395 
1396 	vp9->width = param->p.width;
1397 	vp9->height = param->p.height;
1398 	vp9->is_10bit = is_10bit;
1399 
1400 	pr_debug("width: %u; height: %u; is_10bit: %d; src_changed: %d\n",
1401 		 vp9->width, vp9->height, is_10bit, src_changed);
1402 
1403 	return src_changed;
1404 }
1405 
codec_vp9_is_ref(struct codec_vp9 * vp9,struct vp9_frame * frame)1406 static bool codec_vp9_is_ref(struct codec_vp9 *vp9, struct vp9_frame *frame)
1407 {
1408 	int i;
1409 
1410 	for (i = 0; i < REF_FRAMES; ++i)
1411 		if (vp9->ref_frame_map[i] == frame->index)
1412 			return true;
1413 
1414 	return false;
1415 }
1416 
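/* Mark shown frames as done and drop frames that are no longer referenced */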
1417 static void codec_vp9_show_frame(struct amvdec_session *sess)
1418 {
1419 	struct codec_vp9 *vp9 = sess->priv;
1420 	struct vp9_frame *tmp, *n;
1421 
1422 	list_for_each_entry_safe(tmp, n, &vp9->ref_frames_list, list) {
1423 		if (!tmp->show || tmp == vp9->cur_frame)
1424 			continue;
1425 
1426 		if (!tmp->done) {
1427 			pr_debug("Marking %u done\n", tmp->index);
1428 			amvdec_dst_buf_done(sess, tmp->vbuf, V4L2_FIELD_NONE);
1429 			tmp->done = 1;
1430 			vp9->frames_num--;
1431 		}
1432 
1433 		if (codec_vp9_is_ref(vp9, tmp) || tmp == vp9->prev_frame)
1434 			continue;
1435 
1436 		pr_debug("deleting %d\n", tmp->index);
1437 		list_del(&tmp->list);
1438 		kfree(tmp);
1439 	}
1440 }
1441 
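/*
 * Merge a single tree node probability: blend the previous probability with
 * the one derived from the left/right branch counts, weighted by
 * count_to_update_factor.
 */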
1442 static void vp9_tree_merge_probs(unsigned int *prev_prob,
1443 				 unsigned int *cur_prob,
1444 				 int coef_node_start, int tree_left,
1445 				 int tree_right,
1446 				 int tree_i, int node)
1447 {
1448 	int prob_32, prob_res, prob_shift;
1449 	int pre_prob, new_prob;
1450 	int den, m_count, get_prob, factor;
1451 
1452 	prob_32 = prev_prob[coef_node_start / 4 * 2];
1453 	prob_res = coef_node_start & 3;
1454 	prob_shift = prob_res * 8;
1455 	pre_prob = (prob_32 >> prob_shift) & 0xff;
1456 
1457 	den = tree_left + tree_right;
1458 
1459 	if (den == 0) {
1460 		new_prob = pre_prob;
1461 	} else {
1462 		m_count = min(den, MODE_MV_COUNT_SAT);
1463 		get_prob =
1464 			clip_prob(div_r32(((int64_t)tree_left * 256 +
1465 					   (den >> 1)),
1466 					  den));
1467 
1468 		/* weighted_prob */
1469 		factor = count_to_update_factor[m_count];
1470 		new_prob = round_power_of_two(pre_prob * (256 - factor) +
1471 					      get_prob * factor, 8);
1472 	}
1473 
1474 	cur_prob[coef_node_start / 4 * 2] =
1475 		(cur_prob[coef_node_start / 4 * 2] & (~(0xff << prob_shift))) |
1476 		(new_prob << prob_shift);
1477 }
1478 
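/*
 * Adapt the three node probabilities of each coefficient context from the
 * branch counts.
 */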
1479 static void adapt_coef_probs_cxt(unsigned int *prev_prob,
1480 				 unsigned int *cur_prob,
1481 				 unsigned int *count,
1482 				 int update_factor,
1483 				 int cxt_num,
1484 				 int coef_cxt_start,
1485 				 int coef_count_cxt_start)
1486 {
1487 	int prob_32, prob_res, prob_shift;
1488 	int pre_prob, new_prob;
1489 	int num, den, m_count, get_prob, factor;
1490 	int node, coef_node_start;
1491 	int count_sat = 24;
1492 	int cxt;
1493 
1494 	for (cxt = 0; cxt < cxt_num; cxt++) {
1495 		const int n0 = count[coef_count_cxt_start];
1496 		const int n1 = count[coef_count_cxt_start + 1];
1497 		const int n2 = count[coef_count_cxt_start + 2];
1498 		const int neob = count[coef_count_cxt_start + 3];
1499 		const int nneob = count[coef_count_cxt_start + 4];
1500 		const unsigned int branch_ct[3][2] = {
1501 			{ neob, nneob },
1502 			{ n0, n1 + n2 },
1503 			{ n1, n2 }
1504 		};
1505 
1506 		coef_node_start = coef_cxt_start;
1507 		for (node = 0 ; node < 3 ; node++) {
1508 			prob_32 = prev_prob[coef_node_start / 4 * 2];
1509 			prob_res = coef_node_start & 3;
1510 			prob_shift = prob_res * 8;
1511 			pre_prob = (prob_32 >> prob_shift) & 0xff;
1512 
1513 			/* get binary prob */
1514 			num = branch_ct[node][0];
1515 			den = branch_ct[node][0] + branch_ct[node][1];
1516 			m_count = min(den, count_sat);
1517 
1518 			get_prob = (den == 0) ?
1519 					128u :
1520 					clip_prob(div_r32(((int64_t)num * 256 +
1521 							  (den >> 1)), den));
1522 
1523 			factor = update_factor * m_count / count_sat;
1524 			new_prob =
1525 				round_power_of_two(pre_prob * (256 - factor) +
1526 						   get_prob * factor, 8);
1527 
1528 			cur_prob[coef_node_start / 4 * 2] =
1529 				(cur_prob[coef_node_start / 4 * 2] &
1530 				 (~(0xff << prob_shift))) |
1531 				(new_prob << prob_shift);
1532 
1533 			coef_node_start += 1;
1534 		}
1535 
1536 		coef_cxt_start = coef_cxt_start + 3;
1537 		coef_count_cxt_start = coef_count_cxt_start + 5;
1538 	}
1539 }
1540 
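/*
 * Walk the coefficient probability tables (tx size, plane, type, band,
 * context) and adapt them from the counts; for non-key frames, also merge
 * the mode and MV probabilities.
 */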
1541 static void adapt_coef_probs(int prev_kf, int cur_kf, int pre_fc,
1542 			     unsigned int *prev_prob, unsigned int *cur_prob,
1543 			     unsigned int *count)
1544 {
1545 	int tx_size, coef_tx_size_start, coef_count_tx_size_start;
1546 	int plane, coef_plane_start, coef_count_plane_start;
1547 	int type, coef_type_start, coef_count_type_start;
1548 	int band, coef_band_start, coef_count_band_start;
1549 	int cxt_num;
1550 	int coef_cxt_start, coef_count_cxt_start;
1551 	int node, coef_node_start, coef_count_node_start;
1552 
1553 	int tree_i, tree_left, tree_right;
1554 	int mvd_i;
1555 
1556 	int update_factor = cur_kf ? 112 : (prev_kf ? 128 : 112);
1557 
1558 	int prob_32;
1559 	int prob_res;
1560 	int prob_shift;
1561 	int pre_prob;
1562 
1563 	int den;
1564 	int get_prob;
1565 	int m_count;
1566 	int factor;
1567 
1568 	int new_prob;
1569 
1570 	for (tx_size = 0 ; tx_size < 4 ; tx_size++) {
1571 		coef_tx_size_start = VP9_COEF_START +
1572 				tx_size * 4 * VP9_COEF_SIZE_ONE_SET;
1573 		coef_count_tx_size_start = VP9_COEF_COUNT_START +
1574 				tx_size * 4 * VP9_COEF_COUNT_SIZE_ONE_SET;
1575 		coef_plane_start = coef_tx_size_start;
1576 		coef_count_plane_start = coef_count_tx_size_start;
1577 
1578 		for (plane = 0 ; plane < 2 ; plane++) {
1579 			coef_type_start = coef_plane_start;
1580 			coef_count_type_start = coef_count_plane_start;
1581 
1582 			for (type = 0 ; type < 2 ; type++) {
1583 				coef_band_start = coef_type_start;
1584 				coef_count_band_start = coef_count_type_start;
1585 
1586 				for (band = 0 ; band < 6 ; band++) {
1587 					if (band == 0)
1588 						cxt_num = 3;
1589 					else
1590 						cxt_num = 6;
1591 					coef_cxt_start = coef_band_start;
1592 					coef_count_cxt_start =
1593 						coef_count_band_start;
1594 
1595 					adapt_coef_probs_cxt(prev_prob,
1596 							     cur_prob,
1597 							     count,
1598 							     update_factor,
1599 							     cxt_num,
1600 							     coef_cxt_start,
1601 							coef_count_cxt_start);
1602 
1603 					if (band == 0) {
1604 						coef_band_start += 10;
1605 						coef_count_band_start += 15;
1606 					} else {
1607 						coef_band_start += 18;
1608 						coef_count_band_start += 30;
1609 					}
1610 				}
1611 				coef_type_start += VP9_COEF_SIZE_ONE_SET;
1612 				coef_count_type_start +=
1613 					VP9_COEF_COUNT_SIZE_ONE_SET;
1614 			}
1615 
1616 			coef_plane_start += 2 * VP9_COEF_SIZE_ONE_SET;
1617 			coef_count_plane_start +=
1618 				2 * VP9_COEF_COUNT_SIZE_ONE_SET;
1619 		}
1620 	}
1621 
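	/*
	 * Mode and MV probabilities are only adapted on inter frames.
	 * The loop below walks the contiguous run of binary probabilities
	 * (intra/inter through the MV class0 HP bits), re-anchoring the
	 * probability offset at each known group boundary.
	 */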
1622 	if (cur_kf == 0) {
1623 		/* mode_mv_merge_probs - merge_intra_inter_prob */
1624 		for (coef_count_node_start = VP9_INTRA_INTER_COUNT_START;
1625 		     coef_count_node_start < (VP9_MV_CLASS0_HP_1_COUNT_START +
1626 					      VP9_MV_CLASS0_HP_1_COUNT_SIZE);
1627 		     coef_count_node_start += 2) {
1628 			if (coef_count_node_start ==
1629 					VP9_INTRA_INTER_COUNT_START)
1630 				coef_node_start = VP9_INTRA_INTER_START;
1631 			else if (coef_count_node_start ==
1632 					VP9_COMP_INTER_COUNT_START)
1633 				coef_node_start = VP9_COMP_INTER_START;
1634 			else if (coef_count_node_start ==
1635 					VP9_TX_MODE_COUNT_START)
1636 				coef_node_start = VP9_TX_MODE_START;
1637 			else if (coef_count_node_start ==
1638 					VP9_SKIP_COUNT_START)
1639 				coef_node_start = VP9_SKIP_START;
1640 			else if (coef_count_node_start ==
1641 					VP9_MV_SIGN_0_COUNT_START)
1642 				coef_node_start = VP9_MV_SIGN_0_START;
1643 			else if (coef_count_node_start ==
1644 					VP9_MV_SIGN_1_COUNT_START)
1645 				coef_node_start = VP9_MV_SIGN_1_START;
1646 			else if (coef_count_node_start ==
1647 					VP9_MV_BITS_0_COUNT_START)
1648 				coef_node_start = VP9_MV_BITS_0_START;
1649 			else if (coef_count_node_start ==
1650 					VP9_MV_BITS_1_COUNT_START)
1651 				coef_node_start = VP9_MV_BITS_1_START;
1652 			else /* node_start == VP9_MV_CLASS0_HP_0_COUNT_START */
1653 				coef_node_start = VP9_MV_CLASS0_HP_0_START;
1654 
1655 			den = count[coef_count_node_start] +
1656 			      count[coef_count_node_start + 1];
1657 
1658 			prob_32 = prev_prob[coef_node_start / 4 * 2];
1659 			prob_res = coef_node_start & 3;
1660 			prob_shift = prob_res * 8;
1661 			pre_prob = (prob_32 >> prob_shift) & 0xff;
1662 
1663 			if (den == 0) {
1664 				new_prob = pre_prob;
1665 			} else {
1666 				m_count = min(den, MODE_MV_COUNT_SAT);
1667 				get_prob =
1668 				clip_prob(div_r32(((int64_t)
1669 					count[coef_count_node_start] * 256 +
1670 					(den >> 1)),
1671 					den));
1672 
1673 				/* weighted prob */
1674 				factor = count_to_update_factor[m_count];
1675 				new_prob =
1676 					round_power_of_two(pre_prob *
1677 							   (256 - factor) +
1678 							   get_prob * factor,
1679 							   8);
1680 			}
1681 
1682 			cur_prob[coef_node_start / 4 * 2] =
1683 				(cur_prob[coef_node_start / 4 * 2] &
1684 				 (~(0xff << prob_shift))) |
1685 				(new_prob << prob_shift);
1686 
1687 			coef_node_start = coef_node_start + 1;
1688 		}
1689 
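		/* Inter mode tree: 7 contexts, 3 nodes each (4 inter modes) */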
1690 		coef_node_start = VP9_INTER_MODE_START;
1691 		coef_count_node_start = VP9_INTER_MODE_COUNT_START;
1692 		for (tree_i = 0 ; tree_i < 7 ; tree_i++) {
1693 			for (node = 0 ; node < 3 ; node++) {
1694 				unsigned int start = coef_count_node_start;
1695 
1696 				switch (node) {
1697 				case 2:
1698 					tree_left = count[start + 1];
1699 					tree_right = count[start + 3];
1700 					break;
1701 				case 1:
1702 					tree_left = count[start + 0];
1703 					tree_right = count[start + 1] +
1704 						     count[start + 3];
1705 					break;
1706 				default:
1707 					tree_left = count[start + 2];
1708 					tree_right = count[start + 0] +
1709 						     count[start + 1] +
1710 						     count[start + 3];
1711 					break;
1712 				}
1713 
1714 				vp9_tree_merge_probs(prev_prob, cur_prob,
1715 						     coef_node_start,
1716 						     tree_left, tree_right,
1717 						     tree_i, node);
1718 
1719 				coef_node_start = coef_node_start + 1;
1720 			}
1721 
1722 			coef_count_node_start = coef_count_node_start + 4;
1723 		}
1724 
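		/*
		 * Intra mode trees: 4 y-mode size groups plus 10 uv-mode
		 * contexts, 9 nodes each (10 intra modes).
		 */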
1725 		coef_node_start = VP9_IF_Y_MODE_START;
1726 		coef_count_node_start = VP9_IF_Y_MODE_COUNT_START;
1727 		for (tree_i = 0 ; tree_i < 14 ; tree_i++) {
1728 			for (node = 0 ; node < 9 ; node++) {
1729 				unsigned int start = coef_count_node_start;
1730 
1731 				switch (node) {
1732 				case 8:
1733 					tree_left =
1734 						count[start + D153_PRED];
1735 					tree_right =
1736 						count[start + D207_PRED];
1737 					break;
1738 				case 7:
1739 					tree_left =
1740 						count[start + D63_PRED];
1741 					tree_right =
1742 						count[start + D207_PRED] +
1743 						count[start + D153_PRED];
1744 					break;
1745 				case 6:
1746 					tree_left =
1747 						count[start + D45_PRED];
1748 					tree_right =
1749 						count[start + D207_PRED] +
1750 						count[start + D153_PRED] +
1751 						count[start + D63_PRED];
1752 					break;
1753 				case 5:
1754 					tree_left =
1755 						count[start + D135_PRED];
1756 					tree_right =
1757 						count[start + D117_PRED];
1758 					break;
1759 				case 4:
1760 					tree_left =
1761 						count[start + H_PRED];
1762 					tree_right =
1763 						count[start + D117_PRED] +
1764 						count[start + D135_PRED];
1765 					break;
1766 				case 3:
1767 					tree_left =
1768 						count[start + H_PRED] +
1769 						count[start + D117_PRED] +
1770 						count[start + D135_PRED];
1771 					tree_right =
1772 						count[start + D45_PRED] +
1773 						count[start + D207_PRED] +
1774 						count[start + D153_PRED] +
1775 						count[start + D63_PRED];
1776 					break;
1777 				case 2:
1778 					tree_left =
1779 						count[start + V_PRED];
1780 					tree_right =
1781 						count[start + H_PRED] +
1782 						count[start + D117_PRED] +
1783 						count[start + D135_PRED] +
1784 						count[start + D45_PRED] +
1785 						count[start + D207_PRED] +
1786 						count[start + D153_PRED] +
1787 						count[start + D63_PRED];
1788 					break;
1789 				case 1:
1790 					tree_left =
1791 						count[start + TM_PRED];
1792 					tree_right =
1793 						count[start + V_PRED] +
1794 						count[start + H_PRED] +
1795 						count[start + D117_PRED] +
1796 						count[start + D135_PRED] +
1797 						count[start + D45_PRED] +
1798 						count[start + D207_PRED] +
1799 						count[start + D153_PRED] +
1800 						count[start + D63_PRED];
1801 					break;
1802 				default:
1803 					tree_left =
1804 						count[start + DC_PRED];
1805 					tree_right =
1806 						count[start + TM_PRED] +
1807 						count[start + V_PRED] +
1808 						count[start + H_PRED] +
1809 						count[start + D117_PRED] +
1810 						count[start + D135_PRED] +
1811 						count[start + D45_PRED] +
1812 						count[start + D207_PRED] +
1813 						count[start + D153_PRED] +
1814 						count[start + D63_PRED];
1815 					break;
1816 				}
1817 
1818 				vp9_tree_merge_probs(prev_prob, cur_prob,
1819 						     coef_node_start,
1820 						     tree_left, tree_right,
1821 						     tree_i, node);
1822 
1823 				coef_node_start = coef_node_start + 1;
1824 			}
1825 			coef_count_node_start = coef_count_node_start + 10;
1826 		}
1827 
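		/* Partition tree: 16 contexts, 3 nodes each (4 partition types) */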
1828 		coef_node_start = VP9_PARTITION_P_START;
1829 		coef_count_node_start = VP9_PARTITION_P_COUNT_START;
1830 		for (tree_i = 0 ; tree_i < 16 ; tree_i++) {
1831 			for (node = 0 ; node < 3 ; node++) {
1832 				unsigned int start = coef_count_node_start;
1833 
1834 				switch (node) {
1835 				case 2:
1836 					tree_left = count[start + 2];
1837 					tree_right = count[start + 3];
1838 					break;
1839 				case 1:
1840 					tree_left = count[start + 1];
1841 					tree_right = count[start + 2] +
1842 						     count[start + 3];
1843 					break;
1844 				default:
1845 					tree_left = count[start + 0];
1846 					tree_right = count[start + 1] +
1847 						     count[start + 2] +
1848 						     count[start + 3];
1849 					break;
1850 				}
1851 
1852 				vp9_tree_merge_probs(prev_prob, cur_prob,
1853 						     coef_node_start,
1854 						     tree_left, tree_right,
1855 						     tree_i, node);
1856 
1857 				coef_node_start = coef_node_start + 1;
1858 			}
1859 
1860 			coef_count_node_start = coef_count_node_start + 4;
1861 		}
1862 
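		/* Interp filter tree: 4 contexts, 2 nodes (3 switchable filters) */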
1863 		coef_node_start = VP9_INTERP_START;
1864 		coef_count_node_start = VP9_INTERP_COUNT_START;
1865 		for (tree_i = 0 ; tree_i < 4 ; tree_i++) {
1866 			for (node = 0 ; node < 2 ; node++) {
1867 				unsigned int start = coef_count_node_start;
1868 
1869 				switch (node) {
1870 				case 1:
1871 					tree_left = count[start + 1];
1872 					tree_right = count[start + 2];
1873 					break;
1874 				default:
1875 					tree_left = count[start + 0];
1876 					tree_right = count[start + 1] +
1877 						     count[start + 2];
1878 					break;
1879 				}
1880 
1881 				vp9_tree_merge_probs(prev_prob, cur_prob,
1882 						     coef_node_start,
1883 						     tree_left, tree_right,
1884 						     tree_i, node);
1885 
1886 				coef_node_start = coef_node_start + 1;
1887 			}
1888 			coef_count_node_start = coef_count_node_start + 3;
1889 		}
1890 
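		/* MV joint tree: single context, 3 nodes (4 joint types) */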
1891 		coef_node_start = VP9_MV_JOINTS_START;
1892 		coef_count_node_start = VP9_MV_JOINTS_COUNT_START;
1893 		for (tree_i = 0 ; tree_i < 1 ; tree_i++) {
1894 			for (node = 0 ; node < 3 ; node++) {
1895 				unsigned int start = coef_count_node_start;
1896 
1897 				switch (node) {
1898 				case 2:
1899 					tree_left = count[start + 2];
1900 					tree_right = count[start + 3];
1901 					break;
1902 				case 1:
1903 					tree_left = count[start + 1];
1904 					tree_right = count[start + 2] +
1905 						     count[start + 3];
1906 					break;
1907 				default:
1908 					tree_left = count[start + 0];
1909 					tree_right = count[start + 1] +
1910 						     count[start + 2] +
1911 						     count[start + 3];
1912 					break;
1913 				}
1914 
1915 				vp9_tree_merge_probs(prev_prob, cur_prob,
1916 						     coef_node_start,
1917 						     tree_left, tree_right,
1918 						     tree_i, node);
1919 
1920 				coef_node_start = coef_node_start + 1;
1921 			}
1922 			coef_count_node_start = coef_count_node_start + 4;
1923 		}
1924 
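		/*
		 * Per-component (row/col) MV trees: the 10-node class tree
		 * (11 MV classes), the single class0 bit and three 4-leaf
		 * fractional-part trees.
		 */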
1925 		for (mvd_i = 0 ; mvd_i < 2 ; mvd_i++) {
1926 			coef_node_start = mvd_i ? VP9_MV_CLASSES_1_START :
1927 						  VP9_MV_CLASSES_0_START;
1928 			coef_count_node_start = mvd_i ?
1929 					VP9_MV_CLASSES_1_COUNT_START :
1930 					VP9_MV_CLASSES_0_COUNT_START;
1931 			tree_i = 0;
1932 			for (node = 0; node < 10; node++) {
1933 				unsigned int start = coef_count_node_start;
1934 
1935 				switch (node) {
1936 				case 9:
1937 					tree_left = count[start + 9];
1938 					tree_right = count[start + 10];
1939 					break;
1940 				case 8:
1941 					tree_left = count[start + 7];
1942 					tree_right = count[start + 8];
1943 					break;
1944 				case 7:
1945 					tree_left = count[start + 7] +
1946 						     count[start + 8];
1947 					tree_right = count[start + 9] +
1948 						     count[start + 10];
1949 					break;
1950 				case 6:
1951 					tree_left = count[start + 6];
1952 					tree_right = count[start + 7] +
1953 						     count[start + 8] +
1954 						     count[start + 9] +
1955 						     count[start + 10];
1956 					break;
1957 				case 5:
1958 					tree_left = count[start + 4];
1959 					tree_right = count[start + 5];
1960 					break;
1961 				case 4:
1962 					tree_left = count[start + 4] +
1963 						    count[start + 5];
1964 					tree_right = count[start + 6] +
1965 						     count[start + 7] +
1966 						     count[start + 8] +
1967 						     count[start + 9] +
1968 						     count[start + 10];
1969 					break;
1970 				case 3:
1971 					tree_left = count[start + 2];
1972 					tree_right = count[start + 3];
1973 					break;
1974 				case 2:
1975 					tree_left = count[start + 2] +
1976 						    count[start + 3];
1977 					tree_right = count[start + 4] +
1978 						     count[start + 5] +
1979 						     count[start + 6] +
1980 						     count[start + 7] +
1981 						     count[start + 8] +
1982 						     count[start + 9] +
1983 						     count[start + 10];
1984 					break;
1985 				case 1:
1986 					tree_left = count[start + 1];
1987 					tree_right = count[start + 2] +
1988 						     count[start + 3] +
1989 						     count[start + 4] +
1990 						     count[start + 5] +
1991 						     count[start + 6] +
1992 						     count[start + 7] +
1993 						     count[start + 8] +
1994 						     count[start + 9] +
1995 						     count[start + 10];
1996 					break;
1997 				default:
1998 					tree_left = count[start + 0];
1999 					tree_right = count[start + 1] +
2000 						     count[start + 2] +
2001 						     count[start + 3] +
2002 						     count[start + 4] +
2003 						     count[start + 5] +
2004 						     count[start + 6] +
2005 						     count[start + 7] +
2006 						     count[start + 8] +
2007 						     count[start + 9] +
2008 						     count[start + 10];
2009 					break;
2010 				}
2011 
2012 				vp9_tree_merge_probs(prev_prob, cur_prob,
2013 						     coef_node_start,
2014 						     tree_left, tree_right,
2015 						     tree_i, node);
2016 
2017 				coef_node_start = coef_node_start + 1;
2018 			}
2019 
2020 			coef_node_start = mvd_i ? VP9_MV_CLASS0_1_START :
2021 						  VP9_MV_CLASS0_0_START;
2022 			coef_count_node_start =	mvd_i ?
2023 						VP9_MV_CLASS0_1_COUNT_START :
2024 						VP9_MV_CLASS0_0_COUNT_START;
2025 			tree_i = 0;
2026 			node = 0;
2027 			tree_left = count[coef_count_node_start + 0];
2028 			tree_right = count[coef_count_node_start + 1];
2029 
2030 			vp9_tree_merge_probs(prev_prob, cur_prob,
2031 					     coef_node_start,
2032 					     tree_left, tree_right,
2033 					     tree_i, node);
2034 			coef_node_start = mvd_i ? VP9_MV_CLASS0_FP_1_START :
2035 						  VP9_MV_CLASS0_FP_0_START;
2036 			coef_count_node_start =	mvd_i ?
2037 					VP9_MV_CLASS0_FP_1_COUNT_START :
2038 					VP9_MV_CLASS0_FP_0_COUNT_START;
2039 
2040 			for (tree_i = 0; tree_i < 3; tree_i++) {
2041 				for (node = 0; node < 3; node++) {
2042 					unsigned int start =
2043 						coef_count_node_start;
2044 					switch (node) {
2045 					case 2:
2046 						tree_left = count[start + 2];
2047 						tree_right = count[start + 3];
2048 						break;
2049 					case 1:
2050 						tree_left = count[start + 1];
2051 						tree_right = count[start + 2] +
2052 							     count[start + 3];
2053 						break;
2054 					default:
2055 						tree_left = count[start + 0];
2056 						tree_right = count[start + 1] +
2057 							     count[start + 2] +
2058 							     count[start + 3];
2059 						break;
2060 					}
2061 
2062 					vp9_tree_merge_probs(prev_prob,
2063 							     cur_prob,
2064 							     coef_node_start,
2065 							     tree_left,
2066 							     tree_right,
2067 							     tree_i, node);
2068 
2069 					coef_node_start = coef_node_start + 1;
2070 				}
2071 				coef_count_node_start =
2072 					coef_count_node_start + 4;
2073 			}
2074 		}
2075 	}
2076 }
2077 
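/*
 * Threaded half of the decoder interrupt. Only VP9_HEAD_PARSER_DONE is
 * expected here; anything else aborts the session. When the firmware
 * requests it (VP9_REQ_ADAPT_PROB, low byte of VP9_ADAPT_PROB_REG set to
 * 0xfd), backward probability adaptation is run on the workspace
 * prob/count buffers and the result is copied back as the new previous
 * probabilities. The RPM parameters are then fetched: either a source
 * change is signalled, or the frame is processed and queued for display.
 */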
2078 static irqreturn_t codec_vp9_threaded_isr(struct amvdec_session *sess)
2079 {
2080 	struct amvdec_core *core = sess->core;
2081 	struct codec_vp9 *vp9 = sess->priv;
2082 	u32 dec_status = amvdec_read_dos(core, VP9_DEC_STATUS_REG);
2083 	u32 prob_status = amvdec_read_dos(core, VP9_ADAPT_PROB_REG);
2084 	int i;
2085 
2086 	if (!vp9)
2087 		return IRQ_HANDLED;
2088 
2089 	mutex_lock(&vp9->lock);
2090 	if (dec_status != VP9_HEAD_PARSER_DONE) {
2091 		dev_err(core->dev_dec, "Unrecognized dec_status: %08X\n",
2092 			dec_status);
2093 		amvdec_abort(sess);
2094 		goto unlock;
2095 	}
2096 
2097 	pr_debug("ISR: %08X;%08X\n", dec_status, prob_status);
2098 	sess->keyframe_found = 1;
2099 
2100 	if ((prob_status & 0xff) == 0xfd && vp9->cur_frame) {
2101 		/* VP9_REQ_ADAPT_PROB */
2102 		u8 *prev_prob_b = ((u8 *)vp9->workspace_vaddr +
2103 					 PROB_OFFSET) +
2104 					((prob_status >> 8) * 0x1000);
2105 		u8 *cur_prob_b = ((u8 *)vp9->workspace_vaddr +
2106 					 PROB_OFFSET) + 0x4000;
2107 		u8 *count_b = (u8 *)vp9->workspace_vaddr +
2108 				   COUNT_OFFSET;
2109 		int last_frame_type = vp9->prev_frame ?
2110 						vp9->prev_frame->type :
2111 						KEY_FRAME;
2112 
2113 		adapt_coef_probs(last_frame_type == KEY_FRAME,
2114 				 vp9->cur_frame->type == KEY_FRAME ? 1 : 0,
2115 				 prob_status >> 8,
2116 				 (unsigned int *)prev_prob_b,
2117 				 (unsigned int *)cur_prob_b,
2118 				 (unsigned int *)count_b);
2119 
2120 		memcpy(prev_prob_b, cur_prob_b, ADAPT_PROB_SIZE);
2121 		amvdec_write_dos(core, VP9_ADAPT_PROB_REG, 0);
2122 	}
2123 
2124 	/* Invalidate first 3 refs */
2125 	for (i = 0; i < REFS_PER_FRAME ; ++i)
2126 		vp9->frame_refs[i] = NULL;
2127 
2128 	vp9->prev_frame = vp9->cur_frame;
2129 	codec_vp9_update_ref(vp9);
2130 
2131 	codec_vp9_fetch_rpm(sess);
2132 	if (codec_vp9_process_rpm(vp9)) {
2133 		amvdec_src_change(sess, vp9->width, vp9->height, 16);
2134 
2135 		/* No frame is actually processed */
2136 		vp9->cur_frame = NULL;
2137 
2138 		/* Show the remaining frame */
2139 		codec_vp9_show_frame(sess);
2140 
2141 		/* FIXME: Save refs for resized frame */
2142 		if (vp9->frames_num)
2143 			codec_vp9_save_refs(vp9);
2144 
2145 		goto unlock;
2146 	}
2147 
2148 	codec_vp9_process_lf(vp9);
2149 	codec_vp9_process_frame(sess);
2150 	codec_vp9_show_frame(sess);
2151 
2152 unlock:
2153 	mutex_unlock(&vp9->lock);
2154 	return IRQ_HANDLED;
2155 }
2156 
2157 static irqreturn_t codec_vp9_isr(struct amvdec_session *sess)
2158 {
2159 	return IRQ_WAKE_THREAD;
2160 }
2161 
2162 struct amvdec_codec_ops codec_vp9_ops = {
2163 	.start = codec_vp9_start,
2164 	.stop = codec_vp9_stop,
2165 	.isr = codec_vp9_isr,
2166 	.threaded_isr = codec_vp9_threaded_isr,
2167 	.num_pending_bufs = codec_vp9_num_pending_bufs,
2168 	.drain = codec_vp9_flush_output,
2169 	.resume = codec_vp9_resume,
2170 };
2171