xref: /linux/drivers/media/platform/verisilicon/hantro_vp8.c (revision 4f2c0a4acffbec01079c28f839422e64ddeff004)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Hantro VPU codec driver
4  *
5  * Copyright (C) 2018 Rockchip Electronics Co., Ltd.
6  */
7 
8 #include "hantro.h"
9 
10 /*
11  * probs table with packed
12  */
13 struct vp8_prob_tbl_packed {
14 	u8 prob_mb_skip_false;
15 	u8 prob_intra;
16 	u8 prob_ref_last;
17 	u8 prob_ref_golden;
18 	u8 prob_segment[3];
19 	u8 padding0;
20 
21 	u8 prob_luma_16x16_pred_mode[4];
22 	u8 prob_chroma_pred_mode[3];
23 	u8 padding1;
24 
25 	/* mv prob */
26 	u8 prob_mv_context[2][V4L2_VP8_MV_PROB_CNT];
27 	u8 padding2[2];
28 
29 	/* coeff probs */
30 	u8 prob_coeffs[4][8][3][V4L2_VP8_COEFF_PROB_CNT];
31 	u8 padding3[96];
32 };
33 
34 /*
35  * filter taps taken to 7-bit precision,
36  * reference RFC6386#Page-16, filters[8][6]
37  */
38 const u32 hantro_vp8_dec_mc_filter[8][6] = {
39 	{ 0, 0, 128, 0, 0, 0 },
40 	{ 0, -6, 123, 12, -1, 0 },
41 	{ 2, -11, 108, 36, -8, 1 },
42 	{ 0, -9, 93, 50, -6, 0 },
43 	{ 3, -16, 77, 77, -16, 3 },
44 	{ 0, -6, 50, 93, -9, 0 },
45 	{ 1, -8, 36, 108, -11, 2 },
46 	{ 0, -1, 12, 123, -6, 0 }
47 };
48 
hantro_vp8_prob_update(struct hantro_ctx * ctx,const struct v4l2_ctrl_vp8_frame * hdr)49 void hantro_vp8_prob_update(struct hantro_ctx *ctx,
50 			    const struct v4l2_ctrl_vp8_frame *hdr)
51 {
52 	const struct v4l2_vp8_entropy *entropy = &hdr->entropy;
53 	u32 i, j, k;
54 	u8 *dst;
55 
56 	/* first probs */
57 	dst = ctx->vp8_dec.prob_tbl.cpu;
58 
59 	dst[0] = hdr->prob_skip_false;
60 	dst[1] = hdr->prob_intra;
61 	dst[2] = hdr->prob_last;
62 	dst[3] = hdr->prob_gf;
63 	dst[4] = hdr->segment.segment_probs[0];
64 	dst[5] = hdr->segment.segment_probs[1];
65 	dst[6] = hdr->segment.segment_probs[2];
66 	dst[7] = 0;
67 
68 	dst += 8;
69 	dst[0] = entropy->y_mode_probs[0];
70 	dst[1] = entropy->y_mode_probs[1];
71 	dst[2] = entropy->y_mode_probs[2];
72 	dst[3] = entropy->y_mode_probs[3];
73 	dst[4] = entropy->uv_mode_probs[0];
74 	dst[5] = entropy->uv_mode_probs[1];
75 	dst[6] = entropy->uv_mode_probs[2];
76 	dst[7] = 0; /*unused */
77 
78 	/* mv probs */
79 	dst += 8;
80 	dst[0] = entropy->mv_probs[0][0]; /* is short */
81 	dst[1] = entropy->mv_probs[1][0];
82 	dst[2] = entropy->mv_probs[0][1]; /* sign */
83 	dst[3] = entropy->mv_probs[1][1];
84 	dst[4] = entropy->mv_probs[0][8 + 9];
85 	dst[5] = entropy->mv_probs[0][9 + 9];
86 	dst[6] = entropy->mv_probs[1][8 + 9];
87 	dst[7] = entropy->mv_probs[1][9 + 9];
88 	dst += 8;
89 	for (i = 0; i < 2; ++i) {
90 		for (j = 0; j < 8; j += 4) {
91 			dst[0] = entropy->mv_probs[i][j + 9 + 0];
92 			dst[1] = entropy->mv_probs[i][j + 9 + 1];
93 			dst[2] = entropy->mv_probs[i][j + 9 + 2];
94 			dst[3] = entropy->mv_probs[i][j + 9 + 3];
95 			dst += 4;
96 		}
97 	}
98 	for (i = 0; i < 2; ++i) {
99 		dst[0] = entropy->mv_probs[i][0 + 2];
100 		dst[1] = entropy->mv_probs[i][1 + 2];
101 		dst[2] = entropy->mv_probs[i][2 + 2];
102 		dst[3] = entropy->mv_probs[i][3 + 2];
103 		dst[4] = entropy->mv_probs[i][4 + 2];
104 		dst[5] = entropy->mv_probs[i][5 + 2];
105 		dst[6] = entropy->mv_probs[i][6 + 2];
106 		dst[7] = 0;	/*unused */
107 		dst += 8;
108 	}
109 
110 	/* coeff probs (header part) */
111 	dst = ctx->vp8_dec.prob_tbl.cpu;
112 	dst += (8 * 7);
113 	for (i = 0; i < 4; ++i) {
114 		for (j = 0; j < 8; ++j) {
115 			for (k = 0; k < 3; ++k) {
116 				dst[0] = entropy->coeff_probs[i][j][k][0];
117 				dst[1] = entropy->coeff_probs[i][j][k][1];
118 				dst[2] = entropy->coeff_probs[i][j][k][2];
119 				dst[3] = entropy->coeff_probs[i][j][k][3];
120 				dst += 4;
121 			}
122 		}
123 	}
124 
125 	/* coeff probs (footer part) */
126 	dst = ctx->vp8_dec.prob_tbl.cpu;
127 	dst += (8 * 55);
128 	for (i = 0; i < 4; ++i) {
129 		for (j = 0; j < 8; ++j) {
130 			for (k = 0; k < 3; ++k) {
131 				dst[0] = entropy->coeff_probs[i][j][k][4];
132 				dst[1] = entropy->coeff_probs[i][j][k][5];
133 				dst[2] = entropy->coeff_probs[i][j][k][6];
134 				dst[3] = entropy->coeff_probs[i][j][k][7];
135 				dst[4] = entropy->coeff_probs[i][j][k][8];
136 				dst[5] = entropy->coeff_probs[i][j][k][9];
137 				dst[6] = entropy->coeff_probs[i][j][k][10];
138 				dst[7] = 0;	/*unused */
139 				dst += 8;
140 			}
141 		}
142 	}
143 }
144 
hantro_vp8_dec_init(struct hantro_ctx * ctx)145 int hantro_vp8_dec_init(struct hantro_ctx *ctx)
146 {
147 	struct hantro_dev *vpu = ctx->dev;
148 	struct hantro_aux_buf *aux_buf;
149 	unsigned int mb_width, mb_height;
150 	size_t segment_map_size;
151 	int ret;
152 
153 	/* segment map table size calculation */
154 	mb_width = DIV_ROUND_UP(ctx->dst_fmt.width, 16);
155 	mb_height = DIV_ROUND_UP(ctx->dst_fmt.height, 16);
156 	segment_map_size = round_up(DIV_ROUND_UP(mb_width * mb_height, 4), 64);
157 
158 	/*
159 	 * In context init the dma buffer for segment map must be allocated.
160 	 * And the data in segment map buffer must be set to all zero.
161 	 */
162 	aux_buf = &ctx->vp8_dec.segment_map;
163 	aux_buf->size = segment_map_size;
164 	aux_buf->cpu = dma_alloc_coherent(vpu->dev, aux_buf->size,
165 					  &aux_buf->dma, GFP_KERNEL);
166 	if (!aux_buf->cpu)
167 		return -ENOMEM;
168 
169 	/*
170 	 * Allocate probability table buffer,
171 	 * total 1208 bytes, 4K page is far enough.
172 	 */
173 	aux_buf = &ctx->vp8_dec.prob_tbl;
174 	aux_buf->size = sizeof(struct vp8_prob_tbl_packed);
175 	aux_buf->cpu = dma_alloc_coherent(vpu->dev, aux_buf->size,
176 					  &aux_buf->dma, GFP_KERNEL);
177 	if (!aux_buf->cpu) {
178 		ret = -ENOMEM;
179 		goto err_free_seg_map;
180 	}
181 
182 	return 0;
183 
184 err_free_seg_map:
185 	dma_free_coherent(vpu->dev, ctx->vp8_dec.segment_map.size,
186 			  ctx->vp8_dec.segment_map.cpu,
187 			  ctx->vp8_dec.segment_map.dma);
188 
189 	return ret;
190 }
191 
hantro_vp8_dec_exit(struct hantro_ctx * ctx)192 void hantro_vp8_dec_exit(struct hantro_ctx *ctx)
193 {
194 	struct hantro_vp8_dec_hw_ctx *vp8_dec = &ctx->vp8_dec;
195 	struct hantro_dev *vpu = ctx->dev;
196 
197 	dma_free_coherent(vpu->dev, vp8_dec->segment_map.size,
198 			  vp8_dec->segment_map.cpu, vp8_dec->segment_map.dma);
199 	dma_free_coherent(vpu->dev, vp8_dec->prob_tbl.size,
200 			  vp8_dec->prob_tbl.cpu, vp8_dec->prob_tbl.dma);
201 }
202