xref: /linux/drivers/media/platform/verisilicon/hantro_hevc.c (revision cdd30ebb1b9f36159d66f088b61aee264e649d7a)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Hantro VPU HEVC codec driver
4  *
5  * Copyright (C) 2020 Safran Passenger Innovations LLC
6  */
7 
8 #include <linux/types.h>
9 #include <media/v4l2-mem2mem.h>
10 
11 #include "hantro.h"
12 #include "hantro_hw.h"
13 
/*
 * Sizing constants for the auxiliary DMA buffers allocated in this file.
 *
 * The three *_RAM_SIZE values are multiplied in tile_buffer_reallocate() by
 * the 64-aligned picture height and by (number of tile columns - 1).  The
 * filter and SAO sizes are additionally scaled by ctx->bit_depth / 8; the
 * BSD size is not.
 */
#define VERT_FILTER_RAM_SIZE 8 /* bytes per pixel row */
/*
 * BSD control data of current picture at tile border
 * 128 bits per 4x4 tile = 128/(8*4) bytes per row
 */
#define BSD_CTRL_RAM_SIZE 4 /* bytes per pixel row */
/* tile border coefficients of filter */
#define VERT_SAO_RAM_SIZE 48 /* bytes per pixel */

/* Size in bytes of the scaling_lists buffer allocated in hantro_hevc_dec_init() */
#define SCALING_LIST_SIZE (16 * 64)

/* Tile grid limits used to size the tile_sizes buffer in hantro_hevc_dec_init() */
#define MAX_TILE_COLS 20
#define MAX_TILE_ROWS 22
27 
/*
 * Module parameter selecting reference frame compression for HEVC.
 * Defaults to whether CONFIG_VIDEO_HANTRO_HEVC_RFC is enabled.  The value is
 * read in hantro_hevc_dec_init(), where it is combined with
 * hantro_needs_postproc() and stored in the per-context use_compression flag.
 */
static bool hevc_use_compression = IS_ENABLED(CONFIG_VIDEO_HANTRO_HEVC_RFC);
module_param_named(hevc_use_compression, hevc_use_compression, bool, 0644);
MODULE_PARM_DESC(hevc_use_compression,
		 "Use reference frame compression for HEVC");
32 
33 void hantro_hevc_ref_init(struct hantro_ctx *ctx)
34 {
35 	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
36 
37 	hevc_dec->ref_bufs_used = 0;
38 }
39 
40 dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx,
41 				   s32 poc)
42 {
43 	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
44 	int i;
45 
46 	/* Find the reference buffer in already known ones */
47 	for (i = 0;  i < NUM_REF_PICTURES; i++) {
48 		if (hevc_dec->ref_bufs_poc[i] == poc) {
49 			hevc_dec->ref_bufs_used |= 1 << i;
50 			return hevc_dec->ref_bufs[i].dma;
51 		}
52 	}
53 
54 	return 0;
55 }
56 
57 int hantro_hevc_add_ref_buf(struct hantro_ctx *ctx, int poc, dma_addr_t addr)
58 {
59 	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
60 	int i;
61 
62 	/* Add a new reference buffer */
63 	for (i = 0; i < NUM_REF_PICTURES; i++) {
64 		if (!(hevc_dec->ref_bufs_used & 1 << i)) {
65 			hevc_dec->ref_bufs_used |= 1 << i;
66 			hevc_dec->ref_bufs_poc[i] = poc;
67 			hevc_dec->ref_bufs[i].dma = addr;
68 			return 0;
69 		}
70 	}
71 
72 	return -EINVAL;
73 }
74 
75 static int tile_buffer_reallocate(struct hantro_ctx *ctx)
76 {
77 	struct hantro_dev *vpu = ctx->dev;
78 	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
79 	const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls;
80 	const struct v4l2_ctrl_hevc_pps *pps = ctrls->pps;
81 	const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps;
82 	unsigned int num_tile_cols = pps->num_tile_columns_minus1 + 1;
83 	unsigned int height64 = (sps->pic_height_in_luma_samples + 63) & ~63;
84 	unsigned int size;
85 
86 	if (num_tile_cols <= 1 ||
87 	    num_tile_cols <= hevc_dec->num_tile_cols_allocated)
88 		return 0;
89 
90 	/* Need to reallocate due to tiles passed via PPS */
91 	if (hevc_dec->tile_filter.cpu) {
92 		dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size,
93 				  hevc_dec->tile_filter.cpu,
94 				  hevc_dec->tile_filter.dma);
95 		hevc_dec->tile_filter.cpu = NULL;
96 	}
97 
98 	if (hevc_dec->tile_sao.cpu) {
99 		dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size,
100 				  hevc_dec->tile_sao.cpu,
101 				  hevc_dec->tile_sao.dma);
102 		hevc_dec->tile_sao.cpu = NULL;
103 	}
104 
105 	if (hevc_dec->tile_bsd.cpu) {
106 		dma_free_coherent(vpu->dev, hevc_dec->tile_bsd.size,
107 				  hevc_dec->tile_bsd.cpu,
108 				  hevc_dec->tile_bsd.dma);
109 		hevc_dec->tile_bsd.cpu = NULL;
110 	}
111 
112 	size = (VERT_FILTER_RAM_SIZE * height64 * (num_tile_cols - 1) * ctx->bit_depth) / 8;
113 	hevc_dec->tile_filter.cpu = dma_alloc_coherent(vpu->dev, size,
114 						       &hevc_dec->tile_filter.dma,
115 						       GFP_KERNEL);
116 	if (!hevc_dec->tile_filter.cpu)
117 		return -ENOMEM;
118 	hevc_dec->tile_filter.size = size;
119 
120 	size = (VERT_SAO_RAM_SIZE * height64 * (num_tile_cols - 1) * ctx->bit_depth) / 8;
121 	hevc_dec->tile_sao.cpu = dma_alloc_coherent(vpu->dev, size,
122 						    &hevc_dec->tile_sao.dma,
123 						    GFP_KERNEL);
124 	if (!hevc_dec->tile_sao.cpu)
125 		goto err_free_tile_buffers;
126 	hevc_dec->tile_sao.size = size;
127 
128 	size = BSD_CTRL_RAM_SIZE * height64 * (num_tile_cols - 1);
129 	hevc_dec->tile_bsd.cpu = dma_alloc_coherent(vpu->dev, size,
130 						    &hevc_dec->tile_bsd.dma,
131 						    GFP_KERNEL);
132 	if (!hevc_dec->tile_bsd.cpu)
133 		goto err_free_sao_buffers;
134 	hevc_dec->tile_bsd.size = size;
135 
136 	hevc_dec->num_tile_cols_allocated = num_tile_cols;
137 
138 	return 0;
139 
140 err_free_sao_buffers:
141 	if (hevc_dec->tile_sao.cpu)
142 		dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size,
143 				  hevc_dec->tile_sao.cpu,
144 				  hevc_dec->tile_sao.dma);
145 	hevc_dec->tile_sao.cpu = NULL;
146 
147 err_free_tile_buffers:
148 	if (hevc_dec->tile_filter.cpu)
149 		dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size,
150 				  hevc_dec->tile_filter.cpu,
151 				  hevc_dec->tile_filter.dma);
152 	hevc_dec->tile_filter.cpu = NULL;
153 
154 	return -ENOMEM;
155 }
156 
157 static int hantro_hevc_validate_sps(struct hantro_ctx *ctx, const struct v4l2_ctrl_hevc_sps *sps)
158 {
159 	/*
160 	 * for tile pixel format check if the width and height match
161 	 * hardware constraints
162 	 */
163 	if (ctx->vpu_dst_fmt->fourcc == V4L2_PIX_FMT_NV12_4L4) {
164 		if (ctx->dst_fmt.width !=
165 		    ALIGN(sps->pic_width_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_width))
166 			return -EINVAL;
167 
168 		if (ctx->dst_fmt.height !=
169 		    ALIGN(sps->pic_height_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_height))
170 			return -EINVAL;
171 	}
172 
173 	return 0;
174 }
175 
176 int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx)
177 {
178 	struct hantro_hevc_dec_hw_ctx *hevc_ctx = &ctx->hevc_dec;
179 	struct hantro_hevc_dec_ctrls *ctrls = &hevc_ctx->ctrls;
180 	int ret;
181 
182 	hantro_start_prepare_run(ctx);
183 
184 	ctrls->decode_params =
185 		hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_DECODE_PARAMS);
186 	if (WARN_ON(!ctrls->decode_params))
187 		return -EINVAL;
188 
189 	ctrls->scaling =
190 		hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_SCALING_MATRIX);
191 	if (WARN_ON(!ctrls->scaling))
192 		return -EINVAL;
193 
194 	ctrls->sps =
195 		hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_SPS);
196 	if (WARN_ON(!ctrls->sps))
197 		return -EINVAL;
198 
199 	ret = hantro_hevc_validate_sps(ctx, ctrls->sps);
200 	if (ret)
201 		return ret;
202 
203 	ctrls->pps =
204 		hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_PPS);
205 	if (WARN_ON(!ctrls->pps))
206 		return -EINVAL;
207 
208 	ret = tile_buffer_reallocate(ctx);
209 	if (ret)
210 		return ret;
211 
212 	return 0;
213 }
214 
215 void hantro_hevc_dec_exit(struct hantro_ctx *ctx)
216 {
217 	struct hantro_dev *vpu = ctx->dev;
218 	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
219 
220 	if (hevc_dec->tile_sizes.cpu)
221 		dma_free_coherent(vpu->dev, hevc_dec->tile_sizes.size,
222 				  hevc_dec->tile_sizes.cpu,
223 				  hevc_dec->tile_sizes.dma);
224 	hevc_dec->tile_sizes.cpu = NULL;
225 
226 	if (hevc_dec->scaling_lists.cpu)
227 		dma_free_coherent(vpu->dev, hevc_dec->scaling_lists.size,
228 				  hevc_dec->scaling_lists.cpu,
229 				  hevc_dec->scaling_lists.dma);
230 	hevc_dec->scaling_lists.cpu = NULL;
231 
232 	if (hevc_dec->tile_filter.cpu)
233 		dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size,
234 				  hevc_dec->tile_filter.cpu,
235 				  hevc_dec->tile_filter.dma);
236 	hevc_dec->tile_filter.cpu = NULL;
237 
238 	if (hevc_dec->tile_sao.cpu)
239 		dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size,
240 				  hevc_dec->tile_sao.cpu,
241 				  hevc_dec->tile_sao.dma);
242 	hevc_dec->tile_sao.cpu = NULL;
243 
244 	if (hevc_dec->tile_bsd.cpu)
245 		dma_free_coherent(vpu->dev, hevc_dec->tile_bsd.size,
246 				  hevc_dec->tile_bsd.cpu,
247 				  hevc_dec->tile_bsd.dma);
248 	hevc_dec->tile_bsd.cpu = NULL;
249 }
250 
251 int hantro_hevc_dec_init(struct hantro_ctx *ctx)
252 {
253 	struct hantro_dev *vpu = ctx->dev;
254 	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
255 	unsigned int size;
256 
257 	memset(hevc_dec, 0, sizeof(*hevc_dec));
258 
259 	/*
260 	 * Maximum number of tiles times width and height (2 bytes each),
261 	 * rounding up to next 16 bytes boundary + one extra 16 byte
262 	 * chunk (HW guys wanted to have this).
263 	 */
264 	size = round_up(MAX_TILE_COLS * MAX_TILE_ROWS * 4 * sizeof(u16) + 16, 16);
265 	hevc_dec->tile_sizes.cpu = dma_alloc_coherent(vpu->dev, size,
266 						      &hevc_dec->tile_sizes.dma,
267 						      GFP_KERNEL);
268 	if (!hevc_dec->tile_sizes.cpu)
269 		return -ENOMEM;
270 
271 	hevc_dec->tile_sizes.size = size;
272 
273 	hevc_dec->scaling_lists.cpu = dma_alloc_coherent(vpu->dev, SCALING_LIST_SIZE,
274 							 &hevc_dec->scaling_lists.dma,
275 							 GFP_KERNEL);
276 	if (!hevc_dec->scaling_lists.cpu)
277 		return -ENOMEM;
278 
279 	hevc_dec->scaling_lists.size = SCALING_LIST_SIZE;
280 
281 	hantro_hevc_ref_init(ctx);
282 
283 	hevc_dec->use_compression =
284 		hevc_use_compression & hantro_needs_postproc(ctx, ctx->vpu_dst_fmt);
285 
286 	return 0;
287 }
288