1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3 * NVIDIA Tegra Video decoder driver
4 *
5 * Copyright (C) 2016-2022 Dmitry Osipenko <digetx@gmail.com>
6 *
7 */
8
9 #include <linux/iopoll.h>
10 #include <linux/pm_runtime.h>
11 #include <linux/reset.h>
12 #include <linux/slab.h>
13
14 #include <media/v4l2-h264.h>
15
16 #include "trace.h"
17 #include "vde.h"
18
/* Bits kept in the flags word of each frame descriptor filled in by this file. */
#define FLAG_B_FRAME		(1 << 0)	/* buffer is tagged as a B-frame */
#define FLAG_REFERENCE		(1 << 1)	/* current frame has a non-zero nal_ref_idc */
21
/**
 * struct tegra_vde_h264_decoder_ctx - H.264 parameters for one decode job
 * @dpb_frames_nb: number of frame descriptors prepared for the job; the
 *	frame being decoded is entry 0 and every valid reference adds one.
 *	Validation accepts 1..17.
 * @dpb_ref_frames_with_earlier_poc_nb: number of references whose top field
 *	order count is lower than the current picture order count.
 * @baseline_profile: 1 when the SPS profile_idc equals 66, otherwise 0.
 * @level_idc: level code returned by to_tegra_vde_h264_level_idc(), not the
 *	raw SPS value. Validation accepts at most 15.
 * @log2_max_pic_order_cnt_lsb: SPS log2_max_pic_order_cnt_lsb_minus4 plus 4.
 *	Validation accepts at most 16.
 * @log2_max_frame_num: SPS log2_max_frame_num_minus4 plus 4. Validation
 *	accepts at most 16.
 * @pic_order_cnt_type: copied from the SPS. Validation accepts at most 2.
 * @direct_8x8_inference_flag: 1 when the SPS DIRECT_8X8_INFERENCE flag is set.
 * @pic_width_in_mbs: SPS pic_width_in_mbs_minus1 plus 1. Validation accepts
 *	1..127.
 * @pic_height_in_mbs: SPS pic_height_in_map_units_minus1 plus 1. Validation
 *	accepts 1..127.
 * @pic_init_qp: PPS pic_init_qp_minus26 plus 26. Validation accepts at
 *	most 52.
 * @deblocking_filter_control_present_flag: 1 when the matching PPS flag is set.
 * @constrained_intra_pred_flag: 1 when the matching PPS flag is set.
 * @chroma_qp_index_offset: low five bits of the PPS chroma_qp_index_offset.
 * @pic_order_present_flag: 1 when the PPS
 *	BOTTOM_FIELD_PIC_ORDER_IN_FRAME_PRESENT flag is set.
 * @num_ref_idx_l0_active_minus1: PPS num_ref_idx_l0_default_active_minus1.
 *	Validation accepts at most 15.
 * @num_ref_idx_l1_active_minus1: PPS num_ref_idx_l1_default_active_minus1.
 *	Validation accepts at most 15.
 */
struct tegra_vde_h264_decoder_ctx {
	/* Frame bookkeeping filled in while the frame descriptors are built. */
	unsigned int dpb_frames_nb;
	unsigned int dpb_ref_frames_with_earlier_poc_nb;

	/* Values derived from the sequence parameter set. */
	unsigned int baseline_profile;
	unsigned int level_idc;
	unsigned int log2_max_pic_order_cnt_lsb;
	unsigned int log2_max_frame_num;
	unsigned int pic_order_cnt_type;
	unsigned int direct_8x8_inference_flag;
	unsigned int pic_width_in_mbs;
	unsigned int pic_height_in_mbs;

	/* Values derived from the picture parameter set. */
	unsigned int pic_init_qp;
	unsigned int deblocking_filter_control_present_flag;
	unsigned int constrained_intra_pred_flag;
	unsigned int chroma_qp_index_offset;
	unsigned int pic_order_present_flag;
	unsigned int num_ref_idx_l0_active_minus1;
	unsigned int num_ref_idx_l1_active_minus1;
};
41
42 struct h264_reflists {
43 struct v4l2_h264_reference p[V4L2_H264_NUM_DPB_ENTRIES];
44 struct v4l2_h264_reference b0[V4L2_H264_NUM_DPB_ENTRIES];
45 struct v4l2_h264_reference b1[V4L2_H264_NUM_DPB_ENTRIES];
46 };
47
tegra_vde_wait_mbe(struct tegra_vde * vde)48 static int tegra_vde_wait_mbe(struct tegra_vde *vde)
49 {
50 u32 tmp;
51
52 return readl_relaxed_poll_timeout(vde->mbe + 0x8C, tmp,
53 tmp >= 0x10, 1, 100);
54 }
55
tegra_vde_setup_mbe_frame_idx(struct tegra_vde * vde,unsigned int refs_nb,bool setup_refs)56 static int tegra_vde_setup_mbe_frame_idx(struct tegra_vde *vde,
57 unsigned int refs_nb,
58 bool setup_refs)
59 {
60 u32 value, frame_idx_enb_mask = 0;
61 unsigned int frame_idx;
62 unsigned int idx;
63 int err;
64
65 tegra_vde_writel(vde, 0xD0000000 | (0 << 23), vde->mbe, 0x80);
66 tegra_vde_writel(vde, 0xD0200000 | (0 << 23), vde->mbe, 0x80);
67
68 err = tegra_vde_wait_mbe(vde);
69 if (err)
70 return err;
71
72 if (!setup_refs)
73 return 0;
74
75 for (idx = 0, frame_idx = 1; idx < refs_nb; idx++, frame_idx++) {
76 tegra_vde_writel(vde, 0xD0000000 | (frame_idx << 23),
77 vde->mbe, 0x80);
78 tegra_vde_writel(vde, 0xD0200000 | (frame_idx << 23),
79 vde->mbe, 0x80);
80
81 frame_idx_enb_mask |= frame_idx << (6 * (idx % 4));
82
83 if (idx % 4 == 3 || idx == refs_nb - 1) {
84 value = 0xC0000000;
85 value |= (idx >> 2) << 24;
86 value |= frame_idx_enb_mask;
87
88 tegra_vde_writel(vde, value, vde->mbe, 0x80);
89
90 err = tegra_vde_wait_mbe(vde);
91 if (err)
92 return err;
93
94 frame_idx_enb_mask = 0;
95 }
96 }
97
98 return 0;
99 }
100
/*
 * Write a 32-bit value through two 0xA0000000 MBE commands: the low 16 bits
 * go out tagged with @reg, the high 16 bits tagged with @reg + 1.
 */
static void tegra_vde_mbe_set_0xa_reg(struct tegra_vde *vde, int reg, u32 val)
{
	const u32 low_half = val & 0xFFFF;
	const u32 high_half = val >> 16;

	tegra_vde_writel(vde, 0xA0000000 | (reg << 24) | low_half,
			 vde->mbe, 0x80);
	tegra_vde_writel(vde, 0xA0000000 | ((reg + 1) << 24) | high_half,
			 vde->mbe, 0x80);
}
108
/*
 * Wait for the BSEV INTR_STATUS register to reach the expected state.
 *
 * Three conditions are polled in order: bit 2 clear, BSE_ICMDQUE_EMPTY set
 * and, only when @wait_dma is true, BSE_DMA_BUSY clear. The first two are
 * given 100 us each and the DMA wait 1000 us. Each stage logs its own
 * message when it times out.
 *
 * Returns 0 on success or the poll error of the first stage that failed.
 */
static int tegra_vde_wait_bsev(struct tegra_vde *vde, bool wait_dma)
{
	u32 status;
	int ret;

	ret = readl_relaxed_poll_timeout(vde->bsev + INTR_STATUS, status,
					 !(status & BIT(2)), 1, 100);
	if (ret) {
		dev_err(vde->dev, "BSEV unknown bit timeout\n");
		return ret;
	}

	ret = readl_relaxed_poll_timeout(vde->bsev + INTR_STATUS, status,
					 (status & BSE_ICMDQUE_EMPTY), 1, 100);
	if (ret) {
		dev_err(vde->dev, "BSEV ICMDQUE flush timeout\n");
		return ret;
	}

	if (wait_dma) {
		ret = readl_relaxed_poll_timeout(vde->bsev + INTR_STATUS,
						 status,
						 !(status & BSE_DMA_BUSY),
						 1, 1000);
		if (ret)
			dev_err(vde->dev, "BSEV DMA timeout\n");
	}

	return ret;
}
141
/*
 * Write one command word to the BSEV ICMDQUE_WR register and wait for BSEV
 * to settle, including the DMA-idle wait when @wait_dma is true.
 *
 * Returns the result of tegra_vde_wait_bsev().
 */
static int tegra_vde_push_to_bsev_icmdqueue(struct tegra_vde *vde,
					    u32 value, bool wait_dma)
{
	int ret;

	tegra_vde_writel(vde, value, vde->bsev, ICMDQUE_WR);
	ret = tegra_vde_wait_bsev(vde, wait_dma);

	return ret;
}
149
/*
 * Program slot @frameid of the vde->frameid register block.
 *
 * With a frame descriptor, the three plane addresses and the two pitch
 * words are taken from @frame. With a NULL @frame the slot is filled with
 * the 0x6CDEAD00 placeholder address and zeroed pitch words. Addresses are
 * written shifted right by 8 bits. @mbs_width is accepted but not used.
 */
static void tegra_vde_setup_frameid(struct tegra_vde *vde,
				    struct tegra_video_frame *frame,
				    unsigned int frameid,
				    u32 mbs_width, u32 mbs_height)
{
	const unsigned int slot = frameid * 4;
	u32 luma = 0x6CDEAD00;
	u32 chroma_b = 0x6CDEAD00;
	u32 chroma_r = 0x6CDEAD00;
	u32 luma_cfg = 0;
	u32 chroma_cfg = 0;

	if (frame) {
		luma = frame->y_addr;
		chroma_b = frame->cb_addr;
		chroma_r = frame->cr_addr;
		luma_cfg = (frame->luma_atoms_pitch << 16) | mbs_height;
		chroma_cfg = (frame->chroma_atoms_pitch << 6) | 1;
	}

	tegra_vde_writel(vde, luma >> 8, vde->frameid, 0x000 + slot);
	tegra_vde_writel(vde, chroma_b >> 8, vde->frameid, 0x100 + slot);
	tegra_vde_writel(vde, chroma_r >> 8, vde->frameid, 0x180 + slot);
	tegra_vde_writel(vde, luma_cfg, vde->frameid, 0x080 + slot);
	tegra_vde_writel(vde, chroma_cfg, vde->frameid, 0x280 + slot);
}
167
/*
 * Program the frame slots: the first @frames_nb slots describe @frames[],
 * and any remaining slots below 17 are filled with placeholder values.
 */
static void tegra_setup_frameidx(struct tegra_vde *vde,
				 struct tegra_video_frame *frames,
				 unsigned int frames_nb,
				 u32 mbs_width, u32 mbs_height)
{
	unsigned int slot = 0;

	while (slot < frames_nb) {
		tegra_vde_setup_frameid(vde, &frames[slot], slot,
					mbs_width, mbs_height);
		slot++;
	}

	while (slot < 17) {
		tegra_vde_setup_frameid(vde, NULL, slot, 0, 0);
		slot++;
	}
}
182
/*
 * Store one two-word entry in the IRAM tables.
 *
 * Each table spans 0x20 words and each row takes two consecutive words:
 * @value1 followed by @value2. The entry is traced before it is written.
 */
static void tegra_vde_setup_iram_entry(struct tegra_vde *vde,
				       unsigned int table,
				       unsigned int row,
				       u32 value1, u32 value2)
{
	u32 *entry = vde->iram;

	entry += 0x20 * table + row * 2;

	trace_vde_setup_iram_entry(table, row, value1, value2);

	entry[0] = value1;
	entry[1] = value2;
}
195
tegra_vde_setup_iram_tables(struct tegra_vde * vde,struct tegra_video_frame * dpb_frames,unsigned int ref_frames_nb,unsigned int with_earlier_poc_nb)196 static void tegra_vde_setup_iram_tables(struct tegra_vde *vde,
197 struct tegra_video_frame *dpb_frames,
198 unsigned int ref_frames_nb,
199 unsigned int with_earlier_poc_nb)
200 {
201 struct tegra_video_frame *frame;
202 int with_later_poc_nb;
203 u32 value, aux_addr;
204 unsigned int i, k;
205
206 trace_vde_ref_l0(dpb_frames[0].frame_num);
207
208 for (i = 0; i < 16; i++) {
209 if (i < ref_frames_nb) {
210 frame = &dpb_frames[i + 1];
211
212 aux_addr = frame->aux_addr;
213
214 value = (i + 1) << 26;
215 value |= !(frame->flags & FLAG_B_FRAME) << 25;
216 value |= 1 << 24;
217 value |= frame->frame_num;
218 } else {
219 aux_addr = 0x6ADEAD00;
220 value = 0x3f;
221 }
222
223 tegra_vde_setup_iram_entry(vde, 0, i, value, aux_addr);
224 tegra_vde_setup_iram_entry(vde, 1, i, value, aux_addr);
225 tegra_vde_setup_iram_entry(vde, 2, i, value, aux_addr);
226 tegra_vde_setup_iram_entry(vde, 3, i, value, aux_addr);
227 }
228
229 if (!(dpb_frames[0].flags & FLAG_B_FRAME))
230 return;
231
232 if (with_earlier_poc_nb >= ref_frames_nb)
233 return;
234
235 with_later_poc_nb = ref_frames_nb - with_earlier_poc_nb;
236
237 trace_vde_ref_l1(with_later_poc_nb, with_earlier_poc_nb);
238
239 for (i = 0, k = with_earlier_poc_nb; i < with_later_poc_nb; i++, k++) {
240 frame = &dpb_frames[k + 1];
241
242 aux_addr = frame->aux_addr;
243
244 value = (k + 1) << 26;
245 value |= !(frame->flags & FLAG_B_FRAME) << 25;
246 value |= 1 << 24;
247 value |= frame->frame_num;
248
249 tegra_vde_setup_iram_entry(vde, 2, i, value, aux_addr);
250 }
251
252 for (k = 0; i < ref_frames_nb; i++, k++) {
253 frame = &dpb_frames[k + 1];
254
255 aux_addr = frame->aux_addr;
256
257 value = (k + 1) << 26;
258 value |= !(frame->flags & FLAG_B_FRAME) << 25;
259 value |= 1 << 24;
260 value |= frame->frame_num;
261
262 tegra_vde_setup_iram_entry(vde, 2, i, value, aux_addr);
263 }
264 }
265
tegra_vde_setup_hw_context(struct tegra_vde * vde,struct tegra_vde_h264_decoder_ctx * ctx,struct tegra_video_frame * dpb_frames,dma_addr_t bitstream_data_addr,size_t bitstream_data_size,unsigned int macroblocks_nb)266 static int tegra_vde_setup_hw_context(struct tegra_vde *vde,
267 struct tegra_vde_h264_decoder_ctx *ctx,
268 struct tegra_video_frame *dpb_frames,
269 dma_addr_t bitstream_data_addr,
270 size_t bitstream_data_size,
271 unsigned int macroblocks_nb)
272 {
273 struct device *dev = vde->dev;
274 u32 value;
275 int err;
276
277 tegra_vde_set_bits(vde, 0x000A, vde->sxe, 0xF0);
278 tegra_vde_set_bits(vde, 0x000B, vde->bsev, CMDQUE_CONTROL);
279 tegra_vde_set_bits(vde, 0x8002, vde->mbe, 0x50);
280 tegra_vde_set_bits(vde, 0x000A, vde->mbe, 0xA0);
281 tegra_vde_set_bits(vde, 0x000A, vde->ppe, 0x14);
282 tegra_vde_set_bits(vde, 0x000A, vde->ppe, 0x28);
283 tegra_vde_set_bits(vde, 0x0A00, vde->mce, 0x08);
284 tegra_vde_set_bits(vde, 0x000A, vde->tfe, 0x00);
285 tegra_vde_set_bits(vde, 0x0005, vde->vdma, 0x04);
286
287 tegra_vde_writel(vde, 0x00000000, vde->vdma, 0x1C);
288 tegra_vde_writel(vde, 0x00000000, vde->vdma, 0x00);
289 tegra_vde_writel(vde, 0x00000007, vde->vdma, 0x04);
290 tegra_vde_writel(vde, 0x00000007, vde->frameid, 0x200);
291 tegra_vde_writel(vde, 0x00000005, vde->tfe, 0x04);
292 tegra_vde_writel(vde, 0x00000000, vde->mbe, 0x84);
293 tegra_vde_writel(vde, 0x00000010, vde->sxe, 0x08);
294 tegra_vde_writel(vde, 0x00000150, vde->sxe, 0x54);
295 tegra_vde_writel(vde, 0x0000054C, vde->sxe, 0x58);
296 tegra_vde_writel(vde, 0x00000E34, vde->sxe, 0x5C);
297 tegra_vde_writel(vde, 0x063C063C, vde->mce, 0x10);
298 tegra_vde_writel(vde, 0x0003FC00, vde->bsev, INTR_STATUS);
299 tegra_vde_writel(vde, 0x0000150D, vde->bsev, BSE_CONFIG);
300 tegra_vde_writel(vde, 0x00000100, vde->bsev, BSE_INT_ENB);
301 tegra_vde_writel(vde, 0x00000000, vde->bsev, 0x98);
302 tegra_vde_writel(vde, 0x00000060, vde->bsev, 0x9C);
303
304 memset(vde->iram + 128, 0, macroblocks_nb / 2);
305
306 tegra_setup_frameidx(vde, dpb_frames, ctx->dpb_frames_nb,
307 ctx->pic_width_in_mbs, ctx->pic_height_in_mbs);
308
309 tegra_vde_setup_iram_tables(vde, dpb_frames,
310 ctx->dpb_frames_nb - 1,
311 ctx->dpb_ref_frames_with_earlier_poc_nb);
312
313 /*
314 * The IRAM mapping is write-combine, ensure that CPU buffers have
315 * been flushed at this point.
316 */
317 wmb();
318
319 tegra_vde_writel(vde, 0x00000000, vde->bsev, 0x8C);
320 tegra_vde_writel(vde, bitstream_data_addr + bitstream_data_size,
321 vde->bsev, 0x54);
322
323 vde->bitstream_data_addr = bitstream_data_addr;
324
325 value = ctx->pic_width_in_mbs << 11 | ctx->pic_height_in_mbs << 3;
326
327 tegra_vde_writel(vde, value, vde->bsev, 0x88);
328
329 err = tegra_vde_wait_bsev(vde, false);
330 if (err)
331 return err;
332
333 err = tegra_vde_push_to_bsev_icmdqueue(vde, 0x800003FC, false);
334 if (err)
335 return err;
336
337 value = 0x01500000;
338 value |= ((vde->iram_lists_addr + 512) >> 2) & 0xFFFF;
339
340 err = tegra_vde_push_to_bsev_icmdqueue(vde, value, true);
341 if (err)
342 return err;
343
344 err = tegra_vde_push_to_bsev_icmdqueue(vde, 0x840F054C, false);
345 if (err)
346 return err;
347
348 err = tegra_vde_push_to_bsev_icmdqueue(vde, 0x80000080, false);
349 if (err)
350 return err;
351
352 value = 0x0E340000 | ((vde->iram_lists_addr >> 2) & 0xFFFF);
353
354 err = tegra_vde_push_to_bsev_icmdqueue(vde, value, true);
355 if (err)
356 return err;
357
358 value = 0x00800005;
359 value |= ctx->pic_width_in_mbs << 11;
360 value |= ctx->pic_height_in_mbs << 3;
361
362 tegra_vde_writel(vde, value, vde->sxe, 0x10);
363
364 value = !ctx->baseline_profile << 17;
365 value |= ctx->level_idc << 13;
366 value |= ctx->log2_max_pic_order_cnt_lsb << 7;
367 value |= ctx->pic_order_cnt_type << 5;
368 value |= ctx->log2_max_frame_num;
369
370 tegra_vde_writel(vde, value, vde->sxe, 0x40);
371
372 value = ctx->pic_init_qp << 25;
373 value |= !!(ctx->deblocking_filter_control_present_flag) << 2;
374 value |= !!ctx->pic_order_present_flag;
375
376 tegra_vde_writel(vde, value, vde->sxe, 0x44);
377
378 value = ctx->chroma_qp_index_offset;
379 value |= ctx->num_ref_idx_l0_active_minus1 << 5;
380 value |= ctx->num_ref_idx_l1_active_minus1 << 10;
381 value |= !!ctx->constrained_intra_pred_flag << 15;
382
383 tegra_vde_writel(vde, value, vde->sxe, 0x48);
384
385 value = 0x0C000000;
386 value |= !!(dpb_frames[0].flags & FLAG_B_FRAME) << 24;
387
388 tegra_vde_writel(vde, value, vde->sxe, 0x4C);
389
390 value = 0x03800000;
391 value |= bitstream_data_size & GENMASK(19, 15);
392
393 tegra_vde_writel(vde, value, vde->sxe, 0x68);
394
395 tegra_vde_writel(vde, bitstream_data_addr, vde->sxe, 0x6C);
396
397 if (vde->soc->supports_ref_pic_marking)
398 tegra_vde_writel(vde, vde->secure_bo->dma_addr, vde->sxe, 0x7c);
399
400 value = 0x10000005;
401 value |= ctx->pic_width_in_mbs << 11;
402 value |= ctx->pic_height_in_mbs << 3;
403
404 tegra_vde_writel(vde, value, vde->mbe, 0x80);
405
406 value = 0x26800000;
407 value |= ctx->level_idc << 4;
408 value |= !ctx->baseline_profile << 1;
409 value |= !!ctx->direct_8x8_inference_flag;
410
411 tegra_vde_writel(vde, value, vde->mbe, 0x80);
412
413 tegra_vde_writel(vde, 0xF4000001, vde->mbe, 0x80);
414 tegra_vde_writel(vde, 0x20000000, vde->mbe, 0x80);
415 tegra_vde_writel(vde, 0xF4000101, vde->mbe, 0x80);
416
417 value = 0x20000000;
418 value |= ctx->chroma_qp_index_offset << 8;
419
420 tegra_vde_writel(vde, value, vde->mbe, 0x80);
421
422 err = tegra_vde_setup_mbe_frame_idx(vde,
423 ctx->dpb_frames_nb - 1,
424 ctx->pic_order_cnt_type == 0);
425 if (err) {
426 dev_err(dev, "MBE frames setup failed %d\n", err);
427 return err;
428 }
429
430 tegra_vde_mbe_set_0xa_reg(vde, 0, 0x000009FC);
431 tegra_vde_mbe_set_0xa_reg(vde, 2, 0x61DEAD00);
432 tegra_vde_mbe_set_0xa_reg(vde, 4, 0x62DEAD00);
433 tegra_vde_mbe_set_0xa_reg(vde, 6, 0x63DEAD00);
434 tegra_vde_mbe_set_0xa_reg(vde, 8, dpb_frames[0].aux_addr);
435
436 value = 0xFC000000;
437 value |= !!(dpb_frames[0].flags & FLAG_B_FRAME) << 2;
438
439 if (!ctx->baseline_profile)
440 value |= !!(dpb_frames[0].flags & FLAG_REFERENCE) << 1;
441
442 tegra_vde_writel(vde, value, vde->mbe, 0x80);
443
444 err = tegra_vde_wait_mbe(vde);
445 if (err) {
446 dev_err(dev, "MBE programming failed %d\n", err);
447 return err;
448 }
449
450 return 0;
451 }
452
tegra_vde_decode_frame(struct tegra_vde * vde,unsigned int macroblocks_nb)453 static void tegra_vde_decode_frame(struct tegra_vde *vde,
454 unsigned int macroblocks_nb)
455 {
456 reinit_completion(&vde->decode_completion);
457
458 tegra_vde_writel(vde, 0x00000001, vde->bsev, 0x8C);
459 tegra_vde_writel(vde, 0x20000000 | (macroblocks_nb - 1),
460 vde->sxe, 0x00);
461 }
462
tegra_vde_validate_h264_ctx(struct device * dev,struct tegra_vde_h264_decoder_ctx * ctx)463 static int tegra_vde_validate_h264_ctx(struct device *dev,
464 struct tegra_vde_h264_decoder_ctx *ctx)
465 {
466 if (ctx->dpb_frames_nb == 0 || ctx->dpb_frames_nb > 17) {
467 dev_err(dev, "Bad DPB size %u\n", ctx->dpb_frames_nb);
468 return -EINVAL;
469 }
470
471 if (ctx->level_idc > 15) {
472 dev_err(dev, "Bad level value %u\n", ctx->level_idc);
473 return -EINVAL;
474 }
475
476 if (ctx->pic_init_qp > 52) {
477 dev_err(dev, "Bad pic_init_qp value %u\n", ctx->pic_init_qp);
478 return -EINVAL;
479 }
480
481 if (ctx->log2_max_pic_order_cnt_lsb > 16) {
482 dev_err(dev, "Bad log2_max_pic_order_cnt_lsb value %u\n",
483 ctx->log2_max_pic_order_cnt_lsb);
484 return -EINVAL;
485 }
486
487 if (ctx->log2_max_frame_num > 16) {
488 dev_err(dev, "Bad log2_max_frame_num value %u\n",
489 ctx->log2_max_frame_num);
490 return -EINVAL;
491 }
492
493 if (ctx->chroma_qp_index_offset > 31) {
494 dev_err(dev, "Bad chroma_qp_index_offset value %u\n",
495 ctx->chroma_qp_index_offset);
496 return -EINVAL;
497 }
498
499 if (ctx->pic_order_cnt_type > 2) {
500 dev_err(dev, "Bad pic_order_cnt_type value %u\n",
501 ctx->pic_order_cnt_type);
502 return -EINVAL;
503 }
504
505 if (ctx->num_ref_idx_l0_active_minus1 > 15) {
506 dev_err(dev, "Bad num_ref_idx_l0_active_minus1 value %u\n",
507 ctx->num_ref_idx_l0_active_minus1);
508 return -EINVAL;
509 }
510
511 if (ctx->num_ref_idx_l1_active_minus1 > 15) {
512 dev_err(dev, "Bad num_ref_idx_l1_active_minus1 value %u\n",
513 ctx->num_ref_idx_l1_active_minus1);
514 return -EINVAL;
515 }
516
517 if (!ctx->pic_width_in_mbs || ctx->pic_width_in_mbs > 127) {
518 dev_err(dev, "Bad pic_width_in_mbs value %u\n",
519 ctx->pic_width_in_mbs);
520 return -EINVAL;
521 }
522
523 if (!ctx->pic_height_in_mbs || ctx->pic_height_in_mbs > 127) {
524 dev_err(dev, "Bad pic_height_in_mbs value %u\n",
525 ctx->pic_height_in_mbs);
526 return -EINVAL;
527 }
528
529 return 0;
530 }
531
tegra_vde_decode_begin(struct tegra_vde * vde,struct tegra_vde_h264_decoder_ctx * ctx,struct tegra_video_frame * dpb_frames,dma_addr_t bitstream_data_addr,size_t bitstream_data_size)532 static int tegra_vde_decode_begin(struct tegra_vde *vde,
533 struct tegra_vde_h264_decoder_ctx *ctx,
534 struct tegra_video_frame *dpb_frames,
535 dma_addr_t bitstream_data_addr,
536 size_t bitstream_data_size)
537 {
538 struct device *dev = vde->dev;
539 unsigned int macroblocks_nb;
540 int err;
541
542 err = mutex_lock_interruptible(&vde->lock);
543 if (err)
544 return err;
545
546 err = pm_runtime_resume_and_get(dev);
547 if (err < 0)
548 goto unlock;
549
550 /*
551 * We rely on the VDE registers reset value, otherwise VDE
552 * causes bus lockup.
553 */
554 err = reset_control_assert(vde->rst_mc);
555 if (err) {
556 dev_err(dev, "DEC start: Failed to assert MC reset: %d\n",
557 err);
558 goto put_runtime_pm;
559 }
560
561 err = reset_control_reset(vde->rst);
562 if (err) {
563 dev_err(dev, "DEC start: Failed to reset HW: %d\n", err);
564 goto put_runtime_pm;
565 }
566
567 err = reset_control_deassert(vde->rst_mc);
568 if (err) {
569 dev_err(dev, "DEC start: Failed to deassert MC reset: %d\n",
570 err);
571 goto put_runtime_pm;
572 }
573
574 macroblocks_nb = ctx->pic_width_in_mbs * ctx->pic_height_in_mbs;
575
576 err = tegra_vde_setup_hw_context(vde, ctx, dpb_frames,
577 bitstream_data_addr,
578 bitstream_data_size,
579 macroblocks_nb);
580 if (err)
581 goto put_runtime_pm;
582
583 tegra_vde_decode_frame(vde, macroblocks_nb);
584
585 return 0;
586
587 put_runtime_pm:
588 pm_runtime_mark_last_busy(dev);
589 pm_runtime_put_autosuspend(dev);
590
591 unlock:
592 mutex_unlock(&vde->lock);
593
594 return err;
595 }
596
tegra_vde_decode_abort(struct tegra_vde * vde)597 static void tegra_vde_decode_abort(struct tegra_vde *vde)
598 {
599 struct device *dev = vde->dev;
600 int err;
601
602 /*
603 * At first reset memory client to avoid resetting VDE HW in the
604 * middle of DMA which could result into memory corruption or hang
605 * the whole system.
606 */
607 err = reset_control_assert(vde->rst_mc);
608 if (err)
609 dev_err(dev, "DEC end: Failed to assert MC reset: %d\n", err);
610
611 err = reset_control_assert(vde->rst);
612 if (err)
613 dev_err(dev, "DEC end: Failed to assert HW reset: %d\n", err);
614
615 pm_runtime_mark_last_busy(dev);
616 pm_runtime_put_autosuspend(dev);
617
618 mutex_unlock(&vde->lock);
619 }
620
tegra_vde_decode_end(struct tegra_vde * vde)621 static int tegra_vde_decode_end(struct tegra_vde *vde)
622 {
623 unsigned int read_bytes, macroblocks_nb;
624 struct device *dev = vde->dev;
625 dma_addr_t bsev_ptr;
626 long time_left;
627 int ret;
628
629 time_left = wait_for_completion_interruptible_timeout(
630 &vde->decode_completion, msecs_to_jiffies(1000));
631 if (time_left < 0) {
632 ret = time_left;
633 } else if (time_left == 0) {
634 bsev_ptr = tegra_vde_readl(vde, vde->bsev, 0x10);
635 macroblocks_nb = tegra_vde_readl(vde, vde->sxe, 0xC8) & 0x1FFF;
636 read_bytes = bsev_ptr ? bsev_ptr - vde->bitstream_data_addr : 0;
637
638 dev_err(dev, "Decoding failed: read 0x%X bytes, %u macroblocks parsed\n",
639 read_bytes, macroblocks_nb);
640
641 ret = -EIO;
642 } else {
643 ret = 0;
644 }
645
646 tegra_vde_decode_abort(vde);
647
648 return ret;
649 }
650
get_ref_buf(struct tegra_ctx * ctx,struct vb2_v4l2_buffer * dst,unsigned int dpb_idx)651 static struct vb2_buffer *get_ref_buf(struct tegra_ctx *ctx,
652 struct vb2_v4l2_buffer *dst,
653 unsigned int dpb_idx)
654 {
655 const struct v4l2_h264_dpb_entry *dpb = ctx->h264.decode_params->dpb;
656 struct vb2_queue *cap_q = &ctx->fh.m2m_ctx->cap_q_ctx.q;
657 struct vb2_buffer *vb = NULL;
658
659 if (dpb[dpb_idx].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)
660 vb = vb2_find_buffer(cap_q, dpb[dpb_idx].reference_ts);
661
662 /*
663 * If a DPB entry is unused or invalid, address of current destination
664 * buffer is returned.
665 */
666 if (!vb)
667 return &dst->vb2_buf;
668
669 return vb;
670 }
671
/*
 * Check that plane @plane_id of @vb has room for @min_size bytes after its
 * data offset.
 *
 * Returns 0 when the plane is large enough, otherwise logs the sizes and
 * returns -EINVAL.
 */
static int tegra_vde_validate_vb_size(struct tegra_ctx *ctx,
				      struct vb2_buffer *vb,
				      unsigned int plane_id,
				      size_t min_size)
{
	const unsigned long plane_size = vb2_plane_size(vb, plane_id);
	const u64 offset = vb->planes[plane_id].data_offset;

	if (offset + min_size <= plane_size)
		return 0;

	dev_err(ctx->vde->dev, "Too small plane[%u] size %lu @0x%llX, should be at least %zu\n",
		plane_id, plane_size, offset, min_size);

	return -EINVAL;
}
688
tegra_vde_h264_setup_frame(struct tegra_ctx * ctx,struct tegra_vde_h264_decoder_ctx * h264,struct v4l2_h264_reflist_builder * b,struct vb2_buffer * vb,unsigned int ref_id,unsigned int id)689 static int tegra_vde_h264_setup_frame(struct tegra_ctx *ctx,
690 struct tegra_vde_h264_decoder_ctx *h264,
691 struct v4l2_h264_reflist_builder *b,
692 struct vb2_buffer *vb,
693 unsigned int ref_id,
694 unsigned int id)
695 {
696 struct v4l2_pix_format_mplane *pixfmt = &ctx->decoded_fmt.fmt.pix_mp;
697 struct tegra_m2m_buffer *tb = vb_to_tegra_buf(vb);
698 struct tegra_ctx_h264 *h = &ctx->h264;
699 struct tegra_vde *vde = ctx->vde;
700 struct device *dev = vde->dev;
701 unsigned int cstride, lstride;
702 unsigned int flags = 0;
703 size_t lsize, csize;
704 int err, frame_num;
705
706 lsize = h264->pic_width_in_mbs * 16 * h264->pic_height_in_mbs * 16;
707 csize = h264->pic_width_in_mbs * 8 * h264->pic_height_in_mbs * 8;
708 lstride = pixfmt->plane_fmt[0].bytesperline;
709 cstride = pixfmt->plane_fmt[1].bytesperline;
710
711 err = tegra_vde_validate_vb_size(ctx, vb, 0, lsize);
712 if (err)
713 return err;
714
715 err = tegra_vde_validate_vb_size(ctx, vb, 1, csize);
716 if (err)
717 return err;
718
719 err = tegra_vde_validate_vb_size(ctx, vb, 2, csize);
720 if (err)
721 return err;
722
723 if (!tb->aux || tb->aux->size < csize) {
724 dev_err(dev, "Too small aux size %zd, should be at least %zu\n",
725 tb->aux ? tb->aux->size : -1, csize);
726 return -EINVAL;
727 }
728
729 if (id == 0) {
730 frame_num = h->decode_params->frame_num;
731
732 if (h->decode_params->nal_ref_idc)
733 flags |= FLAG_REFERENCE;
734 } else {
735 frame_num = b->refs[ref_id].frame_num;
736 }
737
738 if (tb->b_frame)
739 flags |= FLAG_B_FRAME;
740
741 vde->frames[id].flags = flags;
742 vde->frames[id].y_addr = tb->dma_addr[0];
743 vde->frames[id].cb_addr = tb->dma_addr[1];
744 vde->frames[id].cr_addr = tb->dma_addr[2];
745 vde->frames[id].aux_addr = tb->aux->dma_addr;
746 vde->frames[id].frame_num = frame_num & 0x7fffff;
747 vde->frames[id].luma_atoms_pitch = lstride / VDE_ATOM;
748 vde->frames[id].chroma_atoms_pitch = cstride / VDE_ATOM;
749
750 return 0;
751 }
752
tegra_vde_h264_setup_frames(struct tegra_ctx * ctx,struct tegra_vde_h264_decoder_ctx * h264)753 static int tegra_vde_h264_setup_frames(struct tegra_ctx *ctx,
754 struct tegra_vde_h264_decoder_ctx *h264)
755 {
756 struct vb2_v4l2_buffer *src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
757 struct vb2_v4l2_buffer *dst = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
758 const struct v4l2_h264_dpb_entry *dpb = ctx->h264.decode_params->dpb;
759 struct tegra_m2m_buffer *tb = vb_to_tegra_buf(&dst->vb2_buf);
760 struct tegra_ctx_h264 *h = &ctx->h264;
761 struct v4l2_h264_reflist_builder b;
762 struct v4l2_h264_reference *dpb_id;
763 struct h264_reflists reflists;
764 struct vb2_buffer *ref;
765 unsigned int i;
766 int err;
767
768 /*
769 * Tegra hardware requires information about frame's type, assuming
770 * that frame consists of the same type slices. Userspace must tag
771 * frame's type appropriately.
772 *
773 * Decoding of a non-uniform frames isn't supported by hardware and
774 * require software preprocessing that we don't implement. Decoding
775 * is expected to fail in this case. Such video streams are rare in
776 * practice, so not a big deal.
777 *
778 * If userspace doesn't tell us frame's type, then we will try decode
779 * as-is.
780 */
781 v4l2_m2m_buf_copy_metadata(src, dst, true);
782
783 if (h->decode_params->flags & V4L2_H264_DECODE_PARAM_FLAG_BFRAME)
784 tb->b_frame = true;
785 else
786 tb->b_frame = false;
787
788 err = tegra_vde_h264_setup_frame(ctx, h264, NULL, &dst->vb2_buf, 0,
789 h264->dpb_frames_nb++);
790 if (err)
791 return err;
792
793 if (!(h->decode_params->flags & (V4L2_H264_DECODE_PARAM_FLAG_PFRAME |
794 V4L2_H264_DECODE_PARAM_FLAG_BFRAME)))
795 return 0;
796
797 v4l2_h264_init_reflist_builder(&b, h->decode_params, h->sps, dpb);
798
799 if (h->decode_params->flags & V4L2_H264_DECODE_PARAM_FLAG_BFRAME) {
800 v4l2_h264_build_b_ref_lists(&b, reflists.b0, reflists.b1);
801 dpb_id = reflists.b0;
802 } else {
803 v4l2_h264_build_p_ref_list(&b, reflists.p);
804 dpb_id = reflists.p;
805 }
806
807 for (i = 0; i < b.num_valid; i++) {
808 int dpb_idx = dpb_id[i].index;
809
810 ref = get_ref_buf(ctx, dst, dpb_idx);
811
812 err = tegra_vde_h264_setup_frame(ctx, h264, &b, ref, dpb_idx,
813 h264->dpb_frames_nb++);
814 if (err)
815 return err;
816
817 if (b.refs[dpb_idx].top_field_order_cnt < b.cur_pic_order_count)
818 h264->dpb_ref_frames_with_earlier_poc_nb++;
819 }
820
821 return 0;
822 }
823
/*
 * Translate an SPS level_idc value into the level code used by this driver.
 *
 * The recognised levels are listed in ascending order and their codes are
 * consecutive, starting at 2, so the code is the list position plus 2.
 * Every value that is not in the list maps to 15.
 */
static unsigned int to_tegra_vde_h264_level_idc(unsigned int level_idc)
{
	static const unsigned char known_levels[] = {
		11, 12, 13,
		20, 21, 22,
		30, 31, 32,
		40, 41, 42,
		50,
	};
	const unsigned int count = sizeof(known_levels) /
				   sizeof(known_levels[0]);
	unsigned int pos;

	for (pos = 0; pos < count; pos++) {
		if (known_levels[pos] == level_idc)
			return pos + 2;
	}

	return 15;
}
859
tegra_vde_h264_setup_context(struct tegra_ctx * ctx,struct tegra_vde_h264_decoder_ctx * h264)860 static int tegra_vde_h264_setup_context(struct tegra_ctx *ctx,
861 struct tegra_vde_h264_decoder_ctx *h264)
862 {
863 struct tegra_ctx_h264 *h = &ctx->h264;
864 struct tegra_vde *vde = ctx->vde;
865 struct device *dev = vde->dev;
866 int err;
867
868 memset(h264, 0, sizeof(*h264));
869 memset(vde->frames, 0, sizeof(vde->frames));
870
871 tegra_vde_prepare_control_data(ctx, V4L2_CID_STATELESS_H264_DECODE_PARAMS);
872 tegra_vde_prepare_control_data(ctx, V4L2_CID_STATELESS_H264_SPS);
873 tegra_vde_prepare_control_data(ctx, V4L2_CID_STATELESS_H264_PPS);
874
875 /* CABAC unsupported by hardware, requires software preprocessing */
876 if (h->pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE)
877 return -EOPNOTSUPP;
878
879 if (h->decode_params->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC)
880 return -EOPNOTSUPP;
881
882 if (h->sps->profile_idc == 66)
883 h264->baseline_profile = 1;
884
885 if (h->sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE)
886 h264->direct_8x8_inference_flag = 1;
887
888 if (h->pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED)
889 h264->constrained_intra_pred_flag = 1;
890
891 if (h->pps->flags & V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT)
892 h264->deblocking_filter_control_present_flag = 1;
893
894 if (h->pps->flags & V4L2_H264_PPS_FLAG_BOTTOM_FIELD_PIC_ORDER_IN_FRAME_PRESENT)
895 h264->pic_order_present_flag = 1;
896
897 h264->level_idc = to_tegra_vde_h264_level_idc(h->sps->level_idc);
898 h264->log2_max_pic_order_cnt_lsb = h->sps->log2_max_pic_order_cnt_lsb_minus4 + 4;
899 h264->log2_max_frame_num = h->sps->log2_max_frame_num_minus4 + 4;
900 h264->pic_order_cnt_type = h->sps->pic_order_cnt_type;
901 h264->pic_width_in_mbs = h->sps->pic_width_in_mbs_minus1 + 1;
902 h264->pic_height_in_mbs = h->sps->pic_height_in_map_units_minus1 + 1;
903
904 h264->num_ref_idx_l0_active_minus1 = h->pps->num_ref_idx_l0_default_active_minus1;
905 h264->num_ref_idx_l1_active_minus1 = h->pps->num_ref_idx_l1_default_active_minus1;
906 h264->chroma_qp_index_offset = h->pps->chroma_qp_index_offset & 0x1f;
907 h264->pic_init_qp = h->pps->pic_init_qp_minus26 + 26;
908
909 err = tegra_vde_h264_setup_frames(ctx, h264);
910 if (err)
911 return err;
912
913 err = tegra_vde_validate_h264_ctx(dev, h264);
914 if (err)
915 return err;
916
917 return 0;
918 }
919
tegra_vde_h264_decode_run(struct tegra_ctx * ctx)920 int tegra_vde_h264_decode_run(struct tegra_ctx *ctx)
921 {
922 struct vb2_v4l2_buffer *src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
923 struct tegra_m2m_buffer *bitstream = vb_to_tegra_buf(&src->vb2_buf);
924 size_t bitstream_size = vb2_get_plane_payload(&src->vb2_buf, 0);
925 struct tegra_vde_h264_decoder_ctx h264;
926 struct tegra_vde *vde = ctx->vde;
927 int err;
928
929 err = tegra_vde_h264_setup_context(ctx, &h264);
930 if (err)
931 return err;
932
933 err = tegra_vde_decode_begin(vde, &h264, vde->frames,
934 bitstream->dma_addr[0],
935 bitstream_size);
936 if (err)
937 return err;
938
939 return 0;
940 }
941
tegra_vde_h264_decode_wait(struct tegra_ctx * ctx)942 int tegra_vde_h264_decode_wait(struct tegra_ctx *ctx)
943 {
944 return tegra_vde_decode_end(ctx->vde);
945 }
946