1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Rockchip RK3288 VPU codec driver
4 *
5 * Copyright (c) 2014 Rockchip Electronics Co., Ltd.
6 * Hertz Wong <hertz.wong@rock-chips.com>
7 * Herman Chen <herman.chen@rock-chips.com>
8 *
9 * Copyright (C) 2014 Google, Inc.
10 * Tomasz Figa <tfiga@chromium.org>
11 */
12
13 #include <linux/types.h>
14 #include <media/v4l2-h264.h>
15 #include <media/v4l2-mem2mem.h>
16
17 #include "hantro.h"
18 #include "hantro_hw.h"
19
/*
 * Dimensions of the arrays that make up the auxiliary table.
 * The CABAC and POC sizes count u32 elements; the scaling list size
 * counts u8 elements (bytes).
 */
#define CABAC_INIT_BUFFER_SIZE (460 * 2)
#define POC_BUFFER_SIZE 34
#define SCALING_LIST_SIZE (6 * 16 + 2 * 64)
24
/*
 * The valid and long term reference bitmaps use reversed bit indexes:
 * picture 0 is reported in bit 31, picture 1 in bit 30, and so on.
 */
#define REF_BIT(i) BIT(31 - (i))
30
31 /* Data structure describing auxiliary buffer format. */
32 struct hantro_h264_dec_priv_tbl {
33 u32 cabac_table[CABAC_INIT_BUFFER_SIZE];
34 u32 poc[POC_BUFFER_SIZE];
35 u8 scaling_list[SCALING_LIST_SIZE];
36 };
37
38 /*
39 * Constant CABAC table.
40 * From drivers/media/platform/rk3288-vpu/rk3288_vpu_hw_h264d.c
41 * in https://chromium.googlesource.com/chromiumos/third_party/kernel,
42 * chromeos-3.14 branch.
43 */
44 static const u32 h264_cabac_table[] = {
45 0x14f10236, 0x034a14f1, 0x0236034a, 0xe47fe968, 0xfa35ff36, 0x07330000,
46 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
47 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
48 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
49 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
50 0x0029003f, 0x003f003f, 0xf7530456, 0x0061f948, 0x0d29033e, 0x000b0137,
51 0x0045ef7f, 0xf3660052, 0xf94aeb6b, 0xe57fe17f, 0xe87fee5f, 0xe57feb72,
52 0xe27fef7b, 0xf473f07a, 0xf573f43f, 0xfe44f154, 0xf368fd46, 0xf85df65a,
53 0xe27fff4a, 0xfa61f95b, 0xec7ffc38, 0xfb52f94c, 0xea7df95d, 0xf557fd4d,
54 0xfb47fc3f, 0xfc44f454, 0xf93ef941, 0x083d0538, 0xfe420140, 0x003dfe4e,
55 0x01320734, 0x0a23002c, 0x0b26012d, 0x002e052c, 0x1f110133, 0x07321c13,
56 0x10210e3e, 0xf36cf164, 0xf365f35b, 0xf45ef658, 0xf054f656, 0xf953f357,
57 0xed5e0146, 0x0048fb4a, 0x123bf866, 0xf164005f, 0xfc4b0248, 0xf54bfd47,
58 0x0f2ef345, 0x003e0041, 0x1525f148, 0x09391036, 0x003e0c48, 0x18000f09,
59 0x08190d12, 0x0f090d13, 0x0a250c12, 0x061d1421, 0x0f1e042d, 0x013a003e,
60 0x073d0c26, 0x0b2d0f27, 0x0b2a0d2c, 0x102d0c29, 0x0a311e22, 0x122a0a37,
61 0x1133112e, 0x00591aed, 0x16ef1aef, 0x1ee71cec, 0x21e925e5, 0x21e928e4,
62 0x26ef21f5, 0x28f129fa, 0x26012911, 0x1efa1b03, 0x1a1625f0, 0x23fc26f8,
63 0x26fd2503, 0x26052a00, 0x23102716, 0x0e301b25, 0x153c0c44, 0x0261fd47,
64 0xfa2afb32, 0xfd36fe3e, 0x003a013f, 0xfe48ff4a, 0xf75bfb43, 0xfb1bfd27,
65 0xfe2c002e, 0xf040f844, 0xf64efa4d, 0xf656f45c, 0xf137f63c, 0xfa3efc41,
66 0xf449f84c, 0xf950f758, 0xef6ef561, 0xec54f54f, 0xfa49fc4a, 0xf356f360,
67 0xf561ed75, 0xf84efb21, 0xfc30fe35, 0xfd3ef347, 0xf64ff456, 0xf35af261,
68 0x0000fa5d, 0xfa54f84f, 0x0042ff47, 0x003efe3c, 0xfe3bfb4b, 0xfd3efc3a,
69 0xf742ff4f, 0x00470344, 0x0a2cf93e, 0x0f240e28, 0x101b0c1d, 0x012c1424,
70 0x1220052a, 0x01300a3e, 0x112e0940, 0xf468f561, 0xf060f958, 0xf855f955,
71 0xf755f358, 0x0442fd4d, 0xfd4cfa4c, 0x0a3aff4c, 0xff53f963, 0xf25f025f,
72 0x004cfb4a, 0x0046f54b, 0x01440041, 0xf249033e, 0x043eff44, 0xf34b0b37,
73 0x05400c46, 0x0f060613, 0x07100c0e, 0x120d0d0b, 0x0d0f0f10, 0x0c170d17,
74 0x0f140e1a, 0x0e2c1128, 0x112f1811, 0x15151916, 0x1f1b161d, 0x13230e32,
75 0x0a39073f, 0xfe4dfc52, 0xfd5e0945, 0xf46d24dd, 0x24de20e6, 0x25e22ce0,
76 0x22ee22f1, 0x28f121f9, 0x23fb2100, 0x2602210d, 0x17230d3a, 0x1dfd1a00,
77 0x161e1ff9, 0x23f122fd, 0x220324ff, 0x2205200b, 0x2305220c, 0x270b1e1d,
78 0x221a1d27, 0x13421f15, 0x1f1f1932, 0xef78ec70, 0xee72f555, 0xf15cf259,
79 0xe647f151, 0xf2500044, 0xf246e838, 0xe944e832, 0xf54a17f3, 0x1af328f1,
80 0x31f22c03, 0x2d062c22, 0x21361352, 0xfd4bff17, 0x0122012b, 0x0036fe37,
81 0x003d0140, 0x0044f75c, 0xf26af361, 0xf15af45a, 0xee58f649, 0xf74ff256,
82 0xf649f646, 0xf645fb42, 0xf740fb3a, 0x023b15f6, 0x18f51cf8, 0x1cff1d03,
83 0x1d092314, 0x1d240e43, 0x14f10236, 0x034a14f1, 0x0236034a, 0xe47fe968,
84 0xfa35ff36, 0x07331721, 0x17021500, 0x01090031, 0xdb760539, 0xf34ef541,
85 0x013e0c31, 0xfc491132, 0x1240092b, 0x1d001a43, 0x105a0968, 0xd27fec68,
86 0x0143f34e, 0xf541013e, 0xfa56ef5f, 0xfa3d092d, 0xfd45fa51, 0xf5600637,
87 0x0743fb56, 0x0258003a, 0xfd4cf65e, 0x05360445, 0xfd510058, 0xf943fb4a,
88 0xfc4afb50, 0xf948013a, 0x0029003f, 0x003f003f, 0xf7530456, 0x0061f948,
89 0x0d29033e, 0x002dfc4e, 0xfd60e57e, 0xe462e765, 0xe943e452, 0xec5ef053,
90 0xea6eeb5b, 0xee66f35d, 0xe37ff95c, 0xfb59f960, 0xf36cfd2e, 0xff41ff39,
91 0xf75dfd4a, 0xf75cf857, 0xe97e0536, 0x063c063b, 0x0645ff30, 0x0044fc45,
92 0xf858fe55, 0xfa4eff4b, 0xf94d0236, 0x0532fd44, 0x0132062a, 0xfc51013f,
93 0xfc460043, 0x0239fe4c, 0x0b230440, 0x013d0b23, 0x12190c18, 0x0d1d0d24,
94 0xf65df949, 0xfe490d2e, 0x0931f964, 0x09350235, 0x0535fe3d, 0x00380038,
95 0xf33ffb3c, 0xff3e0439, 0xfa450439, 0x0e270433, 0x0d440340, 0x013d093f,
96 0x07321027, 0x052c0434, 0x0b30fb3c, 0xff3b003b, 0x1621052c, 0x0e2bff4e,
97 0x003c0945, 0x0b1c0228, 0x032c0031, 0x002e022c, 0x0233002f, 0x0427023e,
98 0x062e0036, 0x0336023a, 0x043f0633, 0x06390735, 0x06340637, 0x0b2d0e24,
99 0x0835ff52, 0x0737fd4e, 0x0f2e161f, 0xff541907, 0x1ef91c03, 0x1c042000,
100 0x22ff1e06, 0x1e062009, 0x1f131a1b, 0x1a1e2514, 0x1c221146, 0x0143053b,
101 0x0943101e, 0x12201223, 0x161d181f, 0x1726122b, 0x14290b3f, 0x093b0940,
102 0xff5efe59, 0xf76cfa4c, 0xfe2c002d, 0x0034fd40, 0xfe3bfc46, 0xfc4bf852,
103 0xef66f74d, 0x0318002a, 0x00300037, 0xfa3bf947, 0xf453f557, 0xe277013a,
104 0xfd1dff24, 0x0126022b, 0xfa37003a, 0x0040fd4a, 0xf65a0046, 0xfc1d051f,
105 0x072a013b, 0xfe3afd48, 0xfd51f561, 0x003a0805, 0x0a0e0e12, 0x0d1b0228,
106 0x003afd46, 0xfa4ff855, 0x0000f36a, 0xf06af657, 0xeb72ee6e, 0xf262ea6e,
107 0xeb6aee67, 0xeb6be96c, 0xe670f660, 0xf45ffb5b, 0xf75dea5e, 0xfb560943,
108 0xfc50f655, 0xff46073c, 0x093a053d, 0x0c320f32, 0x12311136, 0x0a29072e,
109 0xff330731, 0x08340929, 0x062f0237, 0x0d290a2c, 0x06320535, 0x0d31043f,
110 0x0640fe45, 0xfe3b0646, 0x0a2c091f, 0x0c2b0335, 0x0e220a26, 0xfd340d28,
111 0x1120072c, 0x07260d32, 0x0a391a2b, 0x0e0b0b0e, 0x090b120b, 0x150917fe,
112 0x20f120f1, 0x22eb27e9, 0x2adf29e1, 0x2ee426f4, 0x151d2de8, 0x35d330e6,
113 0x41d52bed, 0x27f61e09, 0x121a141b, 0x0039f252, 0xfb4bed61, 0xdd7d1b00,
114 0x1c001ffc, 0x1b062208, 0x1e0a1816, 0x21131620, 0x1a1f1529, 0x1a2c172f,
115 0x10410e47, 0x083c063f, 0x11411518, 0x17141a17, 0x1b201c17, 0x1c181728,
116 0x18201c1d, 0x172a1339, 0x1635163d, 0x0b560c28, 0x0b330e3b, 0xfc4ff947,
117 0xfb45f746, 0xf842f644, 0xed49f445, 0xf046f143, 0xec3eed46, 0xf042ea41,
118 0xec3f09fe, 0x1af721f7, 0x27f929fe, 0x2d033109, 0x2d1b243b, 0xfa42f923,
119 0xf92af82d, 0xfb30f438, 0xfa3cfb3e, 0xf842f84c, 0xfb55fa51, 0xf64df951,
120 0xef50ee49, 0xfc4af653, 0xf747f743, 0xff3df842, 0xf242003b, 0x023b15f3,
121 0x21f227f9, 0x2efe3302, 0x3c063d11, 0x37222a3e, 0x14f10236, 0x034a14f1,
122 0x0236034a, 0xe47fe968, 0xfa35ff36, 0x07331619, 0x22001000, 0xfe090429,
123 0xe3760241, 0xfa47f34f, 0x05340932, 0xfd460a36, 0x1a221316, 0x28003902,
124 0x29241a45, 0xd37ff165, 0xfc4cfa47, 0xf34f0534, 0x0645f35a, 0x0034082b,
125 0xfe45fb52, 0xf660023b, 0x024bfd57, 0xfd640138, 0xfd4afa55, 0x003bfd51,
126 0xf956fb5f, 0xff42ff4d, 0x0146fe56, 0xfb48003d, 0x0029003f, 0x003f003f,
127 0xf7530456, 0x0061f948, 0x0d29033e, 0x0d0f0733, 0x0250d97f, 0xee5bef60,
128 0xe651dd62, 0xe866e961, 0xe577e863, 0xeb6eee66, 0xdc7f0050, 0xfb59f95e,
129 0xfc5c0027, 0x0041f154, 0xdd7ffe49, 0xf468f75b, 0xe17f0337, 0x07380737,
130 0x083dfd35, 0x0044f94a, 0xf758f367, 0xf35bf759, 0xf25cf84c, 0xf457e96e,
131 0xe869f64e, 0xec70ef63, 0xb27fba7f, 0xce7fd27f, 0xfc42fb4e, 0xfc47f848,
132 0x023bff37, 0xf946fa4b, 0xf859de77, 0xfd4b2014, 0x1e16d47f, 0x0036fb3d,
133 0x003aff3c, 0xfd3df843, 0xe754f24a, 0xfb410534, 0x0239003d, 0xf745f546,
134 0x1237fc47, 0x003a073d, 0x09291219, 0x0920052b, 0x092f002c, 0x0033022e,
135 0x1326fc42, 0x0f260c2a, 0x09220059, 0x042d0a1c, 0x0a1f21f5, 0x34d5120f,
136 0x1c0023ea, 0x26e72200, 0x27ee20f4, 0x66a20000, 0x38f121fc, 0x1d0a25fb,
137 0x33e327f7, 0x34de45c6, 0x43c12cfb, 0x200737e3, 0x20010000, 0x1b2421e7,
138 0x22e224e4, 0x26e426e5, 0x22ee23f0, 0x22f220f8, 0x25fa2300, 0x1e0a1c12,
139 0x1a191d29, 0x004b0248, 0x084d0e23, 0x121f1123, 0x151e112d, 0x142a122d,
140 0x1b1a1036, 0x07421038, 0x0b490a43, 0xf674e970, 0xf147f93d, 0x0035fb42,
141 0xf54df750, 0xf754f657, 0xde7feb65, 0xfd27fb35, 0xf93df54b, 0xf14def5b,
142 0xe76be76f, 0xe47af54c, 0xf62cf634, 0xf639f73a, 0xf048f945, 0xfc45fb4a,
143 0xf7560242, 0xf7220120, 0x0b1f0534, 0xfe37fe43, 0x0049f859, 0x03340704,
144 0x0a081108, 0x10130325, 0xff3dfb49, 0xff46fc4e, 0x0000eb7e, 0xe97cec6e,
145 0xe67ee77c, 0xef69e579, 0xe575ef66, 0xe675e574, 0xdf7af65f, 0xf264f85f,
146 0xef6fe472, 0xfa59fe50, 0xfc52f755, 0xf851ff48, 0x05400143, 0x09380045,
147 0x01450745, 0xf945fa43, 0xf04dfe40, 0x023dfa43, 0xfd400239, 0xfd41fd42,
148 0x003e0933, 0xff42fe47, 0xfe4bff46, 0xf7480e3c, 0x1025002f, 0x12230b25,
149 0x0c290a29, 0x02300c29, 0x0d29003b, 0x03321328, 0x03421232, 0x13fa12fa,
150 0x0e001af4, 0x1ff021e7, 0x21ea25e4, 0x27e22ae2, 0x2fd62ddc, 0x31de29ef,
151 0x200945b9, 0x3fc142c0, 0x4db636d9, 0x34dd29f6, 0x240028ff, 0x1e0e1c1a,
152 0x17250c37, 0x0b4125df, 0x27dc28db, 0x26e22edf, 0x2ae228e8, 0x31e326f4,
153 0x28f626fd, 0x2efb1f14, 0x1d1e192c, 0x0c300b31, 0x1a2d1616, 0x17161b15,
154 0x21141a1c, 0x1e181b22, 0x122a1927, 0x12320c46, 0x15360e47, 0x0b531920,
155 0x15311536, 0xfb55fa51, 0xf64df951, 0xef50ee49, 0xfc4af653, 0xf747f743,
156 0xff3df842, 0xf242003b, 0x023b11f6, 0x20f32af7, 0x31fb3500, 0x4003440a,
157 0x421b2f39, 0xfb470018, 0xff24fe2a, 0xfe34f739, 0xfa3ffc41, 0xfc43f952,
158 0xfd51fd4c, 0xf948fa4e, 0xf448f244, 0xfd46fa4c, 0xfb42fb3e, 0x0039fc3d,
159 0xf73c0136, 0x023a11f6, 0x20f32af7, 0x31fb3500, 0x4003440a, 0x421b2f39,
160 0x14f10236, 0x034a14f1, 0x0236034a, 0xe47fe968, 0xfa35ff36, 0x07331d10,
161 0x19000e00, 0xf633fd3e, 0xe5631a10, 0xfc55e866, 0x05390639, 0xef490e39,
162 0x1428140a, 0x1d003600, 0x252a0c61, 0xe07fea75, 0xfe4afc55, 0xe8660539,
163 0xfa5df258, 0xfa2c0437, 0xf559f167, 0xeb741339, 0x143a0454, 0x0660013f,
164 0xfb55f36a, 0x053f064b, 0xfd5aff65, 0x0337fc4f, 0xfe4bf461, 0xf932013c,
165 0x0029003f, 0x003f003f, 0xf7530456, 0x0061f948, 0x0d29033e, 0x0722f758,
166 0xec7fdc7f, 0xef5bf25f, 0xe754e756, 0xf459ef5b, 0xe17ff24c, 0xee67f35a,
167 0xdb7f0b50, 0x054c0254, 0x054efa37, 0x043df253, 0xdb7ffb4f, 0xf568f55b,
168 0xe27f0041, 0xfe4f0048, 0xfc5cfa38, 0x0344f847, 0xf362fc56, 0xf458fb52,
169 0xfd48fc43, 0xf848f059, 0xf745ff3b, 0x05420439, 0xfc47fe47, 0x023aff4a,
170 0xfc2cff45, 0x003ef933, 0xfc2ffa2a, 0xfd29fa35, 0x084cf74e, 0xf5530934,
171 0x0043fb5a, 0x0143f148, 0xfb4bf850, 0xeb53eb40, 0xf31fe740, 0xe35e094b,
172 0x113ff84a, 0xfb23fe1b, 0x0d5b0341, 0xf945084d, 0xf642033e, 0xfd44ec51,
173 0x001e0107, 0xfd17eb4a, 0x1042e97c, 0x11252cee, 0x32deea7f, 0x0427002a,
174 0x07220b1d, 0x081f0625, 0x072a0328, 0x08210d2b, 0x0d24042f, 0x0337023a,
175 0x063c082c, 0x0b2c0e2a, 0x07300438, 0x04340d25, 0x0931133a, 0x0a300c2d,
176 0x00451421, 0x083f23ee, 0x21e71cfd, 0x180a1b00, 0x22f234d4, 0x27e81311,
177 0x1f19241d, 0x1821220f, 0x1e141649, 0x1422131f, 0x1b2c1310, 0x0f240f24,
178 0x151c1915, 0x1e141f0c, 0x1b10182a, 0x005d0e38, 0x0f391a26, 0xe87fe873,
179 0xea52f73e, 0x0035003b, 0xf255f359, 0xf35ef55c, 0xe37feb64, 0xf239f443,
180 0xf547f64d, 0xeb55f058, 0xe968f162, 0xdb7ff652, 0xf830f83d, 0xf842f946,
181 0xf24bf64f, 0xf753f45c, 0xee6cfc4f, 0xea45f04b, 0xfe3a013a, 0xf34ef753,
182 0xfc51f363, 0xf351fa26, 0xf33efa3a, 0xfe3bf049, 0xf64cf356, 0xf753f657,
183 0x0000ea7f, 0xe77fe778, 0xe57fed72, 0xe975e776, 0xe675e871, 0xe476e178,
184 0xdb7cf65e, 0xf166f663, 0xf36ace7f, 0xfb5c1139, 0xfb56f35e, 0xf45bfe4d,
185 0x0047ff49, 0x0440f951, 0x05400f39, 0x01430044, 0xf6430144, 0x004d0240,
186 0x0044fb4e, 0x0737053b, 0x02410e36, 0x0f2c053c, 0x0246fe4c, 0xee560c46,
187 0x0540f446, 0x0b370538, 0x00450241, 0xfa4a0536, 0x0736fa4c, 0xf552fe4d,
188 0xfe4d192a, 0x11f310f7, 0x11f41beb, 0x25e229d8, 0x2ad730d1, 0x27e02ed8,
189 0x34cd2ed7, 0x34d92bed, 0x200b3dc9, 0x38d23ece, 0x51bd2dec, 0x23fe1c0f,
190 0x22012701, 0x1e111426, 0x122d0f36, 0x004f24f0, 0x25f225ef, 0x2001220f,
191 0x1d0f1819, 0x22161f10, 0x23121f1c, 0x2129241c, 0x1b2f153e, 0x121f131a,
192 0x24181817, 0x1b10181e, 0x1f1d1629, 0x162a103c, 0x0f340e3c, 0x034ef07b,
193 0x15351638, 0x193d1521, 0x1332113d, 0xfd4ef84a, 0xf748f648, 0xee4bf447,
194 0xf53ffb46, 0xef4bf248, 0xf043f835, 0xf23bf734, 0xf54409fe, 0x1ef61ffc,
195 0x21ff2107, 0x1f0c2517, 0x1f261440, 0xf747f925, 0xf82cf531, 0xf638f43b,
196 0xf83ff743, 0xfa44f64f, 0xfd4ef84a, 0xf748f648, 0xee4bf447, 0xf53ffb46,
197 0xef4bf248, 0xf043f835, 0xf23bf734, 0xf54409fe, 0x1ef61ffc, 0x21ff2107,
198 0x1f0c2517, 0x1f261440
199 };
200
201 static void
assemble_scaling_list(struct hantro_ctx * ctx)202 assemble_scaling_list(struct hantro_ctx *ctx)
203 {
204 const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls;
205 const struct v4l2_ctrl_h264_scaling_matrix *scaling = ctrls->scaling;
206 const struct v4l2_ctrl_h264_pps *pps = ctrls->pps;
207 const size_t num_list_4x4 = ARRAY_SIZE(scaling->scaling_list_4x4);
208 const size_t list_len_4x4 = ARRAY_SIZE(scaling->scaling_list_4x4[0]);
209 const size_t list_len_8x8 = ARRAY_SIZE(scaling->scaling_list_8x8[0]);
210 struct hantro_h264_dec_priv_tbl *tbl = ctx->h264_dec.priv.cpu;
211 u32 *dst = (u32 *)tbl->scaling_list;
212 const u32 *src;
213 int i, j;
214
215 if (!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT))
216 return;
217
218 for (i = 0; i < num_list_4x4; i++) {
219 src = (u32 *)&scaling->scaling_list_4x4[i];
220 for (j = 0; j < list_len_4x4 / 4; j++)
221 *dst++ = swab32(src[j]);
222 }
223
224 /* Only Intra/Inter Y lists */
225 for (i = 0; i < 2; i++) {
226 src = (u32 *)&scaling->scaling_list_8x8[i];
227 for (j = 0; j < list_len_8x8 / 4; j++)
228 *dst++ = swab32(src[j]);
229 }
230 }
231
prepare_table(struct hantro_ctx * ctx)232 static void prepare_table(struct hantro_ctx *ctx)
233 {
234 const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls;
235 const struct v4l2_ctrl_h264_decode_params *dec_param = ctrls->decode;
236 const struct v4l2_ctrl_h264_sps *sps = ctrls->sps;
237 struct hantro_h264_dec_priv_tbl *tbl = ctx->h264_dec.priv.cpu;
238 const struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb;
239 u32 dpb_longterm = 0;
240 u32 dpb_valid = 0;
241 int i;
242
243 for (i = 0; i < HANTRO_H264_DPB_SIZE; ++i) {
244 tbl->poc[i * 2] = dpb[i].top_field_order_cnt;
245 tbl->poc[i * 2 + 1] = dpb[i].bottom_field_order_cnt;
246
247 if (!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_VALID))
248 continue;
249
250 /*
251 * Set up bit maps of valid and long term DPBs.
252 * NOTE: The bits are reversed, i.e. MSb is DPB 0. For frame
253 * decoding, bit 31 to 15 are used, while for field decoding,
254 * all bits are used, with bit 31 being a top field, 30 a bottom
255 * field and so on.
256 */
257 if (dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) {
258 if (dpb[i].fields & V4L2_H264_TOP_FIELD_REF)
259 dpb_valid |= REF_BIT(i * 2);
260
261 if (dpb[i].fields & V4L2_H264_BOTTOM_FIELD_REF)
262 dpb_valid |= REF_BIT(i * 2 + 1);
263
264 if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) {
265 dpb_longterm |= REF_BIT(i * 2);
266 dpb_longterm |= REF_BIT(i * 2 + 1);
267 }
268 } else {
269 dpb_valid |= REF_BIT(i);
270
271 if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)
272 dpb_longterm |= REF_BIT(i);
273 }
274 }
275 ctx->h264_dec.dpb_valid = dpb_valid;
276 ctx->h264_dec.dpb_longterm = dpb_longterm;
277
278 if ((dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) ||
279 !(sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)) {
280 tbl->poc[32] = ctx->h264_dec.cur_poc;
281 tbl->poc[33] = 0;
282 } else {
283 tbl->poc[32] = dec_param->top_field_order_cnt;
284 tbl->poc[33] = dec_param->bottom_field_order_cnt;
285 }
286
287 assemble_scaling_list(ctx);
288 }
289
dpb_entry_match(const struct v4l2_h264_dpb_entry * a,const struct v4l2_h264_dpb_entry * b)290 static bool dpb_entry_match(const struct v4l2_h264_dpb_entry *a,
291 const struct v4l2_h264_dpb_entry *b)
292 {
293 return a->reference_ts == b->reference_ts;
294 }
295
update_dpb(struct hantro_ctx * ctx)296 static void update_dpb(struct hantro_ctx *ctx)
297 {
298 const struct v4l2_ctrl_h264_decode_params *dec_param;
299 DECLARE_BITMAP(new, ARRAY_SIZE(dec_param->dpb)) = { 0, };
300 DECLARE_BITMAP(used, ARRAY_SIZE(dec_param->dpb)) = { 0, };
301 unsigned int i, j;
302
303 dec_param = ctx->h264_dec.ctrls.decode;
304
305 /* Disable all entries by default. */
306 for (i = 0; i < ARRAY_SIZE(ctx->h264_dec.dpb); i++)
307 ctx->h264_dec.dpb[i].flags = 0;
308
309 /* Try to match new DPB entries with existing ones by their POCs. */
310 for (i = 0; i < ARRAY_SIZE(dec_param->dpb); i++) {
311 const struct v4l2_h264_dpb_entry *ndpb = &dec_param->dpb[i];
312
313 if (!(ndpb->flags & V4L2_H264_DPB_ENTRY_FLAG_VALID))
314 continue;
315
316 /*
317 * To cut off some comparisons, iterate only on target DPB
318 * entries which are not used yet.
319 */
320 for_each_clear_bit(j, used, ARRAY_SIZE(ctx->h264_dec.dpb)) {
321 struct v4l2_h264_dpb_entry *cdpb;
322
323 cdpb = &ctx->h264_dec.dpb[j];
324 if (!dpb_entry_match(cdpb, ndpb))
325 continue;
326
327 *cdpb = *ndpb;
328 set_bit(j, used);
329 break;
330 }
331
332 if (j == ARRAY_SIZE(ctx->h264_dec.dpb))
333 set_bit(i, new);
334 }
335
336 /* For entries that could not be matched, use remaining free slots. */
337 for_each_set_bit(i, new, ARRAY_SIZE(dec_param->dpb)) {
338 const struct v4l2_h264_dpb_entry *ndpb = &dec_param->dpb[i];
339 struct v4l2_h264_dpb_entry *cdpb;
340
341 /*
342 * Both arrays are of the same sizes, so there is no way
343 * we can end up with no space in target array, unless
344 * something is buggy.
345 */
346 j = find_first_zero_bit(used, ARRAY_SIZE(ctx->h264_dec.dpb));
347 if (WARN_ON(j >= ARRAY_SIZE(ctx->h264_dec.dpb)))
348 return;
349
350 cdpb = &ctx->h264_dec.dpb[j];
351 *cdpb = *ndpb;
352 set_bit(j, used);
353 }
354 }
355
hantro_h264_get_ref_buf(struct hantro_ctx * ctx,unsigned int dpb_idx)356 dma_addr_t hantro_h264_get_ref_buf(struct hantro_ctx *ctx,
357 unsigned int dpb_idx)
358 {
359 struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb;
360 dma_addr_t dma_addr = 0;
361 s32 cur_poc = ctx->h264_dec.cur_poc;
362 u32 flags;
363
364 if (dpb[dpb_idx].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)
365 dma_addr = hantro_get_ref(ctx, dpb[dpb_idx].reference_ts);
366
367 if (!dma_addr) {
368 struct vb2_v4l2_buffer *dst_buf;
369 struct vb2_buffer *buf;
370
371 /*
372 * If a DPB entry is unused or invalid, address of current
373 * destination buffer is returned.
374 */
375 dst_buf = hantro_get_dst_buf(ctx);
376 buf = &dst_buf->vb2_buf;
377 dma_addr = hantro_get_dec_buf_addr(ctx, buf);
378 }
379
380 flags = dpb[dpb_idx].flags & V4L2_H264_DPB_ENTRY_FLAG_FIELD ? 0x2 : 0;
381 flags |= abs(dpb[dpb_idx].top_field_order_cnt - cur_poc) <
382 abs(dpb[dpb_idx].bottom_field_order_cnt - cur_poc) ?
383 0x1 : 0;
384
385 return dma_addr | flags;
386 }
387
hantro_h264_get_ref_nbr(struct hantro_ctx * ctx,unsigned int dpb_idx)388 u16 hantro_h264_get_ref_nbr(struct hantro_ctx *ctx, unsigned int dpb_idx)
389 {
390 const struct v4l2_h264_dpb_entry *dpb = &ctx->h264_dec.dpb[dpb_idx];
391
392 if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
393 return 0;
394 return dpb->frame_num;
395 }
396
397 /*
398 * Removes all references with the same parity as the current picture from the
399 * reference list. The remaining list will have references with the opposite
400 * parity. This is effectively a deduplication of references since each buffer
401 * stores two fields. For this reason, each buffer is found twice in the
402 * reference list.
403 *
404 * This technique has been chosen through trial and error. This simple approach
405 * resulted in the highest conformance score. Note that this method may suffer
406 * worse quality in the case an opposite reference frame has been lost. If this
407 * becomes a problem in the future, it should be possible to add a preprocessing
408 * to identify un-paired fields and avoid removing them.
409 */
deduplicate_reflist(struct v4l2_h264_reflist_builder * b,struct v4l2_h264_reference * reflist)410 static void deduplicate_reflist(struct v4l2_h264_reflist_builder *b,
411 struct v4l2_h264_reference *reflist)
412 {
413 int write_idx = 0;
414 int i;
415
416 if (b->cur_pic_fields == V4L2_H264_FRAME_REF) {
417 write_idx = b->num_valid;
418 goto done;
419 }
420
421 for (i = 0; i < b->num_valid; i++) {
422 if (!(b->cur_pic_fields == reflist[i].fields)) {
423 reflist[write_idx++] = reflist[i];
424 continue;
425 }
426 }
427
428 done:
429 /* Should not happen unless we have a bug in the reflist builder. */
430 if (WARN_ON(write_idx > 16))
431 write_idx = 16;
432
433 /* Clear the remaining, some streams fails otherwise */
434 for (; write_idx < 16; write_idx++)
435 reflist[write_idx].index = 15;
436 }
437
hantro_h264_dec_prepare_run(struct hantro_ctx * ctx)438 int hantro_h264_dec_prepare_run(struct hantro_ctx *ctx)
439 {
440 struct hantro_h264_dec_hw_ctx *h264_ctx = &ctx->h264_dec;
441 struct hantro_h264_dec_ctrls *ctrls = &h264_ctx->ctrls;
442 struct v4l2_h264_reflist_builder reflist_builder;
443
444 hantro_start_prepare_run(ctx);
445
446 ctrls->scaling =
447 hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_SCALING_MATRIX);
448 if (WARN_ON(!ctrls->scaling))
449 return -EINVAL;
450
451 ctrls->decode =
452 hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_DECODE_PARAMS);
453 if (WARN_ON(!ctrls->decode))
454 return -EINVAL;
455
456 ctrls->sps =
457 hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_SPS);
458 if (WARN_ON(!ctrls->sps))
459 return -EINVAL;
460
461 ctrls->pps =
462 hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_PPS);
463 if (WARN_ON(!ctrls->pps))
464 return -EINVAL;
465
466 /* Update the DPB with new refs. */
467 update_dpb(ctx);
468
469 /* Build the P/B{0,1} ref lists. */
470 v4l2_h264_init_reflist_builder(&reflist_builder, ctrls->decode,
471 ctrls->sps, ctx->h264_dec.dpb);
472 h264_ctx->cur_poc = reflist_builder.cur_pic_order_count;
473
474 /* Prepare data in memory. */
475 prepare_table(ctx);
476
477 v4l2_h264_build_p_ref_list(&reflist_builder, h264_ctx->reflists.p);
478 v4l2_h264_build_b_ref_lists(&reflist_builder, h264_ctx->reflists.b0,
479 h264_ctx->reflists.b1);
480
481 /*
482 * Reduce ref lists to at most 16 entries, Hantro hardware will deduce
483 * the actual picture lists in field through the dpb_valid,
484 * dpb_longterm bitmap along with the current frame parity.
485 */
486 if (reflist_builder.cur_pic_fields != V4L2_H264_FRAME_REF) {
487 deduplicate_reflist(&reflist_builder, h264_ctx->reflists.p);
488 deduplicate_reflist(&reflist_builder, h264_ctx->reflists.b0);
489 deduplicate_reflist(&reflist_builder, h264_ctx->reflists.b1);
490 }
491
492 return 0;
493 }
494
hantro_h264_dec_exit(struct hantro_ctx * ctx)495 void hantro_h264_dec_exit(struct hantro_ctx *ctx)
496 {
497 struct hantro_dev *vpu = ctx->dev;
498 struct hantro_h264_dec_hw_ctx *h264_dec = &ctx->h264_dec;
499 struct hantro_aux_buf *priv = &h264_dec->priv;
500
501 dma_free_coherent(vpu->dev, priv->size, priv->cpu, priv->dma);
502 }
503
hantro_h264_dec_init(struct hantro_ctx * ctx)504 int hantro_h264_dec_init(struct hantro_ctx *ctx)
505 {
506 struct hantro_dev *vpu = ctx->dev;
507 struct hantro_h264_dec_hw_ctx *h264_dec = &ctx->h264_dec;
508 struct hantro_aux_buf *priv = &h264_dec->priv;
509 struct hantro_h264_dec_priv_tbl *tbl;
510
511 priv->cpu = dma_alloc_coherent(vpu->dev, sizeof(*tbl), &priv->dma,
512 GFP_KERNEL);
513 if (!priv->cpu)
514 return -ENOMEM;
515
516 priv->size = sizeof(*tbl);
517 tbl = priv->cpu;
518 memcpy(tbl->cabac_table, h264_cabac_table, sizeof(tbl->cabac_table));
519
520 return 0;
521 }
522