/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Accelerated CRC32(C) using AArch64 CRC and PMULL instructions
 *
 * Copyright (C) 2016 - 2018 Linaro Ltd.
 * Copyright (C) 2024 Google LLC
 *
 * Author: Ard Biesheuvel <ardb@kernel.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.cpu		generic+crc+crypto

	/*
	 * Operand fix-up helpers.
	 *
	 * The CRC macros below build helper names from their "order"
	 * argument (le or be): bit<order>, byte<order>, hword<order> and
	 * plain <order>. The le variants of bit/byte expand to nothing.
	 *
	 * CPU_LE() and CPU_BE() come from <asm/assembler.h>; presumably they
	 * emit their argument only on little-/big-endian builds respectively
	 * -- confirm against that header.
	 */

	/* bitle: no fix-up of the CRC accumulator */
	.macro		bitle, reg
	.endm

	/* bitbe: reverse the bit order of \reg */
	.macro		bitbe, reg
	rbit		\reg, \reg
	.endm

	/* bytele: no fix-up of a single input byte */
	.macro		bytele, reg
	.endm

	/*
	 * bytebe: used on a W register holding one byte in bits [7:0];
	 * rbit moves it bit-reversed to bits [31:24], lsr brings it back
	 * down to bits [7:0].
	 */
	.macro		bytebe, reg
	rbit		\reg, \reg
	lsr		\reg, \reg, #24
	.endm

	/* hwordle: swap the two bytes of the halfword under CPU_BE() */
	.macro		hwordle, reg
CPU_BE(	rev16		\reg, \reg	)
	.endm

	/*
	 * hwordbe: bit-reverse \reg; under CPU_LE() the bytes are reversed
	 * first, under CPU_BE() the result is shifted down by 16 afterwards.
	 */
	.macro		hwordbe, reg
CPU_LE(	rev		\reg, \reg	)
	rbit		\reg, \reg
CPU_BE(	lsr		\reg, \reg, #16	)
	.endm

	/* le: byte-reverse every listed register under CPU_BE() */
	.macro		le, regs:vararg
	.irp		r, \regs
CPU_BE(	rev		\r, \r		)
	.endr
	.endm

	/*
	 * be: byte-reverse every listed register under CPU_LE(), then
	 * bit-reverse every listed register unconditionally.
	 */
	.macro		be, regs:vararg
	.irp		r, \regs
CPU_LE(	rev		\r, \r		)
	.endr
	.irp		r, \regs
	rbit		\r, \r
	.endr
	.endm

	/*
	 * __crc32 - body of a scalar CRC routine; ends in ret.
	 *
	 * Macro arguments:
	 *   c     - text spliced into the mnemonic: empty selects
	 *           crc32{b,h,w,x}, "c" selects crc32c{b,h,w,x}
	 *   order - le (default) or be; selects the fix-up helpers above
	 *
	 * Register use as seen in the code:
	 *   w0 - CRC accumulator, also the value left in w0 at ret
	 *   x1 - address the input is loaded from (advanced)
	 *   x2 - number of input bytes (consumed)
	 *   x3-x8 and the condition flags are clobbered
	 */
	.macro		__crc32, c, order=le
	bit\order	w0
	cmp		x2, #16
	// NOTE(review): b.lt is a signed compare; if x2 is an unsigned
	// length, b.lo would be the matching condition -- confirm intent.
	b.lt		8f			// less than 16 bytes

	and		x7, x2, #0x1f		// x7 := head size (len mod 32)
	and		x2, x2, #~0x1f		// x2 := bytes left for 32-byte loop
	cbz		x7, 32f			// multiple of 32 bytes

	/*
	 * Head of 1..31 bytes, handled without branches. Two overlapping
	 * 16-byte loads are issued: x3/x4 from the start of the buffer and
	 * x5/x6 from offset (x7 mod 16). Since len >= 16 here, both loads
	 * stay inside the buffer.
	 */
	and		x8, x7, #0xf
	ldp		x3, x4, [x1]
	add		x8, x8, x1
	add		x1, x1, x7		// x1 := start of 32-byte blocks
	ldp		x5, x6, [x8]
	\order		x3, x4, x5, x6

	/*
	 * For each of bits 3..0 of x7: compute the candidate CRC into w8,
	 * then let csel commit it (and move on to the next data in x3) only
	 * if the tested bit is set ("eq" means the bit is clear).
	 */
	tst		x7, #8
	crc32\c\()x	w8, w0, x3
	csel		x3, x3, x4, eq		// 8 bytes used: continue with x4
	csel		w0, w0, w8, eq
	tst		x7, #4
	lsr		x4, x3, #32
	crc32\c\()w	w8, w0, w3
	csel		x3, x3, x4, eq		// 4 bytes used: continue with top half
	csel		w0, w0, w8, eq
	tst		x7, #2
	lsr		w4, w3, #16
	crc32\c\()h	w8, w0, w3
	csel		w3, w3, w4, eq		// 2 bytes used: continue with next halfword
	csel		w0, w0, w8, eq
	tst		x7, #1
	crc32\c\()b	w8, w0, w3
	csel		w0, w0, w8, eq
	tst		x7, #16			// bit 4: the 16 bytes in x5/x6
	crc32\c\()x	w8, w0, x5
	crc32\c\()x	w8, w8, x6
	csel		w0, w0, w8, eq
	cbz		x2, 0f			// no whole 32-byte blocks left

	/* Main loop: 32 bytes per iteration */
32:	ldp		x3, x4, [x1], #32
	sub		x2, x2, #32
	ldp		x5, x6, [x1, #-16]
	\order		x3, x4, x5, x6
	crc32\c\()x	w0, w0, x3
	crc32\c\()x	w0, w0, x4
	crc32\c\()x	w0, w0, x5
	crc32\c\()x	w0, w0, x6
	cbnz		x2, 32b
0:	bit\order	w0
	ret

	/* Short input (fewer than 16 bytes): one step per set bit of x2 */
8:	tbz		x2, #3, 4f
	ldr		x3, [x1], #8
	\order		x3
	crc32\c\()x	w0, w0, x3
4:	tbz		x2, #2, 2f
	ldr		w3, [x1], #4
	\order		w3
	crc32\c\()w	w0, w0, w3
2:	tbz		x2, #1, 1f
	ldrh		w3, [x1], #2
	hword\order	w3
	crc32\c\()h	w0, w0, w3
1:	tbz		x2, #0, 0f
	ldrb		w3, [x1]
	byte\order	w3
	crc32\c\()b	w0, w0, w3
0:	bit\order	w0
	ret
	.endm

	/* __crc32 with the crc32* instructions, le fix-ups */
	.align		5
SYM_FUNC_START(crc32_le_arm64)
	__crc32
SYM_FUNC_END(crc32_le_arm64)

	/* __crc32 with the crc32c* instructions, le fix-ups */
	.align		5
SYM_FUNC_START(crc32c_le_arm64)
	__crc32		c
SYM_FUNC_END(crc32c_le_arm64)

	/* __crc32 with the crc32* instructions, be fix-ups */
	.align		5
SYM_FUNC_START(crc32_be_arm64)
	__crc32		order=be
SYM_FUNC_END(crc32_be_arm64)

	in		.req	x1
	len		.req	x2

	/*
	 * crc4way - body of a 4-way interleaved CRC routine; ends in ret.
	 *
	 * w0: input CRC at entry, output CRC at exit
	 * x1: pointer to input buffer
	 * x2: length of input in bytes
	 *
	 * Macro arguments:
	 *   insn  - the 64-bit CRC instruction to use (crc32x or crc32cx)
	 *   table - label of the matching coefficient table (12 bytes per
	 *           entry, indexed by the number of 64-byte blocks, 1..64)
	 *   order - le (default) or be; selects the fix-up helpers above
	 *
	 * Only whole 64-byte blocks are consumed: the length is shifted
	 * right by 6 and the remainder is not looked at here.
	 *
	 * NOTE(review): if fewer than 64 bytes are passed, x3 is 0 on entry
	 * to the inner loop and its sub/cbnz pair wraps around; callers
	 * presumably guarantee len >= 64 -- confirm.
	 *
	 * Clobbers x3-x17, v0-v5 and the condition flags.
	 */
	.macro		crc4way, insn, table, order=le
	bit\order	w0
	lsr		len, len, #6		// len := # of 64-byte blocks

	/* Process up to 64 blocks of 64 bytes at a time */
.La\@:	mov		x3, #64
	cmp		len, #64
	csel		x3, x3, len, hi		// x3 := min(len, 64)
	sub		len, len, x3

	/* Divide the input into 4 contiguous blocks */
	add		x4, x3, x3, lsl #1	// x4 :=  3 * x3
	add		x7, in, x3, lsl #4	// x7 := in + 16 * x3
	add		x8, in, x3, lsl #5	// x8 := in + 32 * x3
	add		x9, in, x4, lsl #4	// x9 := in + 16 * x4

	/* Load the folding coefficients from the lookup table */
	adr_l		x5, \table - 12		// entry 0 omitted
	add		x5, x5, x4, lsl #2	// x5 += 12 * x3
	ldp		s0, s1, [x5]
	ldr		s2, [x5, #8]

	/* Zero init partial CRCs for this iteration */
	mov		w4, wzr
	mov		w5, wzr
	mov		w6, wzr
	mov		x17, xzr

	/*
	 * The second 8 bytes of the 4th stream (x17) are fed to the CRC one
	 * iteration late, at the top of the next pass. The x17 loaded in
	 * the final pass is therefore still pending when the loop exits and
	 * is folded in below.
	 */
.Lb\@:	sub		x3, x3, #1
	\insn		w6, w6, x17
	ldp		x10, x11, [in], #16
	ldp		x12, x13, [x7], #16
	ldp		x14, x15, [x8], #16
	ldp		x16, x17, [x9], #16

	\order		x10, x11, x12, x13, x14, x15, x16, x17

	/* Apply the CRC transform to 4 16-byte blocks in parallel */
	\insn		w0, w0, x10
	\insn		w4, w4, x12
	\insn		w5, w5, x14
	\insn		w6, w6, x16
	\insn		w0, w0, x11
	\insn		w4, w4, x13
	\insn		w5, w5, x15
	cbnz		x3, .Lb\@

	/*
	 * Combine the 4 partial results into w0: carry-less multiply the
	 * first three partial CRCs by their table coefficients, XOR the
	 * products together with the pending x17, and run one more CRC
	 * step over that value starting from the 4th partial CRC (w6).
	 */
	mov		v3.d[0], x0
	mov		v4.d[0], x4
	mov		v5.d[0], x5
	pmull		v0.1q, v0.1d, v3.1d
	pmull		v1.1q, v1.1d, v4.1d
	pmull		v2.1q, v2.1d, v5.1d
	eor		v0.8b, v0.8b, v1.8b
	eor		v0.8b, v0.8b, v2.8b
	mov		x5, v0.d[0]
	eor		x5, x5, x17
	\insn		w0, w6, x5

	mov		in, x9			// x9 has advanced to the end of stream 4
	cbnz		len, .La\@

	bit\order	w0
	ret
	.endm

	/* crc4way with crc32cx and table .L0, le fix-ups */
	.align		5
SYM_FUNC_START(crc32c_le_arm64_4way)
	crc4way		crc32cx, .L0
SYM_FUNC_END(crc32c_le_arm64_4way)

	/* crc4way with crc32x and table .L1, le fix-ups */
	.align		5
SYM_FUNC_START(crc32_le_arm64_4way)
	crc4way		crc32x, .L1
SYM_FUNC_END(crc32_le_arm64_4way)

	/* crc4way with crc32x and table .L1, be fix-ups */
	.align		5
SYM_FUNC_START(crc32_be_arm64_4way)
	crc4way		crc32x, .L1, be
SYM_FUNC_END(crc32_be_arm64_4way)

	.section	.rodata, "a", %progbits
	.align		6
	/*
	 * Folding coefficients used with crc32cx: 64 entries of three
	 * 32-bit words each, for block counts 1..64 (entry 0 omitted).
	 */
.L0:	.long		0xddc0152b, 0xba4fc28e, 0x493c7d27
	.long		0x0715ce53, 0x9e4addf8, 0xba4fc28e
	.long		0xc96cfdc0, 0x0715ce53, 0xddc0152b
	.long		0xab7aff2a, 0x0d3b6092, 0x9e4addf8
	.long		0x299847d5, 0x878a92a7, 0x39d3b296
	.long		0xb6dd949b, 0xab7aff2a, 0x0715ce53
	.long		0xa60ce07b, 0x83348832, 0x47db8317
	.long		0xd270f1a2, 0xb9e02b86, 0x0d3b6092
	.long		0x65863b64, 0xb6dd949b, 0xc96cfdc0
	.long		0xb3e32c28, 0xbac2fd7b, 0x878a92a7
	.long		0xf285651c, 0xce7f39f4, 0xdaece73e
	.long		0x271d9844, 0xd270f1a2, 0xab7aff2a
	.long		0x6cb08e5c, 0x2b3cac5d, 0x2162d385
	.long		0xcec3662e, 0x1b03397f, 0x83348832
	.long		0x8227bb8a, 0xb3e32c28, 0x299847d5
	.long		0xd7a4825c, 0xdd7e3b0c, 0xb9e02b86
	.long		0xf6076544, 0x10746f3c, 0x18b33a4e
	.long		0x98d8d9cb, 0x271d9844, 0xb6dd949b
	.long		0x57a3d037, 0x93a5f730, 0x78d9ccb7
	.long		0x3771e98f, 0x6b749fb2, 0xbac2fd7b
	.long		0xe0ac139e, 0xcec3662e, 0xa60ce07b
	.long		0x6f345e45, 0xe6fc4e6a, 0xce7f39f4
	.long		0xa2b73df1, 0xb0cd4768, 0x61d82e56
	.long		0x86d8e4d2, 0xd7a4825c, 0xd270f1a2
	.long		0xa90fd27a, 0x0167d312, 0xc619809d
	.long		0xca6ef3ac, 0x26f6a60a, 0x2b3cac5d
	.long		0x4597456a, 0x98d8d9cb, 0x65863b64
	.long		0xc9c8b782, 0x68bce87a, 0x1b03397f
	.long		0x62ec6c6d, 0x6956fc3b, 0xebb883bd
	.long		0x2342001e, 0x3771e98f, 0xb3e32c28
	.long		0xe8b6368b, 0x2178513a, 0x064f7f26
	.long		0x9ef68d35, 0x170076fa, 0xdd7e3b0c
	.long		0x0b0bf8ca, 0x6f345e45, 0xf285651c
	.long		0x02ee03b2, 0xff0dba97, 0x10746f3c
	.long		0x135c83fd, 0xf872e54c, 0xc7a68855
	.long		0x00bcf5f6, 0x86d8e4d2, 0x271d9844
	.long		0x58ca5f00, 0x5bb8f1bc, 0x8e766a0c
	.long		0xded288f8, 0xb3af077a, 0x93a5f730
	.long		0x37170390, 0xca6ef3ac, 0x6cb08e5c
	.long		0xf48642e9, 0xdd66cbbb, 0x6b749fb2
	.long		0xb25b29f2, 0xe9e28eb4, 0x1393e203
	.long		0x45cddf4e, 0xc9c8b782, 0xcec3662e
	.long		0xdfd94fb2, 0x93e106a4, 0x96c515bb
	.long		0x021ac5ef, 0xd813b325, 0xe6fc4e6a
	.long		0x8e1450f7, 0x2342001e, 0x8227bb8a
	.long		0xe0cdcf86, 0x6d9a4957, 0xb0cd4768
	.long		0x613eee91, 0xd2c3ed1a, 0x39c7ff35
	.long		0xbedc6ba1, 0x9ef68d35, 0xd7a4825c
	.long		0x0cd1526a, 0xf2271e60, 0x0ab3844b
	.long		0xd6c3a807, 0x2664fd8b, 0x0167d312
	.long		0x1d31175f, 0x02ee03b2, 0xf6076544
	.long		0x4be7fd90, 0x363bd6b3, 0x26f6a60a
	.long		0x6eeed1c9, 0x5fabe670, 0xa741c1bf
	.long		0xb3a6da94, 0x00bcf5f6, 0x98d8d9cb
	.long		0x2e7d11a7, 0x17f27698, 0x49c3cc9c
	.long		0x889774e1, 0xaa7c7ad5, 0x68bce87a
	.long		0x8a074012, 0xded288f8, 0x57a3d037
	.long		0xbd0bb25f, 0x6d390dec, 0x6956fc3b
	.long		0x3be3c09b, 0x6353c1cc, 0x42d98888
	.long		0x465a4eee, 0xf48642e9, 0x3771e98f
	.long		0x2e5f3c8c, 0xdd35bc8d, 0xb42ae3d9
	.long		0xa52f58ec, 0x9a5ede41, 0x2178513a
	.long		0x47972100, 0x45cddf4e, 0xe0ac139e
	.long		0x359674f7, 0xa51b6135, 0x170076fa

	/*
	 * Folding coefficients used with crc32x: same layout as the table
	 * above.
	 */
.L1:	.long		0xaf449247, 0x81256527, 0xccaa009e
	.long		0x57c54819, 0x1d9513d7, 0x81256527
	.long		0x3f41287a, 0x57c54819, 0xaf449247
	.long		0xf5e48c85, 0x910eeec1, 0x1d9513d7
	.long		0x1f0c2cdd, 0x9026d5b1, 0xae0b5394
	.long		0x71d54a59, 0xf5e48c85, 0x57c54819
	.long		0x1c63267b, 0xfe807bbd, 0x0cbec0ed
	.long		0xd31343ea, 0xe95c1271, 0x910eeec1
	.long		0xf9d9c7ee, 0x71d54a59, 0x3f41287a
	.long		0x9ee62949, 0xcec97417, 0x9026d5b1
	.long		0xa55d1514, 0xf183c71b, 0xd1df2327
	.long		0x21aa2b26, 0xd31343ea, 0xf5e48c85
	.long		0x9d842b80, 0xeea395c4, 0x3c656ced
	.long		0xd8110ff1, 0xcd669a40, 0xfe807bbd
	.long		0x3f9e9356, 0x9ee62949, 0x1f0c2cdd
	.long		0x1d6708a0, 0x0c30f51d, 0xe95c1271
	.long		0xef82aa68, 0xdb3935ea, 0xb918a347
	.long		0xd14bcc9b, 0x21aa2b26, 0x71d54a59
	.long		0x99cce860, 0x356d209f, 0xff6f2fc2
	.long		0xd8af8e46, 0xc352f6de, 0xcec97417
	.long		0xf1996890, 0xd8110ff1, 0x1c63267b
	.long		0x631bc508, 0xe95c7216, 0xf183c71b
	.long		0x8511c306, 0x8e031a19, 0x9b9bdbd0
	.long		0xdb3839f3, 0x1d6708a0, 0xd31343ea
	.long		0x7a92fffb, 0xf7003835, 0x4470ac44
	.long		0x6ce68f2a, 0x00eba0c8, 0xeea395c4
	.long		0x4caaa263, 0xd14bcc9b, 0xf9d9c7ee
	.long		0xb46f7cff, 0x9a1b53c8, 0xcd669a40
	.long		0x60290934, 0x81b6f443, 0x6d40f445
	.long		0x8e976a7d, 0xd8af8e46, 0x9ee62949
	.long		0xdcf5088a, 0x9dbdc100, 0x145575d5
	.long		0x1753ab84, 0xbbf2f6d6, 0x0c30f51d
	.long		0x255b139e, 0x631bc508, 0xa55d1514
	.long		0xd784eaa8, 0xce26786c, 0xdb3935ea
	.long		0x6d2c864a, 0x8068c345, 0x2586d334
	.long		0x02072e24, 0xdb3839f3, 0x21aa2b26
	.long		0x06689b0a, 0x5efd72f5, 0xe0575528
	.long		0x1e52f5ea, 0x4117915b, 0x356d209f
	.long		0x1d3d1db6, 0x6ce68f2a, 0x9d842b80
	.long		0x3796455c, 0xb8e0e4a8, 0xc352f6de
	.long		0xdf3a4eb3, 0xc55a2330, 0xb84ffa9c
	.long		0x28ae0976, 0xb46f7cff, 0xd8110ff1
	.long		0x9764bc8d, 0xd7e7a22c, 0x712510f0
	.long		0x13a13e18, 0x3e9a43cd, 0xe95c7216
	.long		0xb8ee242e, 0x8e976a7d, 0x3f9e9356
	.long		0x0c540e7b, 0x753c81ff, 0x8e031a19
	.long		0x9924c781, 0xb9220208, 0x3edcde65
	.long		0x3954de39, 0x1753ab84, 0x1d6708a0
	.long		0xf32238b5, 0xbec81497, 0x9e70b943
	.long		0xbbd2cd2c, 0x0925d861, 0xf7003835
	.long		0xcc401304, 0xd784eaa8, 0xef82aa68
	.long		0x4987e684, 0x6044fbb0, 0x00eba0c8
	.long		0x3aa11427, 0x18fe3b4a, 0x87441142
	.long		0x297aad60, 0x02072e24, 0xd14bcc9b
	.long		0xf60c5e51, 0x6ef6f487, 0x5b7fdd0a
	.long		0x632d78c5, 0x3fc33de4, 0x9a1b53c8
	.long		0x25b8822a, 0x1e52f5ea, 0x99cce860
	.long		0xd4fc84bc, 0x1af62fb8, 0x81b6f443
	.long		0x5690aa32, 0xa91fdefb, 0x688a110e
	.long		0x1357a093, 0x3796455c, 0xd8af8e46
	.long		0x798fdd33, 0xaaa18a37, 0x357b9517
	.long		0xc2815395, 0x54d42691, 0x9dbdc100
	.long		0x21cfc0f7, 0x28ae0976, 0xf1996890
	.long		0xa0decef3, 0x7b4aa8b7, 0xbbf2f6d6