/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Accelerated GHASH implementation with ARMv8 ASIMD instructions.
 *
 * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	/*
	 * Register roles.
	 *
	 * SHASH holds the 128-bit key loaded from 'h'; SHASH2 holds SHASH
	 * XORed with its own 64-bit halves swapped (see __pmull_pre_p8).
	 * XL/XM/XH receive the three partial products of each block multiply.
	 *
	 * XH and IN1 deliberately share v7: inside the main loop IN1 is
	 * folded into XL before XH is written for the first time.
	 */
	SHASH		.req	v0
	SHASH2		.req	v1
	T1		.req	v2
	T2		.req	v3
	XM		.req	v5
	XL		.req	v6
	XH		.req	v7
	IN1		.req	v7

	/* masks built by __pmull_pre_p8, consumed by __pmull_p8_tail */
	k00_16		.req	v8
	k32_48		.req	v9

	/* scratch registers of the 8-bit polynomial multiply */
	t3		.req	v10
	t4		.req	v11
	t5		.req	v12
	t6		.req	v13
	t7		.req	v14
	t8		.req	v15
	t9		.req	v16

	/* TBL index vectors: rotate each 64-bit half by 1, 2 and 3 bytes */
	perm1		.req	v17
	perm2		.req	v18
	perm3		.req	v19

	/* SHASH with each 64-bit half rotated by 1..4 bytes (B1..B4) */
	sh1		.req	v20
	sh2		.req	v21
	sh3		.req	v22
	sh4		.req	v23

	/* low 64 bits of SHASH2 rotated by 1..4 bytes (B1..B4) */
	ss1		.req	v24
	ss2		.req	v25
	ss3		.req	v26
	ss4		.req	v27

	.text

	/*
	 * __pmull_p8 rq, ad, bd
	 *
	 * Multiply the low 64 bits of \ad by the low 64 bits of \bd using only
	 * 8-bit PMULL, leaving the result in \rq.  \bd must be SHASH or SHASH2:
	 * it selects the __pmull_p8_<bd> wrapper, which supplies the matching
	 * precomputed rotations.
	 *
	 * Clobbers: t3-t9.
	 */
	.macro		__pmull_p8, rq, ad, bd
	ext		t3.8b, \ad\().8b, \ad\().8b, #1		// A1
	ext		t5.8b, \ad\().8b, \ad\().8b, #2		// A2
	ext		t7.8b, \ad\().8b, \ad\().8b, #3		// A3

	__pmull_p8_\bd	\rq, \ad
	.endm

	/*
	 * __pmull2_p8 rq, ad, bd
	 *
	 * Upper-half counterpart of __pmull_p8: the rotations are made with TBL
	 * over all 16 bytes (perm1-perm3 rotate both halves independently) and
	 * the tail is instantiated with PMULL2.  Only bd = SHASH is provided.
	 *
	 * Clobbers: t3-t9.
	 */
	.macro		__pmull2_p8, rq, ad, bd
	tbl		t3.16b, {\ad\().16b}, perm1.16b		// A1
	tbl		t5.16b, {\ad\().16b}, perm2.16b		// A2
	tbl		t7.16b, {\ad\().16b}, perm3.16b		// A3

	__pmull2_p8_\bd	\rq, \ad
	.endm

	/* Per-operand wrappers: bind B and its rotations B1..B4 for the tail. */
	.macro		__pmull_p8_SHASH, rq, ad
	__pmull_p8_tail	\rq, \ad\().8b, SHASH.8b, 8b,, sh1, sh2, sh3, sh4
	.endm

	.macro		__pmull_p8_SHASH2, rq, ad
	__pmull_p8_tail	\rq, \ad\().8b, SHASH2.8b, 8b,, ss1, ss2, ss3, ss4
	.endm

	.macro		__pmull2_p8_SHASH, rq, ad
	__pmull_p8_tail	\rq, \ad\().16b, SHASH.16b, 16b, 2, sh1, sh2, sh3, sh4
	.endm

	/*
	 * __pmull_p8_tail rq, ad, bd, nb, t, b1, b2, b3, b4
	 *
	 *   rq       destination of the product
	 *   ad, bd   operands A and B, already carrying their arrangement
	 *   nb       arrangement used for the rotated operands (8b or 16b)
	 *   t        mnemonic suffix: empty for PMULL, 2 for PMULL2
	 *   b1..b4   B rotated by 1..4 bytes
	 *
	 * Expects t3/t5/t7 to hold A rotated by 1/2/3 bytes (A1..A3), and
	 * k00_16/k32_48 to have been set up by __pmull_pre_p8.
	 *
	 * Clobbers: t3-t9.
	 */
	.macro		__pmull_p8_tail, rq, ad, bd, nb, t, b1, b2, b3, b4
	pmull\t		t3.8h, t3.\nb, \bd			// F = A1*B
	pmull\t		t4.8h, \ad, \b1\().\nb			// E = A*B1
	pmull\t		t5.8h, t5.\nb, \bd			// H = A2*B
	pmull\t		t6.8h, \ad, \b2\().\nb			// G = A*B2
	pmull\t		t7.8h, t7.\nb, \bd			// J = A3*B
	pmull\t		t8.8h, \ad, \b3\().\nb			// I = A*B3
	pmull\t		t9.8h, \ad, \b4\().\nb			// K = A*B4
	pmull\t		\rq\().8h, \ad, \bd			// D = A*B

	eor		t3.16b, t3.16b, t4.16b			// L = E + F
	eor		t5.16b, t5.16b, t6.16b			// M = G + H
	eor		t7.16b, t7.16b, t8.16b			// N = I + J

	uzp1		t4.2d, t3.2d, t5.2d
	uzp2		t3.2d, t3.2d, t5.2d
	uzp1		t6.2d, t7.2d, t9.2d
	uzp2		t7.2d, t7.2d, t9.2d

	// t3 = (L) (P0 + P1) << 8
	// t5 = (M) (P2 + P3) << 16
	eor		t4.16b, t4.16b, t3.16b
	and		t3.16b, t3.16b, k32_48.16b

	// t7 = (N) (P4 + P5) << 24
	// t9 = (K) (P6 + P7) << 32
	eor		t6.16b, t6.16b, t7.16b
	and		t7.16b, t7.16b, k00_16.16b

	eor		t4.16b, t4.16b, t3.16b
	eor		t6.16b, t6.16b, t7.16b

	zip2		t5.2d, t4.2d, t3.2d
	zip1		t3.2d, t4.2d, t3.2d
	zip2		t9.2d, t6.2d, t7.2d
	zip1		t7.2d, t6.2d, t7.2d

	// byte-rotate each partial sum into position: <<8, <<16, <<24, <<32
	ext		t3.16b, t3.16b, t3.16b, #15
	ext		t5.16b, t5.16b, t5.16b, #14
	ext		t7.16b, t7.16b, t7.16b, #13
	ext		t9.16b, t9.16b, t9.16b, #12

	eor		t3.16b, t3.16b, t5.16b
	eor		t7.16b, t7.16b, t9.16b
	eor		\rq\().16b, \rq\().16b, t3.16b
	eor		\rq\().16b, \rq\().16b, t7.16b
	.endm

	/*
	 * __pmull_pre_p8
	 *
	 * One-time setup of everything that does not change between blocks.
	 *
	 * In:       SHASH
	 * Out:      SHASH2, k00_16, k32_48, perm1-perm3, sh1-sh4, ss1-ss4
	 * Clobbers: x5, T1
	 */
	.macro		__pmull_pre_p8
	// both halves of SHASH2 become (high half ^ low half) of SHASH
	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
	eor		SHASH2.16b, SHASH2.16b, SHASH.16b

	// k00_16 := 0x0000000000000000_000000000000ffff
	// k32_48 := 0x00000000ffffffff_0000ffffffffffff
	movi		k32_48.2d, #0xffffffff
	mov		k32_48.h[2], k32_48.h[0]
	ushr		k00_16.2d, k32_48.2d, #32

	// prepare the permutation vectors
	//
	// x5 is the rotate-by-one index pattern for the upper eight bytes
	// (9..15, 8).  Flipping bit 3 of the low eight indices only turns
	// them into 1..7, 0, so perm1 rotates each 64-bit half on its own.
	// perm2, perm3 and T1 are perm1 rotated by a further 1, 2 and 3
	// index positions (USHR + SLI form a 64-bit rotate).
	mov_q		x5, 0x080f0e0d0c0b0a09
	movi		T1.8b, #8
	dup		perm1.2d, x5
	eor		perm1.16b, perm1.16b, T1.16b
	ushr		perm2.2d, perm1.2d, #8
	ushr		perm3.2d, perm1.2d, #16
	ushr		T1.2d, perm1.2d, #24
	sli		perm2.2d, perm1.2d, #56
	sli		perm3.2d, perm1.2d, #48
	sli		T1.2d, perm1.2d, #40

	// precompute loop invariants
	//
	// T1 carries the rotate-by-four pattern only until sh4 is built;
	// after that it is ordinary scratch again.
	tbl		sh1.16b, {SHASH.16b}, perm1.16b
	tbl		sh2.16b, {SHASH.16b}, perm2.16b
	tbl		sh3.16b, {SHASH.16b}, perm3.16b
	tbl		sh4.16b, {SHASH.16b}, T1.16b
	ext		ss1.8b, SHASH2.8b, SHASH2.8b, #1
	ext		ss2.8b, SHASH2.8b, SHASH2.8b, #2
	ext		ss3.8b, SHASH2.8b, SHASH2.8b, #3
	ext		ss4.8b, SHASH2.8b, SHASH2.8b, #4
	.endm

	/*
	 * __pmull_reduce_p8
	 *
	 * Reduction step built from shifts and XORs only (no PMULL).
	 *
	 * In:  XL, XM, XH and T1 as left by the multiply step of the caller.
	 * Out: XL, XH and T2; the caller completes the reduction with
	 *      T2 ^= XH; XL ^= T2.
	 *
	 * NOTE(review): the shift amounts (57/62/63, then 1 and a further 6)
	 * look like the x^7, x^2 and x terms of the GHASH field polynomial --
	 * confirm against the GHASH specification before relying on this.
	 */
	.macro		__pmull_reduce_p8
	eor		XM.16b, XM.16b, T1.16b

	mov		XL.d[1], XM.d[0]
	mov		XH.d[0], XM.d[1]

	shl		T1.2d, XL.2d, #57
	shl		T2.2d, XL.2d, #62
	eor		T2.16b, T2.16b, T1.16b
	shl		T1.2d, XL.2d, #63
	eor		T2.16b, T2.16b, T1.16b
	ext		T1.16b, XL.16b, XH.16b, #8
	eor		T2.16b, T2.16b, T1.16b

	mov		XL.d[1], T2.d[0]
	mov		XH.d[0], T2.d[1]

	ushr		T2.2d, XL.2d, #1
	eor		XH.16b, XH.16b, XL.16b
	eor		XL.16b, XL.16b, T2.16b
	ushr		T2.2d, T2.2d, #6
	ushr		XL.2d, XL.2d, #1
	.endm

	/*
	 * void pmull_ghash_update_p8(size_t blocks, struct polyval_elem *dg,
	 *			      const u8 *src,
	 *			      const struct polyval_elem *h)
	 *
	 * In:   x0 = blocks  number of 16-byte blocks to consume from src
	 *       x1 = dg      128-bit accumulator; loaded on entry, stored on exit
	 *       x2 = src     input data, read 16 bytes at a time
	 *       x3 = h       128-bit key, loaded once into SHASH
	 *
	 * Clobbers: x0, x2, x5, v0-v3, v5-v27
	 *
	 * blocks == 0 returns immediately and leaves *dg untouched.  Without
	 * the entry check the loop would consume one block and then wrap the
	 * counter, because x0 is decremented before it is tested.
	 *
	 * NOTE(review): v8-v15 are written without being saved here, although
	 * AAPCS64 treats their low halves as callee-saved; this presumably
	 * relies on the caller's NEON context handling -- confirm.
	 */
SYM_FUNC_START(pmull_ghash_update_p8)
	cbz		x0, 1f			// nothing to do for zero blocks

	ld1		{SHASH.2d}, [x3]
	ld1		{XL.2d}, [x1]

	__pmull_pre_p8

0:	ld1		{T1.2d}, [x2], #16
	sub		x0, x0, #1

	/* multiply XL by SHASH in GF(2^128) */
	rev64		T1.16b, T1.16b

	// fold the new block into the accumulator (halves swapped via EXT)
	ext		T2.16b, XL.16b, XL.16b, #8
	ext		IN1.16b, T1.16b, T1.16b, #8
	eor		T1.16b, T1.16b, T2.16b
	eor		XL.16b, XL.16b, IN1.16b	// last use of IN1; v7 is XH from here on

	__pmull2_p8	XH, XL, SHASH			// a1 * b1
	eor		T1.16b, T1.16b, XL.16b
	__pmull_p8	XL, XL, SHASH			// a0 * b0
	__pmull_p8	XM, T1, SHASH2			// (a1 + a0)(b1 + b0)

	eor		T2.16b, XL.16b, XH.16b
	ext		T1.16b, XL.16b, XH.16b, #8
	eor		XM.16b, XM.16b, T2.16b

	__pmull_reduce_p8

	eor		T2.16b, T2.16b, XH.16b
	eor		XL.16b, XL.16b, T2.16b

	cbnz		x0, 0b

	st1		{XL.2d}, [x1]
1:	ret
SYM_FUNC_END(pmull_ghash_update_p8)