/* SPDX-License-Identifier: GPL-2.0 */
/*
 * NH - ε-almost-universal hash function, ARM64 NEON accelerated version
 *
 * Copyright 2018 Google LLC
 *
 * Author: Eric Biggers <ebiggers@google.com>
 */

#include <linux/linkage.h>
#include <linux/cfi_types.h>

	/* Function arguments, in the order they are passed (x0-x3). */
	KEY		.req	x0
	MESSAGE		.req	x1
	MESSAGE_LEN	.req	x2
	HASH		.req	x3

	/* One accumulator per pass; each holds two 64-bit partial sums. */
	PASS0_SUMS	.req	v0
	PASS1_SUMS	.req	v1
	PASS2_SUMS	.req	v2
	PASS3_SUMS	.req	v3

	/* Sliding window of four consecutive 16-byte key strides. */
	K0		.req	v4
	K1		.req	v5
	K2		.req	v6
	K3		.req	v7

	/*
	 * Scratch registers.
	 *
	 * NOTE(review): v8-v15 are callee-saved (low 64 bits) under AAPCS64
	 * and are neither saved nor restored in this file.  Presumably every
	 * caller runs this code inside a kernel-mode NEON section, where that
	 * is acceptable - confirm against the callers.
	 */
	T0		.req	v8
	T1		.req	v9
	T2		.req	v10
	T3		.req	v11
	T4		.req	v12
	T5		.req	v13
	T6		.req	v14
	T7		.req	v15

/*
 * _nh_stride k0, k1, k2, k3
 *
 * Process one 16-byte message stride for all four passes.
 *
 * On entry k0, k1 and k2 must already hold the key strides used by passes
 * 0, 1 and 2.  The macro loads the next 16 bytes of key into k3 (for pass 3)
 * and the next 16 bytes of message, post-incrementing both KEY and MESSAGE
 * by 16.
 *
 * For each pass, the four 32-bit message words are added to the four 32-bit
 * key words (wrapping modulo 2^32).  Sum word 0 is then multiplied by sum
 * word 2, and sum word 1 by sum word 3, as unsigned 32x32 => 64-bit
 * products, which are accumulated into the two 64-bit lanes of that pass's
 * PASSn_SUMS register.
 *
 * Clobbers: T0-T7 and the register passed as k3.
 */
.macro _nh_stride	k0, k1, k2, k3

	// Load next message stride
	ld1		{T3.16b}, [MESSAGE], #16

	// Load next key stride
	ld1		{\k3\().4s}, [KEY], #16

	// Add message words to key words.  T3 is both the message source and
	// the pass 3 destination, so it has to be written last.
	add		T0.4s, T3.4s, \k0\().4s
	add		T1.4s, T3.4s, \k1\().4s
	add		T2.4s, T3.4s, \k2\().4s
	add		T3.4s, T3.4s, \k3\().4s

	// Multiply 32x32 => 64 and accumulate.  umlal reads the low 64 bits
	// of each source, so the high half of every sum vector is first
	// copied into the low half of a second scratch register.
	mov		T4.d[0], T0.d[1]
	mov		T5.d[0], T1.d[1]
	mov		T6.d[0], T2.d[1]
	mov		T7.d[0], T3.d[1]
	umlal		PASS0_SUMS.2d, T0.2s, T4.2s
	umlal		PASS1_SUMS.2d, T1.2s, T5.2s
	umlal		PASS2_SUMS.2d, T2.2s, T6.2s
	umlal		PASS3_SUMS.2d, T3.2s, T7.2s
.endm

/*
 * void nh_neon(const u32 *key, const u8 *message, size_t message_len,
 *		__le64 hash[NH_NUM_PASSES])
 *
 * It's guaranteed that message_len % 16 == 0.
 *
 * In:   x0 = key, x1 = message, x2 = message_len (bytes), x3 = hash
 * Out:  four 64-bit sums (32 bytes in total), one per pass, stored at 'hash'
 *
 * The message is consumed in 16-byte strides.  The main loop handles four
 * strides (64 bytes) per iteration, rotating the roles of K0-K3 so that the
 * key window advances by one stride each time without any register moves.
 * The tail then handles the remaining 0-3 strides.
 *
 * The key is read 16 bytes per message stride, plus 48 bytes up front.
 *
 * Clobbers: x0-x2, v0-v15, condition flags.
 */
SYM_TYPED_FUNC_START(nh_neon)

	// Preload the first three key strides and zero the accumulators.
	ld1		{K0.4s,K1.4s}, [KEY], #32
	movi		PASS0_SUMS.2d, #0
	movi		PASS1_SUMS.2d, #0
	ld1		{K2.4s}, [KEY], #16
	movi		PASS2_SUMS.2d, #0
	movi		PASS3_SUMS.2d, #0

	// MESSAGE_LEN is kept biased by -64 while in the main loop; it goes
	// negative as soon as fewer than 64 message bytes remain.
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	blt		.Lloop4_done
.Lloop4:
	_nh_stride	K0, K1, K2, K3
	_nh_stride	K1, K2, K3, K0
	_nh_stride	K2, K3, K0, K1
	_nh_stride	K3, K0, K1, K2
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	bge		.Lloop4

.Lloop4_done:
	// Masking with 63 removes the -64 bias, leaving the number of
	// unprocessed bytes (0, 16, 32 or 48 given the length guarantee).
	ands		MESSAGE_LEN, MESSAGE_LEN, #63
	beq		.Ldone
	_nh_stride	K0, K1, K2, K3

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	beq		.Ldone
	_nh_stride	K1, K2, K3, K0

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	beq		.Ldone
	_nh_stride	K2, K3, K0, K1

.Ldone:
	// Sum the accumulators for each pass, then store the sums to 'hash'
	addp		T0.2d, PASS0_SUMS.2d, PASS1_SUMS.2d
	addp		T1.2d, PASS2_SUMS.2d, PASS3_SUMS.2d
	st1		{T0.16b,T1.16b}, [HASH]
	ret
SYM_FUNC_END(nh_neon)