1
2 #include <stdint.h>
3 #include <stdlib.h>
4 #include <string.h>
5
6 #include "crypto_stream_salsa20.h"
7 #include "private/common.h"
8 #include "private/sse2_64_32.h"
9 #include "utils.h"
10
11 #if defined(HAVE_AVX2INTRIN_H) && defined(HAVE_EMMINTRIN_H) && \
12 defined(HAVE_TMMINTRIN_H) && defined(HAVE_SMMINTRIN_H)
13
14 # ifdef __GNUC__
15 # pragma GCC target("sse2")
16 # pragma GCC target("ssse3")
17 # pragma GCC target("sse4.1")
18 # pragma GCC target("avx2")
19 # endif
20
21 #include <emmintrin.h>
22 #include <immintrin.h>
23 #include <smmintrin.h>
24 #include <tmmintrin.h>
25
26 # include "../stream_salsa20.h"
27 # include "salsa20_xmm6int-avx2.h"
28
29 # define ROUNDS 20
30
/*
 * Salsa20 cipher state: sixteen 32-bit words.
 *
 * The setup routines in this file do not store the words in natural
 * order; logical word i is written to input[TR[i]].
 */
typedef struct salsa_ctx salsa_ctx;

struct salsa_ctx {
    uint32_t input[16];
};
34
/*
 * Index translation table: logical state word i is kept in slot TR[i] of
 * salsa_ctx.input.  The table is a permutation of 0..15.
 *
 * NOTE(review): presumably this is the word order expected by the included
 * SIMD fragments (u8.h, u4.h, u1.h, u0.h), which are not visible here --
 * confirm before changing any entry.
 */
static const int TR[16] = {
     0,  5, 10, 15,
    12,  1,  6, 11,
     8, 13,  2,  7,
     4,  9, 14,  3
};
38
39 static void
salsa_keysetup(salsa_ctx * ctx,const uint8_t * k)40 salsa_keysetup(salsa_ctx *ctx, const uint8_t *k)
41 {
42 ctx->input[TR[1]] = LOAD32_LE(k + 0);
43 ctx->input[TR[2]] = LOAD32_LE(k + 4);
44 ctx->input[TR[3]] = LOAD32_LE(k + 8);
45 ctx->input[TR[4]] = LOAD32_LE(k + 12);
46 ctx->input[TR[11]] = LOAD32_LE(k + 16);
47 ctx->input[TR[12]] = LOAD32_LE(k + 20);
48 ctx->input[TR[13]] = LOAD32_LE(k + 24);
49 ctx->input[TR[14]] = LOAD32_LE(k + 28);
50 ctx->input[TR[0]] = 0x61707865;
51 ctx->input[TR[5]] = 0x3320646e;
52 ctx->input[TR[10]] = 0x79622d32;
53 ctx->input[TR[15]] = 0x6b206574;
54 }
55
56 static void
salsa_ivsetup(salsa_ctx * ctx,const uint8_t * iv,const uint8_t * counter)57 salsa_ivsetup(salsa_ctx *ctx, const uint8_t *iv, const uint8_t *counter)
58 {
59 ctx->input[TR[6]] = LOAD32_LE(iv + 0);
60 ctx->input[TR[7]] = LOAD32_LE(iv + 4);
61 ctx->input[TR[8]] = counter == NULL ? 0 : LOAD32_LE(counter + 0);
62 ctx->input[TR[9]] = counter == NULL ? 0 : LOAD32_LE(counter + 4);
63 }
64
65 static void
salsa20_encrypt_bytes(salsa_ctx * ctx,const uint8_t * m,uint8_t * c,unsigned long long bytes)66 salsa20_encrypt_bytes(salsa_ctx *ctx, const uint8_t *m, uint8_t *c,
67 unsigned long long bytes)
68 {
69 uint32_t * const x = &ctx->input[0];
70
71 if (!bytes) {
72 return; /* LCOV_EXCL_LINE */
73 }
74
75 #include "u8.h"
76 #include "u4.h"
77 #include "u1.h"
78 #include "u0.h"
79 }
80
81 static int
stream_avx2(unsigned char * c,unsigned long long clen,const unsigned char * n,const unsigned char * k)82 stream_avx2(unsigned char *c, unsigned long long clen, const unsigned char *n,
83 const unsigned char *k)
84 {
85 struct salsa_ctx ctx;
86
87 if (!clen) {
88 return 0;
89 }
90 COMPILER_ASSERT(crypto_stream_salsa20_KEYBYTES == 256 / 8);
91 salsa_keysetup(&ctx, k);
92 salsa_ivsetup(&ctx, n, NULL);
93 memset(c, 0, clen);
94 salsa20_encrypt_bytes(&ctx, c, c, clen);
95 sodium_memzero(&ctx, sizeof ctx);
96
97 return 0;
98 }
99
100 static int
stream_avx2_xor_ic(unsigned char * c,const unsigned char * m,unsigned long long mlen,const unsigned char * n,uint64_t ic,const unsigned char * k)101 stream_avx2_xor_ic(unsigned char *c, const unsigned char *m,
102 unsigned long long mlen, const unsigned char *n, uint64_t ic,
103 const unsigned char *k)
104 {
105 struct salsa_ctx ctx;
106 uint8_t ic_bytes[8];
107 uint32_t ic_high;
108 uint32_t ic_low;
109
110 if (!mlen) {
111 return 0;
112 }
113 ic_high = (uint32_t) (ic >> 32);
114 ic_low = (uint32_t) ic;
115 STORE32_LE(&ic_bytes[0], ic_low);
116 STORE32_LE(&ic_bytes[4], ic_high);
117 salsa_keysetup(&ctx, k);
118 salsa_ivsetup(&ctx, n, ic_bytes);
119 salsa20_encrypt_bytes(&ctx, m, c, mlen);
120 sodium_memzero(&ctx, sizeof ctx);
121
122 return 0;
123 }
124
125 struct crypto_stream_salsa20_implementation
126 crypto_stream_salsa20_xmm6int_avx2_implementation = {
127 SODIUM_C99(.stream =) stream_avx2,
128 SODIUM_C99(.stream_xor_ic =) stream_avx2_xor_ic
129 };
130
131 #endif
132