xref: /freebsd/sys/contrib/libb2/blake2b.c (revision f126d349810fdb512c0b01e101342d430b947488)
1 /*
2    BLAKE2 reference source code package - optimized C implementations
3 
4    Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
5 
6    To the extent possible under law, the author(s) have dedicated all copyright
7    and related and neighboring rights to this software to the public domain
8    worldwide. This software is distributed without any warranty.
9 
10    You should have received a copy of the CC0 Public Domain Dedication along with
11    this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
12 */
13 
14 #include <stdint.h>
15 #include <string.h>
16 #include <stdio.h>
17 
18 #include "blake2.h"
19 #include "blake2-impl.h"
20 
21 #include "blake2-config.h"
22 
23 #if defined(_MSC_VER)
24 #include <intrin.h>
25 #endif
26 
27 #if defined(HAVE_SSE2)
28 #include <emmintrin.h>
29 // MSVC only defines  _mm_set_epi64x for x86_64...
30 #if defined(_MSC_VER) && !defined(_M_X64)
31 static inline __m128i _mm_set_epi64x( const uint64_t u1, const uint64_t u0 )
32 {
33   return _mm_set_epi32( u1 >> 32, u1, u0 >> 32, u0 );
34 }
35 #endif
36 #endif
37 
38 #if defined(HAVE_SSSE3)
39 #include <tmmintrin.h>
40 #endif
41 #if defined(HAVE_SSE4_1)
42 #include <smmintrin.h>
43 #endif
44 #if defined(HAVE_AVX)
45 #include <immintrin.h>
46 #endif
47 #if defined(HAVE_XOP) && !defined(_MSC_VER)
48 #include <x86intrin.h>
49 #endif
50 
51 
52 
53 #include "blake2b-round.h"
54 
/* BLAKE2b initialization vector (eight 64-bit words). In this file it seeds
   S->h during initialization and supplies rows 3 and 4 of the working state
   in blake2b_compress. */
static const uint64_t blake2b_IV[8] =
{
  UINT64_C( 0x6a09e667f3bcc908 ),
  UINT64_C( 0xbb67ae8584caa73b ),
  UINT64_C( 0x3c6ef372fe94f82b ),
  UINT64_C( 0xa54ff53a5f1d36f1 ),
  UINT64_C( 0x510e527fade682d1 ),
  UINT64_C( 0x9b05688c2b3e6c1f ),
  UINT64_C( 0x1f83d9abfb41bd6b ),
  UINT64_C( 0x5be0cd19137e2179 )
};
62 
/* Message-word permutation table: 12 rows, each a permutation of 0..15.
   Rows 10 and 11 repeat rows 0 and 1. The table is not referenced directly
   in this file; presumably the ROUND macros in blake2b-round.h hard-code the
   same schedule (NOTE(review): confirm against that header). */
static const uint8_t blake2b_sigma[12][16] =
{
  { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
  { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
  { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
  { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
  { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
  { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
  { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
  { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
  { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
  { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
  { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
  { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
};
78 
79 
80 /* Some helper functions, not necessarily useful */
81 static inline int blake2b_set_lastnode( blake2b_state *S )
82 {
83   S->f[1] = ~0ULL;
84   return 0;
85 }
86 
87 static inline int blake2b_clear_lastnode( blake2b_state *S )
88 {
89   S->f[1] = 0ULL;
90   return 0;
91 }
92 
93 static inline int blake2b_set_lastblock( blake2b_state *S )
94 {
95   if( S->last_node ) blake2b_set_lastnode( S );
96 
97   S->f[0] = ~0ULL;
98   return 0;
99 }
100 
101 static inline int blake2b_clear_lastblock( blake2b_state *S )
102 {
103   if( S->last_node ) blake2b_clear_lastnode( S );
104 
105   S->f[0] = 0ULL;
106   return 0;
107 }
108 
109 
110 static inline int blake2b_increment_counter( blake2b_state *S, const uint64_t inc )
111 {
112 #if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__))
113   // ADD/ADC chain
114   __uint128_t t = ( ( __uint128_t )S->t[1] << 64 ) | S->t[0];
115   t += inc;
116   S->t[0] = ( uint64_t )( t >>  0 );
117   S->t[1] = ( uint64_t )( t >> 64 );
118 #else
119   S->t[0] += inc;
120   S->t[1] += ( S->t[0] < inc );
121 #endif
122   return 0;
123 }
124 
125 
126 // Parameter-related functions
127 static inline int blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length )
128 {
129   P->digest_length = digest_length;
130   return 0;
131 }
132 
133 static inline int blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout )
134 {
135   P->fanout = fanout;
136   return 0;
137 }
138 
139 static inline int blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth )
140 {
141   P->depth = depth;
142   return 0;
143 }
144 
145 static inline int blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length )
146 {
147   P->leaf_length = leaf_length;
148   return 0;
149 }
150 
151 static inline int blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset )
152 {
153   P->node_offset = node_offset;
154   return 0;
155 }
156 
157 static inline int blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth )
158 {
159   P->node_depth = node_depth;
160   return 0;
161 }
162 
163 static inline int blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length )
164 {
165   P->inner_length = inner_length;
166   return 0;
167 }
168 
169 static inline int blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] )
170 {
171   memcpy( P->salt, salt, BLAKE2B_SALTBYTES );
172   return 0;
173 }
174 
175 static inline int blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] )
176 {
177   memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES );
178   return 0;
179 }
180 
181 static inline int blake2b_init0( blake2b_state *S )
182 {
183   memset( S, 0, sizeof( blake2b_state ) );
184 
185   for( int i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i];
186 
187   return 0;
188 }
189 
190 
191 
192 #define blake2b_init BLAKE2_IMPL_NAME(blake2b_init)
193 #define blake2b_init_param BLAKE2_IMPL_NAME(blake2b_init_param)
194 #define blake2b_init_key BLAKE2_IMPL_NAME(blake2b_init_key)
195 #define blake2b_update BLAKE2_IMPL_NAME(blake2b_update)
196 #define blake2b_final BLAKE2_IMPL_NAME(blake2b_final)
197 #define blake2b BLAKE2_IMPL_NAME(blake2b)
198 
199 #if defined(__cplusplus)
200 extern "C" {
201 #endif
202   int blake2b_init( blake2b_state *S, size_t outlen );
203   int blake2b_init_param( blake2b_state *S, const blake2b_param *P );
204   int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
205   int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen );
206   int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen );
207   int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
208 #if defined(__cplusplus)
209 }
210 #endif
211 
212 /* init xors IV with input parameter block */
213 int blake2b_init_param( blake2b_state *S, const blake2b_param *P )
214 {
215   uint8_t *p, *h, *v;
216   //blake2b_init0( S );
217   v = ( uint8_t * )( blake2b_IV );
218   h = ( uint8_t * )( S->h );
219   p = ( uint8_t * )( P );
220   /* IV XOR ParamBlock */
221   memset( S, 0, sizeof( blake2b_state ) );
222 
223   for( int i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i];
224 
225   S->outlen = P->digest_length;
226   return 0;
227 }
228 
229 
230 /* Some sort of default parameter block initialization, for sequential blake2b */
231 
232 int blake2b_init( blake2b_state *S, size_t outlen )
233 {
234   if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
235 
236   const blake2b_param P =
237   {
238     ( uint8_t ) outlen,
239     0,
240     1,
241     1,
242     0,
243     0,
244     0,
245     0,
246     {0},
247     {0},
248     {0}
249   };
250   return blake2b_init_param( S, &P );
251 }
252 
253 int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen )
254 {
255   if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
256 
257   if ( ( !keylen ) || keylen > BLAKE2B_KEYBYTES ) return -1;
258 
259   const blake2b_param P =
260   {
261     ( uint8_t ) outlen,
262     ( uint8_t ) keylen,
263     1,
264     1,
265     0,
266     0,
267     0,
268     0,
269     {0},
270     {0},
271     {0}
272   };
273 
274   if( blake2b_init_param( S, &P ) < 0 )
275     return 0;
276 
277   {
278     uint8_t block[BLAKE2B_BLOCKBYTES];
279     memset( block, 0, BLAKE2B_BLOCKBYTES );
280     memcpy( block, key, keylen );
281     blake2b_update( S, block, BLAKE2B_BLOCKBYTES );
282     secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
283   }
284   return 0;
285 }
286 
287 static inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
288 {
289   __m128i row1l, row1h;
290   __m128i row2l, row2h;
291   __m128i row3l, row3h;
292   __m128i row4l, row4h;
293   __m128i b0, b1;
294   __m128i t0, t1;
295 #if defined(HAVE_SSSE3) && !defined(HAVE_XOP)
296   const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 );
297   const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 );
298 #endif
299 #if defined(HAVE_SSE4_1)
300   const __m128i m0 = LOADU( block + 00 );
301   const __m128i m1 = LOADU( block + 16 );
302   const __m128i m2 = LOADU( block + 32 );
303   const __m128i m3 = LOADU( block + 48 );
304   const __m128i m4 = LOADU( block + 64 );
305   const __m128i m5 = LOADU( block + 80 );
306   const __m128i m6 = LOADU( block + 96 );
307   const __m128i m7 = LOADU( block + 112 );
308 #else
309   const uint64_t  m0 = ( ( uint64_t * )block )[ 0];
310   const uint64_t  m1 = ( ( uint64_t * )block )[ 1];
311   const uint64_t  m2 = ( ( uint64_t * )block )[ 2];
312   const uint64_t  m3 = ( ( uint64_t * )block )[ 3];
313   const uint64_t  m4 = ( ( uint64_t * )block )[ 4];
314   const uint64_t  m5 = ( ( uint64_t * )block )[ 5];
315   const uint64_t  m6 = ( ( uint64_t * )block )[ 6];
316   const uint64_t  m7 = ( ( uint64_t * )block )[ 7];
317   const uint64_t  m8 = ( ( uint64_t * )block )[ 8];
318   const uint64_t  m9 = ( ( uint64_t * )block )[ 9];
319   const uint64_t m10 = ( ( uint64_t * )block )[10];
320   const uint64_t m11 = ( ( uint64_t * )block )[11];
321   const uint64_t m12 = ( ( uint64_t * )block )[12];
322   const uint64_t m13 = ( ( uint64_t * )block )[13];
323   const uint64_t m14 = ( ( uint64_t * )block )[14];
324   const uint64_t m15 = ( ( uint64_t * )block )[15];
325 #endif
326   row1l = LOADU( &S->h[0] );
327   row1h = LOADU( &S->h[2] );
328   row2l = LOADU( &S->h[4] );
329   row2h = LOADU( &S->h[6] );
330   row3l = LOADU( &blake2b_IV[0] );
331   row3h = LOADU( &blake2b_IV[2] );
332   row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &S->t[0] ) );
333   row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) );
334   ROUND( 0 );
335   ROUND( 1 );
336   ROUND( 2 );
337   ROUND( 3 );
338   ROUND( 4 );
339   ROUND( 5 );
340   ROUND( 6 );
341   ROUND( 7 );
342   ROUND( 8 );
343   ROUND( 9 );
344   ROUND( 10 );
345   ROUND( 11 );
346   row1l = _mm_xor_si128( row3l, row1l );
347   row1h = _mm_xor_si128( row3h, row1h );
348   STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) );
349   STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) );
350   row2l = _mm_xor_si128( row4l, row2l );
351   row2h = _mm_xor_si128( row4h, row2h );
352   STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) );
353   STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) );
354   return 0;
355 }
356 
357 
358 int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen )
359 {
360   while( inlen > 0 )
361   {
362     uint32_t left = S->buflen;
363     uint32_t fill = 2 * BLAKE2B_BLOCKBYTES - left;
364 
365     if( inlen > fill )
366     {
367       memcpy( S->buf + left, in, fill ); // Fill buffer
368       S->buflen += fill;
369       blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
370       blake2b_compress( S, S->buf ); // Compress
371       memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); // Shift buffer left
372       S->buflen -= BLAKE2B_BLOCKBYTES;
373       in += fill;
374       inlen -= fill;
375     }
376     else // inlen <= fill
377     {
378       memcpy( S->buf + left, in, inlen );
379       S->buflen += ( uint32_t ) inlen; // Be lazy, do not compress
380       in += inlen;
381       inlen -= inlen;
382     }
383   }
384 
385   return 0;
386 }
387 
388 
389 int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen )
390 {
391   if(S->outlen != outlen) return -1;
392 
393   if( S->buflen > BLAKE2B_BLOCKBYTES )
394   {
395     blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
396     blake2b_compress( S, S->buf );
397     S->buflen -= BLAKE2B_BLOCKBYTES;
398     memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen );
399   }
400 
401   blake2b_increment_counter( S, S->buflen );
402   blake2b_set_lastblock( S );
403   memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */
404   blake2b_compress( S, S->buf );
405   memcpy( out, &S->h[0], outlen );
406   return 0;
407 }
408 
409 
410 int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
411 {
412   blake2b_state S[1];
413 
414   /* Verify parameters */
415   if ( NULL == in && inlen > 0 ) return -1;
416 
417   if ( NULL == out ) return -1;
418 
419   if( NULL == key && keylen > 0 ) return -1;
420 
421   if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
422 
423   if( keylen > BLAKE2B_KEYBYTES ) return -1;
424 
425   if( keylen )
426   {
427     if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1;
428   }
429   else
430   {
431     if( blake2b_init( S, outlen ) < 0 ) return -1;
432   }
433 
434   if( blake2b_update( S, ( uint8_t * )in, inlen ) < 0) return -1;
435   return blake2b_final( S, out, outlen );
436 }
437 
438 #if defined(SUPERCOP)
439 int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
440 {
441   return blake2b( out, in, NULL, BLAKE2B_OUTBYTES, inlen, 0 );
442 }
443 #endif
444