xref: /freebsd/sys/contrib/libb2/blake2bp.c (revision 0e33efe4e4b5d24e2d416938af8bc6e6e4160ec8)
1*0e33efe4SConrad Meyer /*
2*0e33efe4SConrad Meyer    BLAKE2 reference source code package - optimized C implementations
3*0e33efe4SConrad Meyer 
4*0e33efe4SConrad Meyer    Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
5*0e33efe4SConrad Meyer 
6*0e33efe4SConrad Meyer    To the extent possible under law, the author(s) have dedicated all copyright
7*0e33efe4SConrad Meyer    and related and neighboring rights to this software to the public domain
8*0e33efe4SConrad Meyer    worldwide. This software is distributed without any warranty.
9*0e33efe4SConrad Meyer 
10*0e33efe4SConrad Meyer    You should have received a copy of the CC0 Public Domain Dedication along with
11*0e33efe4SConrad Meyer    this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
12*0e33efe4SConrad Meyer */
13*0e33efe4SConrad Meyer 
14*0e33efe4SConrad Meyer #include <stdio.h>
15*0e33efe4SConrad Meyer #include <stdlib.h>
16*0e33efe4SConrad Meyer #include <string.h>
17*0e33efe4SConrad Meyer #include <stdint.h>
18*0e33efe4SConrad Meyer 
19*0e33efe4SConrad Meyer #if defined(_OPENMP)
20*0e33efe4SConrad Meyer #include <omp.h>
21*0e33efe4SConrad Meyer #endif
22*0e33efe4SConrad Meyer 
23*0e33efe4SConrad Meyer #include "blake2.h"
24*0e33efe4SConrad Meyer #include "blake2-impl.h"
25*0e33efe4SConrad Meyer 
/* Number of BLAKE2b leaf states driven side by side in this file; it is also
   the value written to the `fanout` field of every parameter block below. */
26*0e33efe4SConrad Meyer #define PARALLELISM_DEGREE 4
27*0e33efe4SConrad Meyer 
/*
   Initialise S as one leaf of the two-level BLAKE2bp tree.

   S       state to initialise
   outlen  digest length recorded in the parameter block
   keylen  key length recorded in the parameter block
   offset  node offset of this leaf

   The parameter block uses fanout PARALLELISM_DEGREE, depth 2, node depth 0,
   leaf length 0, inner length BLAKE2B_OUTBYTES and all-zero reserved, salt
   and personalization fields.  After initialisation S->outlen is overwritten
   with the inner length rather than outlen.

   Always returns 0; the result of blake2b_init_param() is not inspected.
*/
static int blake2bp_init_leaf( blake2b_state *S, uint8_t outlen, uint8_t keylen, uint64_t offset )
{
  blake2b_param leaf;

  /* Array-valued fields are cleared explicitly. */
  memset( leaf.personal, 0, sizeof( leaf.personal ) );
  memset( leaf.salt, 0, sizeof( leaf.salt ) );
  memset( leaf.reserved, 0, sizeof( leaf.reserved ) );

  /* Lengths supplied by the caller. */
  leaf.digest_length = outlen;
  leaf.key_length = keylen;

  /* Tree shape and this node's position in it. */
  leaf.fanout = PARALLELISM_DEGREE;
  leaf.depth = 2;
  leaf.node_depth = 0;
  leaf.inner_length = BLAKE2B_OUTBYTES;
  store32( &leaf.leaf_length, 0 );
  store64( &leaf.node_offset, offset );

  blake2b_init_param( S, &leaf );

  /* Replace the output length stored in the state with the inner length. */
  S->outlen = leaf.inner_length;
  return 0;
}
46*0e33efe4SConrad Meyer 
/*
   Initialise S as the root node of the two-level BLAKE2bp tree.

   S       state to initialise
   outlen  digest length recorded in the parameter block
   keylen  key length recorded in the parameter block

   The parameter block uses fanout PARALLELISM_DEGREE, depth 2, node depth 1,
   node offset 0, leaf length 0, inner length BLAKE2B_OUTBYTES and all-zero
   reserved, salt and personalization fields.  After initialisation
   S->outlen is set to the digest length.

   Always returns 0; the result of blake2b_init_param() is not inspected.
*/
static int blake2bp_init_root( blake2b_state *S, uint8_t outlen, uint8_t keylen )
{
  blake2b_param root;

  /* Array-valued fields are cleared explicitly. */
  memset( root.personal, 0, sizeof( root.personal ) );
  memset( root.salt, 0, sizeof( root.salt ) );
  memset( root.reserved, 0, sizeof( root.reserved ) );

  /* Lengths supplied by the caller. */
  root.digest_length = outlen;
  root.key_length = keylen;

  /* Tree shape and this node's position in it. */
  root.fanout = PARALLELISM_DEGREE;
  root.depth = 2;
  root.node_depth = 1;
  root.inner_length = BLAKE2B_OUTBYTES;
  store32( &root.leaf_length, 0 );
  store64( &root.node_offset, 0 );

  blake2b_init_param( S, &root );

  S->outlen = root.digest_length;
  return 0;
}
65*0e33efe4SConrad Meyer 
66*0e33efe4SConrad Meyer 
/*
   Prepare S for an unkeyed BLAKE2bp computation.

   S       streaming state to initialise
   outlen  requested digest size; must be in 1..BLAKE2B_OUTBYTES

   Returns 0 on success, -1 if outlen is out of range or a node
   initialiser reports failure.
*/
int blake2bp_init( blake2bp_state *S, size_t outlen )
{
  size_t lane;
  uint8_t digest_len;

  if( outlen == 0 || outlen > BLAKE2B_OUTBYTES )
    return -1;

  digest_len = ( uint8_t ) outlen;

  /* Start with an empty staging buffer. */
  memset( S->buf, 0, sizeof( S->buf ) );
  S->buflen = 0;

  /* Root first, then each leaf with its index as node offset; key length 0. */
  if( blake2bp_init_root( S->R, digest_len, 0 ) < 0 )
    return -1;

  for( lane = 0; lane < PARALLELISM_DEGREE; ++lane )
  {
    if( blake2bp_init_leaf( S->S[lane], digest_len, 0, lane ) < 0 )
      return -1;
  }

  /* The root and the highest-numbered leaf carry the last-node flag. */
  S->R->last_node = 1;
  S->S[PARALLELISM_DEGREE - 1]->last_node = 1;

  S->outlen = digest_len;
  return 0;
}
85*0e33efe4SConrad Meyer 
/*
   Prepare S for a keyed BLAKE2bp computation.

   S       streaming state to initialise
   outlen  requested digest size; must be in 1..BLAKE2B_OUTBYTES
   key     key bytes; must not be NULL
   keylen  key size; must be in 1..BLAKE2B_KEYBYTES

   Returns 0 on success, -1 on an invalid argument or if a node initialiser
   reports failure.
*/
int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen )
{
  uint8_t block[BLAKE2B_BLOCKBYTES];
  uint8_t digest_len;
  uint8_t key_len;
  size_t lane;

  if( outlen == 0 || outlen > BLAKE2B_OUTBYTES )
    return -1;

  if( key == NULL || keylen == 0 || keylen > BLAKE2B_KEYBYTES )
    return -1;

  digest_len = ( uint8_t ) outlen;
  key_len = ( uint8_t ) keylen;

  /* Start with an empty staging buffer. */
  memset( S->buf, 0, sizeof( S->buf ) );
  S->buflen = 0;

  /* Root first, then each leaf with its index as node offset. */
  if( blake2bp_init_root( S->R, digest_len, key_len ) < 0 )
    return -1;

  for( lane = 0; lane < PARALLELISM_DEGREE; ++lane )
  {
    if( blake2bp_init_leaf( S->S[lane], digest_len, key_len, lane ) < 0 )
      return -1;
  }

  /* The root and the highest-numbered leaf carry the last-node flag. */
  S->R->last_node = 1;
  S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
  S->outlen = digest_len;

  /* Every leaf absorbs the key, zero-padded to one full block. */
  memset( block, 0, sizeof( block ) );
  memcpy( block, key, keylen );

  for( lane = 0; lane < PARALLELISM_DEGREE; ++lane )
    blake2b_update( S->S[lane], block, sizeof( block ) );

  secure_zero_memory( block, sizeof( block ) ); /* Burn the key from stack */
  return 0;
}
117*0e33efe4SConrad Meyer 
118*0e33efe4SConrad Meyer 
/*
   Absorb inlen bytes from in into the streaming BLAKE2bp state S.

   Input is consumed in stripes of PARALLELISM_DEGREE blocks: block k of each
   stripe goes to leaf k.  Bytes that do not complete a stripe are kept in
   S->buf until a later call (or blake2bp_final) consumes them.

   S      state previously set up by blake2bp_init / blake2bp_init_key
   in     input bytes
   inlen  number of input bytes

   Always returns 0.

   OpenMP note: the lanes are distributed with a work-sharing loop, so every
   leaf is processed exactly once no matter how many threads the runtime
   actually provides, and the caller's default thread count is left alone.
*/
int blake2bp_update( blake2bp_state *S, const uint8_t *in, size_t inlen )
{
  const size_t stripe = PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
  size_t left = S->buflen;
  size_t fill = sizeof( S->buf ) - left;

  /* Complete a partially filled staging buffer first and flush it,
     one block per leaf. */
  if( left && inlen >= fill )
  {
    memcpy( S->buf + left, in, fill );

    for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
      blake2b_update( S->S[i], S->buf + i * BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES );

    in += fill;
    inlen -= fill;
    left = 0;
  }

  /* Feed every whole stripe straight from the caller's buffer.  Each lane
     touches only its own leaf state, so iterations are independent. */
#if defined(_OPENMP)
  #pragma omp parallel for num_threads(PARALLELISM_DEGREE)
#endif
  for( int lane = 0; lane < PARALLELISM_DEGREE; ++lane )
  {
    /* Work with an offset instead of a moving pointer so that no pointer
       beyond the end of the input is ever formed. */
    size_t off = ( size_t ) lane * BLAKE2B_BLOCKBYTES;
    size_t remaining = inlen;

    while( remaining >= stripe )
    {
      blake2b_update( S->S[lane], in + off, BLAKE2B_BLOCKBYTES );
      off += stripe;
      remaining -= stripe;
    }
  }

  /* Skip what the lanes consumed; what is left is shorter than a stripe. */
  in += inlen - inlen % stripe;
  inlen %= stripe;

  if( inlen > 0 )
    memcpy( S->buf + left, in, inlen );

  S->buflen = ( uint32_t ) left + ( uint32_t ) inlen;
  return 0;
}
167*0e33efe4SConrad Meyer 
168*0e33efe4SConrad Meyer 
169*0e33efe4SConrad Meyer 
/*
   Finish a streaming BLAKE2bp computation and write the digest to out.

   S       state that has been initialised and updated
   out     destination for the digest
   outlen  must equal the length given at initialisation (S->outlen)

   Returns -1 if outlen does not match S->outlen, otherwise the result of
   blake2b_final() on the root state.
*/
int blake2bp_final( blake2bp_state *S, uint8_t *out, size_t outlen )
{
  uint8_t leaf_digest[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES];
  size_t lane;

  if( outlen != S->outlen )
    return -1;

  /* Hand each leaf its share of the staged bytes (at most one block,
     taken from that leaf's slot in S->buf), then close the leaf. */
  for( lane = 0; lane < PARALLELISM_DEGREE; ++lane )
  {
    const size_t start = lane * BLAKE2B_BLOCKBYTES;

    if( start < S->buflen )
    {
      const size_t avail = S->buflen - start;
      const size_t take = avail > BLAKE2B_BLOCKBYTES ? BLAKE2B_BLOCKBYTES : avail;

      blake2b_update( S->S[lane], S->buf + start, take );
    }

    blake2b_final( S->S[lane], leaf_digest[lane], BLAKE2B_OUTBYTES );
  }

  /* The root hashes the leaf digests in leaf order. */
  for( lane = 0; lane < PARALLELISM_DEGREE; ++lane )
    blake2b_update( S->R, leaf_digest[lane], BLAKE2B_OUTBYTES );

  return blake2b_final( S->R, out, outlen );
}
195*0e33efe4SConrad Meyer 
blake2bp(uint8_t * out,const void * in,const void * key,size_t outlen,size_t inlen,size_t keylen)196*0e33efe4SConrad Meyer int blake2bp( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
197*0e33efe4SConrad Meyer {
198*0e33efe4SConrad Meyer   uint8_t hash[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES];
199*0e33efe4SConrad Meyer   blake2b_state S[PARALLELISM_DEGREE][1];
200*0e33efe4SConrad Meyer   blake2b_state FS[1];
201*0e33efe4SConrad Meyer 
202*0e33efe4SConrad Meyer   /* Verify parameters */
203*0e33efe4SConrad Meyer   if ( NULL == in && inlen > 0 ) return -1;
204*0e33efe4SConrad Meyer 
205*0e33efe4SConrad Meyer   if ( NULL == out ) return -1;
206*0e33efe4SConrad Meyer 
207*0e33efe4SConrad Meyer   if ( NULL == key && keylen > 0) return -1;
208*0e33efe4SConrad Meyer 
209*0e33efe4SConrad Meyer   if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
210*0e33efe4SConrad Meyer 
211*0e33efe4SConrad Meyer   if( keylen > BLAKE2B_KEYBYTES ) return -1;
212*0e33efe4SConrad Meyer 
213*0e33efe4SConrad Meyer   for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
214*0e33efe4SConrad Meyer     if( blake2bp_init_leaf( S[i], ( uint8_t ) outlen, ( uint8_t ) keylen, i ) < 0 )
215*0e33efe4SConrad Meyer       return -1;
216*0e33efe4SConrad Meyer 
217*0e33efe4SConrad Meyer   S[PARALLELISM_DEGREE - 1]->last_node = 1; // mark last node
218*0e33efe4SConrad Meyer 
219*0e33efe4SConrad Meyer   if( keylen > 0 )
220*0e33efe4SConrad Meyer   {
221*0e33efe4SConrad Meyer     uint8_t block[BLAKE2B_BLOCKBYTES];
222*0e33efe4SConrad Meyer     memset( block, 0, BLAKE2B_BLOCKBYTES );
223*0e33efe4SConrad Meyer     memcpy( block, key, keylen );
224*0e33efe4SConrad Meyer 
225*0e33efe4SConrad Meyer     for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
226*0e33efe4SConrad Meyer       blake2b_update( S[i], block, BLAKE2B_BLOCKBYTES );
227*0e33efe4SConrad Meyer 
228*0e33efe4SConrad Meyer     secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
229*0e33efe4SConrad Meyer   }
230*0e33efe4SConrad Meyer 
231*0e33efe4SConrad Meyer #if defined(_OPENMP)
232*0e33efe4SConrad Meyer   omp_set_num_threads(PARALLELISM_DEGREE);
233*0e33efe4SConrad Meyer   #pragma omp parallel shared(S,hash)
234*0e33efe4SConrad Meyer #else
235*0e33efe4SConrad Meyer   for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ )
236*0e33efe4SConrad Meyer #endif
237*0e33efe4SConrad Meyer   {
238*0e33efe4SConrad Meyer #if defined(_OPENMP)
239*0e33efe4SConrad Meyer     size_t      id__ = ( size_t ) omp_get_thread_num();
240*0e33efe4SConrad Meyer #endif
241*0e33efe4SConrad Meyer     size_t inlen__ = inlen;
242*0e33efe4SConrad Meyer     const uint8_t *in__ = ( const uint8_t * )in;
243*0e33efe4SConrad Meyer     in__ += id__ * BLAKE2B_BLOCKBYTES;
244*0e33efe4SConrad Meyer 
245*0e33efe4SConrad Meyer     while( inlen__ >= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES )
246*0e33efe4SConrad Meyer     {
247*0e33efe4SConrad Meyer       blake2b_update( S[id__], in__, BLAKE2B_BLOCKBYTES );
248*0e33efe4SConrad Meyer       in__ += PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
249*0e33efe4SConrad Meyer       inlen__ -= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
250*0e33efe4SConrad Meyer     }
251*0e33efe4SConrad Meyer 
252*0e33efe4SConrad Meyer     if( inlen__ > id__ * BLAKE2B_BLOCKBYTES )
253*0e33efe4SConrad Meyer     {
254*0e33efe4SConrad Meyer       const size_t left = inlen__ - id__ * BLAKE2B_BLOCKBYTES;
255*0e33efe4SConrad Meyer       const size_t len = left <= BLAKE2B_BLOCKBYTES ? left : BLAKE2B_BLOCKBYTES;
256*0e33efe4SConrad Meyer       blake2b_update( S[id__], in__, len );
257*0e33efe4SConrad Meyer     }
258*0e33efe4SConrad Meyer 
259*0e33efe4SConrad Meyer     blake2b_final( S[id__], hash[id__], BLAKE2B_OUTBYTES );
260*0e33efe4SConrad Meyer   }
261*0e33efe4SConrad Meyer 
262*0e33efe4SConrad Meyer   if( blake2bp_init_root( FS, ( uint8_t ) outlen, ( uint8_t ) keylen ) < 0 )
263*0e33efe4SConrad Meyer     return -1;
264*0e33efe4SConrad Meyer 
265*0e33efe4SConrad Meyer   FS->last_node = 1; // Mark as last node
266*0e33efe4SConrad Meyer 
267*0e33efe4SConrad Meyer   for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
268*0e33efe4SConrad Meyer     blake2b_update( FS, hash[i], BLAKE2B_OUTBYTES );
269*0e33efe4SConrad Meyer 
270*0e33efe4SConrad Meyer   return blake2b_final( FS, out, outlen );
271*0e33efe4SConrad Meyer }
272*0e33efe4SConrad Meyer 
273*0e33efe4SConrad Meyer 
274*0e33efe4SConrad Meyer 
275