xref: /freebsd/contrib/bearssl/src/kdf/shake.c (revision 2aaf9152a852aba9eb2036b95f4948ee77988826)
1 /*
2  * Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining
5  * a copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sublicense, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be
13  * included in all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #include "inner.h"
26 
/*
 * Round constants, one 64-bit value per round (24 entries).
 * process_block() XORs RC[j] into state word 0 at the end of round j.
 */
static const uint64_t RC[] = {
	0x0000000000000001,	/* round  0 */
	0x0000000000008082,	/* round  1 */
	0x800000000000808A,	/* round  2 */
	0x8000000080008000,	/* round  3 */
	0x000000000000808B,	/* round  4 */
	0x0000000080000001,	/* round  5 */
	0x8000000080008081,	/* round  6 */
	0x8000000000008009,	/* round  7 */
	0x000000000000008A,	/* round  8 */
	0x0000000000000088,	/* round  9 */
	0x0000000080008009,	/* round 10 */
	0x000000008000000A,	/* round 11 */
	0x000000008000808B,	/* round 12 */
	0x800000000000008B,	/* round 13 */
	0x8000000000008089,	/* round 14 */
	0x8000000000008003,	/* round 15 */
	0x8000000000008002,	/* round 16 */
	0x8000000000000080,	/* round 17 */
	0x000000000000800A,	/* round 18 */
	0x800000008000000A,	/* round 19 */
	0x8000000080008081,	/* round 20 */
	0x8000000000008080,	/* round 21 */
	0x0000000080000001,	/* round 22 */
	0x8000000080008008	/* round 23 */
};
44 
/*
 * XOR 'rate' bytes of input into the state words, starting at A[0].
 * The input is consumed eight bytes at a time, each chunk being decoded
 * with br_dec64le(); 'rate' must therefore be a multiple of 8.
 */
static void
xor_block(uint64_t *A, const void *data, size_t rate)
{
	const unsigned char *src;
	size_t off;

	src = data;
	for (off = 0; off < rate; off += 8) {
		*A ^= br_dec64le(src + off);
		A ++;
	}
}
58 
59 /*
60  * Process a block with the provided data. The data length must be a
61  * multiple of 8 (in bytes); normally, this is the "rate".
62  */
63 static void
process_block(uint64_t * A)64 process_block(uint64_t *A)
65 {
66 	uint64_t t0, t1, t2, t3, t4;
67 	uint64_t tt0, tt1, tt2, tt3;
68 	uint64_t t, kt;
69 	uint64_t c0, c1, c2, c3, c4, bnn;
70 	int j;
71 
72 	/*
73 	 * Compute the 24 rounds. This loop is partially unrolled (each
74 	 * iteration computes two rounds).
75 	 */
76 	for (j = 0; j < 24; j += 2) {
77 
78 		tt0 = A[ 1] ^ A[ 6];
79 		tt1 = A[11] ^ A[16];
80 		tt0 ^= A[21] ^ tt1;
81 		tt0 = (tt0 << 1) | (tt0 >> 63);
82 		tt2 = A[ 4] ^ A[ 9];
83 		tt3 = A[14] ^ A[19];
84 		tt0 ^= A[24];
85 		tt2 ^= tt3;
86 		t0 = tt0 ^ tt2;
87 
88 		tt0 = A[ 2] ^ A[ 7];
89 		tt1 = A[12] ^ A[17];
90 		tt0 ^= A[22] ^ tt1;
91 		tt0 = (tt0 << 1) | (tt0 >> 63);
92 		tt2 = A[ 0] ^ A[ 5];
93 		tt3 = A[10] ^ A[15];
94 		tt0 ^= A[20];
95 		tt2 ^= tt3;
96 		t1 = tt0 ^ tt2;
97 
98 		tt0 = A[ 3] ^ A[ 8];
99 		tt1 = A[13] ^ A[18];
100 		tt0 ^= A[23] ^ tt1;
101 		tt0 = (tt0 << 1) | (tt0 >> 63);
102 		tt2 = A[ 1] ^ A[ 6];
103 		tt3 = A[11] ^ A[16];
104 		tt0 ^= A[21];
105 		tt2 ^= tt3;
106 		t2 = tt0 ^ tt2;
107 
108 		tt0 = A[ 4] ^ A[ 9];
109 		tt1 = A[14] ^ A[19];
110 		tt0 ^= A[24] ^ tt1;
111 		tt0 = (tt0 << 1) | (tt0 >> 63);
112 		tt2 = A[ 2] ^ A[ 7];
113 		tt3 = A[12] ^ A[17];
114 		tt0 ^= A[22];
115 		tt2 ^= tt3;
116 		t3 = tt0 ^ tt2;
117 
118 		tt0 = A[ 0] ^ A[ 5];
119 		tt1 = A[10] ^ A[15];
120 		tt0 ^= A[20] ^ tt1;
121 		tt0 = (tt0 << 1) | (tt0 >> 63);
122 		tt2 = A[ 3] ^ A[ 8];
123 		tt3 = A[13] ^ A[18];
124 		tt0 ^= A[23];
125 		tt2 ^= tt3;
126 		t4 = tt0 ^ tt2;
127 
128 		A[ 0] = A[ 0] ^ t0;
129 		A[ 5] = A[ 5] ^ t0;
130 		A[10] = A[10] ^ t0;
131 		A[15] = A[15] ^ t0;
132 		A[20] = A[20] ^ t0;
133 		A[ 1] = A[ 1] ^ t1;
134 		A[ 6] = A[ 6] ^ t1;
135 		A[11] = A[11] ^ t1;
136 		A[16] = A[16] ^ t1;
137 		A[21] = A[21] ^ t1;
138 		A[ 2] = A[ 2] ^ t2;
139 		A[ 7] = A[ 7] ^ t2;
140 		A[12] = A[12] ^ t2;
141 		A[17] = A[17] ^ t2;
142 		A[22] = A[22] ^ t2;
143 		A[ 3] = A[ 3] ^ t3;
144 		A[ 8] = A[ 8] ^ t3;
145 		A[13] = A[13] ^ t3;
146 		A[18] = A[18] ^ t3;
147 		A[23] = A[23] ^ t3;
148 		A[ 4] = A[ 4] ^ t4;
149 		A[ 9] = A[ 9] ^ t4;
150 		A[14] = A[14] ^ t4;
151 		A[19] = A[19] ^ t4;
152 		A[24] = A[24] ^ t4;
153 		A[ 5] = (A[ 5] << 36) | (A[ 5] >> (64 - 36));
154 		A[10] = (A[10] <<  3) | (A[10] >> (64 -  3));
155 		A[15] = (A[15] << 41) | (A[15] >> (64 - 41));
156 		A[20] = (A[20] << 18) | (A[20] >> (64 - 18));
157 		A[ 1] = (A[ 1] <<  1) | (A[ 1] >> (64 -  1));
158 		A[ 6] = (A[ 6] << 44) | (A[ 6] >> (64 - 44));
159 		A[11] = (A[11] << 10) | (A[11] >> (64 - 10));
160 		A[16] = (A[16] << 45) | (A[16] >> (64 - 45));
161 		A[21] = (A[21] <<  2) | (A[21] >> (64 - 2));
162 		A[ 2] = (A[ 2] << 62) | (A[ 2] >> (64 - 62));
163 		A[ 7] = (A[ 7] <<  6) | (A[ 7] >> (64 -  6));
164 		A[12] = (A[12] << 43) | (A[12] >> (64 - 43));
165 		A[17] = (A[17] << 15) | (A[17] >> (64 - 15));
166 		A[22] = (A[22] << 61) | (A[22] >> (64 - 61));
167 		A[ 3] = (A[ 3] << 28) | (A[ 3] >> (64 - 28));
168 		A[ 8] = (A[ 8] << 55) | (A[ 8] >> (64 - 55));
169 		A[13] = (A[13] << 25) | (A[13] >> (64 - 25));
170 		A[18] = (A[18] << 21) | (A[18] >> (64 - 21));
171 		A[23] = (A[23] << 56) | (A[23] >> (64 - 56));
172 		A[ 4] = (A[ 4] << 27) | (A[ 4] >> (64 - 27));
173 		A[ 9] = (A[ 9] << 20) | (A[ 9] >> (64 - 20));
174 		A[14] = (A[14] << 39) | (A[14] >> (64 - 39));
175 		A[19] = (A[19] <<  8) | (A[19] >> (64 -  8));
176 		A[24] = (A[24] << 14) | (A[24] >> (64 - 14));
177 		bnn = ~A[12];
178 		kt = A[ 6] | A[12];
179 		c0 = A[ 0] ^ kt;
180 		kt = bnn | A[18];
181 		c1 = A[ 6] ^ kt;
182 		kt = A[18] & A[24];
183 		c2 = A[12] ^ kt;
184 		kt = A[24] | A[ 0];
185 		c3 = A[18] ^ kt;
186 		kt = A[ 0] & A[ 6];
187 		c4 = A[24] ^ kt;
188 		A[ 0] = c0;
189 		A[ 6] = c1;
190 		A[12] = c2;
191 		A[18] = c3;
192 		A[24] = c4;
193 		bnn = ~A[22];
194 		kt = A[ 9] | A[10];
195 		c0 = A[ 3] ^ kt;
196 		kt = A[10] & A[16];
197 		c1 = A[ 9] ^ kt;
198 		kt = A[16] | bnn;
199 		c2 = A[10] ^ kt;
200 		kt = A[22] | A[ 3];
201 		c3 = A[16] ^ kt;
202 		kt = A[ 3] & A[ 9];
203 		c4 = A[22] ^ kt;
204 		A[ 3] = c0;
205 		A[ 9] = c1;
206 		A[10] = c2;
207 		A[16] = c3;
208 		A[22] = c4;
209 		bnn = ~A[19];
210 		kt = A[ 7] | A[13];
211 		c0 = A[ 1] ^ kt;
212 		kt = A[13] & A[19];
213 		c1 = A[ 7] ^ kt;
214 		kt = bnn & A[20];
215 		c2 = A[13] ^ kt;
216 		kt = A[20] | A[ 1];
217 		c3 = bnn ^ kt;
218 		kt = A[ 1] & A[ 7];
219 		c4 = A[20] ^ kt;
220 		A[ 1] = c0;
221 		A[ 7] = c1;
222 		A[13] = c2;
223 		A[19] = c3;
224 		A[20] = c4;
225 		bnn = ~A[17];
226 		kt = A[ 5] & A[11];
227 		c0 = A[ 4] ^ kt;
228 		kt = A[11] | A[17];
229 		c1 = A[ 5] ^ kt;
230 		kt = bnn | A[23];
231 		c2 = A[11] ^ kt;
232 		kt = A[23] & A[ 4];
233 		c3 = bnn ^ kt;
234 		kt = A[ 4] | A[ 5];
235 		c4 = A[23] ^ kt;
236 		A[ 4] = c0;
237 		A[ 5] = c1;
238 		A[11] = c2;
239 		A[17] = c3;
240 		A[23] = c4;
241 		bnn = ~A[ 8];
242 		kt = bnn & A[14];
243 		c0 = A[ 2] ^ kt;
244 		kt = A[14] | A[15];
245 		c1 = bnn ^ kt;
246 		kt = A[15] & A[21];
247 		c2 = A[14] ^ kt;
248 		kt = A[21] | A[ 2];
249 		c3 = A[15] ^ kt;
250 		kt = A[ 2] & A[ 8];
251 		c4 = A[21] ^ kt;
252 		A[ 2] = c0;
253 		A[ 8] = c1;
254 		A[14] = c2;
255 		A[15] = c3;
256 		A[21] = c4;
257 		A[ 0] = A[ 0] ^ RC[j + 0];
258 
259 		tt0 = A[ 6] ^ A[ 9];
260 		tt1 = A[ 7] ^ A[ 5];
261 		tt0 ^= A[ 8] ^ tt1;
262 		tt0 = (tt0 << 1) | (tt0 >> 63);
263 		tt2 = A[24] ^ A[22];
264 		tt3 = A[20] ^ A[23];
265 		tt0 ^= A[21];
266 		tt2 ^= tt3;
267 		t0 = tt0 ^ tt2;
268 
269 		tt0 = A[12] ^ A[10];
270 		tt1 = A[13] ^ A[11];
271 		tt0 ^= A[14] ^ tt1;
272 		tt0 = (tt0 << 1) | (tt0 >> 63);
273 		tt2 = A[ 0] ^ A[ 3];
274 		tt3 = A[ 1] ^ A[ 4];
275 		tt0 ^= A[ 2];
276 		tt2 ^= tt3;
277 		t1 = tt0 ^ tt2;
278 
279 		tt0 = A[18] ^ A[16];
280 		tt1 = A[19] ^ A[17];
281 		tt0 ^= A[15] ^ tt1;
282 		tt0 = (tt0 << 1) | (tt0 >> 63);
283 		tt2 = A[ 6] ^ A[ 9];
284 		tt3 = A[ 7] ^ A[ 5];
285 		tt0 ^= A[ 8];
286 		tt2 ^= tt3;
287 		t2 = tt0 ^ tt2;
288 
289 		tt0 = A[24] ^ A[22];
290 		tt1 = A[20] ^ A[23];
291 		tt0 ^= A[21] ^ tt1;
292 		tt0 = (tt0 << 1) | (tt0 >> 63);
293 		tt2 = A[12] ^ A[10];
294 		tt3 = A[13] ^ A[11];
295 		tt0 ^= A[14];
296 		tt2 ^= tt3;
297 		t3 = tt0 ^ tt2;
298 
299 		tt0 = A[ 0] ^ A[ 3];
300 		tt1 = A[ 1] ^ A[ 4];
301 		tt0 ^= A[ 2] ^ tt1;
302 		tt0 = (tt0 << 1) | (tt0 >> 63);
303 		tt2 = A[18] ^ A[16];
304 		tt3 = A[19] ^ A[17];
305 		tt0 ^= A[15];
306 		tt2 ^= tt3;
307 		t4 = tt0 ^ tt2;
308 
309 		A[ 0] = A[ 0] ^ t0;
310 		A[ 3] = A[ 3] ^ t0;
311 		A[ 1] = A[ 1] ^ t0;
312 		A[ 4] = A[ 4] ^ t0;
313 		A[ 2] = A[ 2] ^ t0;
314 		A[ 6] = A[ 6] ^ t1;
315 		A[ 9] = A[ 9] ^ t1;
316 		A[ 7] = A[ 7] ^ t1;
317 		A[ 5] = A[ 5] ^ t1;
318 		A[ 8] = A[ 8] ^ t1;
319 		A[12] = A[12] ^ t2;
320 		A[10] = A[10] ^ t2;
321 		A[13] = A[13] ^ t2;
322 		A[11] = A[11] ^ t2;
323 		A[14] = A[14] ^ t2;
324 		A[18] = A[18] ^ t3;
325 		A[16] = A[16] ^ t3;
326 		A[19] = A[19] ^ t3;
327 		A[17] = A[17] ^ t3;
328 		A[15] = A[15] ^ t3;
329 		A[24] = A[24] ^ t4;
330 		A[22] = A[22] ^ t4;
331 		A[20] = A[20] ^ t4;
332 		A[23] = A[23] ^ t4;
333 		A[21] = A[21] ^ t4;
334 		A[ 3] = (A[ 3] << 36) | (A[ 3] >> (64 - 36));
335 		A[ 1] = (A[ 1] <<  3) | (A[ 1] >> (64 -  3));
336 		A[ 4] = (A[ 4] << 41) | (A[ 4] >> (64 - 41));
337 		A[ 2] = (A[ 2] << 18) | (A[ 2] >> (64 - 18));
338 		A[ 6] = (A[ 6] <<  1) | (A[ 6] >> (64 -  1));
339 		A[ 9] = (A[ 9] << 44) | (A[ 9] >> (64 - 44));
340 		A[ 7] = (A[ 7] << 10) | (A[ 7] >> (64 - 10));
341 		A[ 5] = (A[ 5] << 45) | (A[ 5] >> (64 - 45));
342 		A[ 8] = (A[ 8] <<  2) | (A[ 8] >> (64 - 2));
343 		A[12] = (A[12] << 62) | (A[12] >> (64 - 62));
344 		A[10] = (A[10] <<  6) | (A[10] >> (64 -  6));
345 		A[13] = (A[13] << 43) | (A[13] >> (64 - 43));
346 		A[11] = (A[11] << 15) | (A[11] >> (64 - 15));
347 		A[14] = (A[14] << 61) | (A[14] >> (64 - 61));
348 		A[18] = (A[18] << 28) | (A[18] >> (64 - 28));
349 		A[16] = (A[16] << 55) | (A[16] >> (64 - 55));
350 		A[19] = (A[19] << 25) | (A[19] >> (64 - 25));
351 		A[17] = (A[17] << 21) | (A[17] >> (64 - 21));
352 		A[15] = (A[15] << 56) | (A[15] >> (64 - 56));
353 		A[24] = (A[24] << 27) | (A[24] >> (64 - 27));
354 		A[22] = (A[22] << 20) | (A[22] >> (64 - 20));
355 		A[20] = (A[20] << 39) | (A[20] >> (64 - 39));
356 		A[23] = (A[23] <<  8) | (A[23] >> (64 -  8));
357 		A[21] = (A[21] << 14) | (A[21] >> (64 - 14));
358 		bnn = ~A[13];
359 		kt = A[ 9] | A[13];
360 		c0 = A[ 0] ^ kt;
361 		kt = bnn | A[17];
362 		c1 = A[ 9] ^ kt;
363 		kt = A[17] & A[21];
364 		c2 = A[13] ^ kt;
365 		kt = A[21] | A[ 0];
366 		c3 = A[17] ^ kt;
367 		kt = A[ 0] & A[ 9];
368 		c4 = A[21] ^ kt;
369 		A[ 0] = c0;
370 		A[ 9] = c1;
371 		A[13] = c2;
372 		A[17] = c3;
373 		A[21] = c4;
374 		bnn = ~A[14];
375 		kt = A[22] | A[ 1];
376 		c0 = A[18] ^ kt;
377 		kt = A[ 1] & A[ 5];
378 		c1 = A[22] ^ kt;
379 		kt = A[ 5] | bnn;
380 		c2 = A[ 1] ^ kt;
381 		kt = A[14] | A[18];
382 		c3 = A[ 5] ^ kt;
383 		kt = A[18] & A[22];
384 		c4 = A[14] ^ kt;
385 		A[18] = c0;
386 		A[22] = c1;
387 		A[ 1] = c2;
388 		A[ 5] = c3;
389 		A[14] = c4;
390 		bnn = ~A[23];
391 		kt = A[10] | A[19];
392 		c0 = A[ 6] ^ kt;
393 		kt = A[19] & A[23];
394 		c1 = A[10] ^ kt;
395 		kt = bnn & A[ 2];
396 		c2 = A[19] ^ kt;
397 		kt = A[ 2] | A[ 6];
398 		c3 = bnn ^ kt;
399 		kt = A[ 6] & A[10];
400 		c4 = A[ 2] ^ kt;
401 		A[ 6] = c0;
402 		A[10] = c1;
403 		A[19] = c2;
404 		A[23] = c3;
405 		A[ 2] = c4;
406 		bnn = ~A[11];
407 		kt = A[ 3] & A[ 7];
408 		c0 = A[24] ^ kt;
409 		kt = A[ 7] | A[11];
410 		c1 = A[ 3] ^ kt;
411 		kt = bnn | A[15];
412 		c2 = A[ 7] ^ kt;
413 		kt = A[15] & A[24];
414 		c3 = bnn ^ kt;
415 		kt = A[24] | A[ 3];
416 		c4 = A[15] ^ kt;
417 		A[24] = c0;
418 		A[ 3] = c1;
419 		A[ 7] = c2;
420 		A[11] = c3;
421 		A[15] = c4;
422 		bnn = ~A[16];
423 		kt = bnn & A[20];
424 		c0 = A[12] ^ kt;
425 		kt = A[20] | A[ 4];
426 		c1 = bnn ^ kt;
427 		kt = A[ 4] & A[ 8];
428 		c2 = A[20] ^ kt;
429 		kt = A[ 8] | A[12];
430 		c3 = A[ 4] ^ kt;
431 		kt = A[12] & A[16];
432 		c4 = A[ 8] ^ kt;
433 		A[12] = c0;
434 		A[16] = c1;
435 		A[20] = c2;
436 		A[ 4] = c3;
437 		A[ 8] = c4;
438 		A[ 0] = A[ 0] ^ RC[j + 1];
439 		t = A[ 5];
440 		A[ 5] = A[18];
441 		A[18] = A[11];
442 		A[11] = A[10];
443 		A[10] = A[ 6];
444 		A[ 6] = A[22];
445 		A[22] = A[20];
446 		A[20] = A[12];
447 		A[12] = A[19];
448 		A[19] = A[15];
449 		A[15] = A[24];
450 		A[24] = A[ 8];
451 		A[ 8] = t;
452 		t = A[ 1];
453 		A[ 1] = A[ 9];
454 		A[ 9] = A[14];
455 		A[14] = A[ 2];
456 		A[ 2] = A[13];
457 		A[13] = A[23];
458 		A[23] = A[ 4];
459 		A[ 4] = A[21];
460 		A[21] = A[16];
461 		A[16] = A[ 3];
462 		A[ 3] = A[17];
463 		A[17] = A[ 7];
464 		A[ 7] = t;
465 	}
466 }
467 
468 /* see bearssl_kdf.h */
469 void
br_shake_init(br_shake_context * sc,int security_level)470 br_shake_init(br_shake_context *sc, int security_level)
471 {
472 	sc->rate = 200 - (size_t)(security_level >> 2);
473 	sc->dptr = 0;
474 	memset(sc->A, 0, sizeof sc->A);
475 	sc->A[ 1] = ~(uint64_t)0;
476 	sc->A[ 2] = ~(uint64_t)0;
477 	sc->A[ 8] = ~(uint64_t)0;
478 	sc->A[12] = ~(uint64_t)0;
479 	sc->A[17] = ~(uint64_t)0;
480 	sc->A[20] = ~(uint64_t)0;
481 }
482 
483 /* see bearssl_kdf.h */
484 void
br_shake_inject(br_shake_context * sc,const void * data,size_t len)485 br_shake_inject(br_shake_context *sc, const void *data, size_t len)
486 {
487 	const unsigned char *buf;
488 	size_t rate, dptr;
489 
490 	buf = data;
491 	rate = sc->rate;
492 	dptr = sc->dptr;
493 	while (len > 0) {
494 		size_t clen;
495 
496 		clen = rate - dptr;
497 		if (clen > len) {
498 			clen = len;
499 		}
500 		memcpy(sc->dbuf + dptr, buf, clen);
501 		dptr += clen;
502 		buf += clen;
503 		len -= clen;
504 		if (dptr == rate) {
505 			xor_block(sc->A, sc->dbuf, rate);
506 			process_block(sc->A);
507 			dptr = 0;
508 		}
509 	}
510 	sc->dptr = dptr;
511 }
512 
513 /* see bearssl_kdf.h */
514 void
br_shake_flip(br_shake_context * sc)515 br_shake_flip(br_shake_context *sc)
516 {
517 	/*
518 	 * We apply padding and pre-XOR the value into the state. We
519 	 * set dptr to the end of the buffer, so that first call to
520 	 * shake_extract() will process the block.
521 	 */
522 	if ((sc->dptr + 1) == sc->rate) {
523 		sc->dbuf[sc->dptr ++] = 0x9F;
524 	} else {
525 		sc->dbuf[sc->dptr ++] = 0x1F;
526 		memset(sc->dbuf + sc->dptr, 0x00, sc->rate - sc->dptr - 1);
527 		sc->dbuf[sc->rate - 1] = 0x80;
528 		sc->dptr = sc->rate;
529 	}
530 	xor_block(sc->A, sc->dbuf, sc->rate);
531 }
532 
533 /* see bearssl_kdf.h */
534 void
br_shake_produce(br_shake_context * sc,void * out,size_t len)535 br_shake_produce(br_shake_context *sc, void *out, size_t len)
536 {
537 	unsigned char *buf;
538 	size_t dptr, rate;
539 
540 	buf = out;
541 	dptr = sc->dptr;
542 	rate = sc->rate;
543 	while (len > 0) {
544 		size_t clen;
545 
546 		if (dptr == rate) {
547 			unsigned char *dbuf;
548 			uint64_t *A;
549 
550 			A = sc->A;
551 			dbuf = sc->dbuf;
552 			process_block(A);
553 			br_enc64le(dbuf +   0,  A[ 0]);
554 			br_enc64le(dbuf +   8, ~A[ 1]);
555 			br_enc64le(dbuf +  16, ~A[ 2]);
556 			br_enc64le(dbuf +  24,  A[ 3]);
557 			br_enc64le(dbuf +  32,  A[ 4]);
558 			br_enc64le(dbuf +  40,  A[ 5]);
559 			br_enc64le(dbuf +  48,  A[ 6]);
560 			br_enc64le(dbuf +  56,  A[ 7]);
561 			br_enc64le(dbuf +  64, ~A[ 8]);
562 			br_enc64le(dbuf +  72,  A[ 9]);
563 			br_enc64le(dbuf +  80,  A[10]);
564 			br_enc64le(dbuf +  88,  A[11]);
565 			br_enc64le(dbuf +  96, ~A[12]);
566 			br_enc64le(dbuf + 104,  A[13]);
567 			br_enc64le(dbuf + 112,  A[14]);
568 			br_enc64le(dbuf + 120,  A[15]);
569 			br_enc64le(dbuf + 128,  A[16]);
570 			br_enc64le(dbuf + 136, ~A[17]);
571 			br_enc64le(dbuf + 144,  A[18]);
572 			br_enc64le(dbuf + 152,  A[19]);
573 			br_enc64le(dbuf + 160, ~A[20]);
574 			br_enc64le(dbuf + 168,  A[21]);
575 			br_enc64le(dbuf + 176,  A[22]);
576 			br_enc64le(dbuf + 184,  A[23]);
577 			br_enc64le(dbuf + 192,  A[24]);
578 			dptr = 0;
579 		}
580 		clen = rate - dptr;
581 		if (clen > len) {
582 			clen = len;
583 		}
584 		memcpy(buf, sc->dbuf + dptr, clen);
585 		dptr += clen;
586 		buf += clen;
587 		len -= clen;
588 	}
589 	sc->dptr = dptr;
590 }
591