/*
chacha-merged.c version 20080118
D. J. Bernstein
Public domain.
*/

/* $OpenBSD: chacha.c,v 1.1 2013/11/21 00:45:44 djm Exp $ */
8150890b0SMark Murray
9150890b0SMark Murray #include <sys/param.h>
10150890b0SMark Murray #include <sys/types.h>
11150890b0SMark Murray
12150890b0SMark Murray #include <crypto/chacha20/chacha.h>
13150890b0SMark Murray
/* Short aliases for the fixed-width types used throughout this file. */
typedef uint8_t u8;
typedef uint32_t u32;

typedef struct chacha_ctx chacha_ctx;

/* Attach an unsigned suffix to an integer literal. */
#define U8C(v) (v##U)
#define U32C(v) (v##U)

/* Truncate a value to 8 / 32 bits. */
#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

/*
 * Rotate the 32-bit value v left by n bits.
 * n must be in 1..31: n == 0 would shift right by 32, which is undefined.
 * Every use in this file passes 16, 12, 8 or 7.
 */
#define ROTL32(v, n) \
  (U32V((v) << (n)) | ((v) >> (32 - (n))))

/* Assemble a 32-bit word from the four bytes at p, least significant first. */
#define U8TO32_LITTLE(p) \
  (((u32)((p)[0])      ) | \
   ((u32)((p)[1]) <<  8) | \
   ((u32)((p)[2]) << 16) | \
   ((u32)((p)[3]) << 24))

/* Store the 32-bit word v into the four bytes at p, least significant first. */
#define U32TO8_LITTLE(p, v) \
  do { \
    (p)[0] = U8V((v)      ); \
    (p)[1] = U8V((v) >>  8); \
    (p)[2] = U8V((v) >> 16); \
    (p)[3] = U8V((v) >> 24); \
  } while (0)

/* 32-bit primitives the quarter round is built from. */
#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

/*
 * One ChaCha quarter round over four 32-bit lvalues.
 * The expansion is a plain statement sequence that already ends in ';',
 * and callers invoke it without a trailing semicolon, so it must only be
 * used where several statements are allowed (never as the unbraced body
 * of an if/else/loop).  Each argument is evaluated more than once.
 */
#define QUARTERROUND(a,b,c,d) \
  a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
  a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);

/*
 * Constant words loaded into state words 0..3 for 256-bit (sigma) and
 * 128-bit (tau) keys.  Each is exactly 16 bytes: the arrays hold no
 * terminating NUL, so they must not be treated as C strings.
 */
static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";
55150890b0SMark Murray
56c1e80940SXin LI LOCAL void
chacha_keysetup(chacha_ctx * x,const u8 * k,u32 kbits)57150890b0SMark Murray chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits)
58150890b0SMark Murray {
59150890b0SMark Murray const char *constants;
60150890b0SMark Murray
61150890b0SMark Murray x->input[4] = U8TO32_LITTLE(k + 0);
62150890b0SMark Murray x->input[5] = U8TO32_LITTLE(k + 4);
63150890b0SMark Murray x->input[6] = U8TO32_LITTLE(k + 8);
64150890b0SMark Murray x->input[7] = U8TO32_LITTLE(k + 12);
65150890b0SMark Murray if (kbits == 256) { /* recommended */
66150890b0SMark Murray k += 16;
67150890b0SMark Murray constants = sigma;
68150890b0SMark Murray } else { /* kbits == 128 */
69150890b0SMark Murray constants = tau;
70150890b0SMark Murray }
71150890b0SMark Murray x->input[8] = U8TO32_LITTLE(k + 0);
72150890b0SMark Murray x->input[9] = U8TO32_LITTLE(k + 4);
73150890b0SMark Murray x->input[10] = U8TO32_LITTLE(k + 8);
74150890b0SMark Murray x->input[11] = U8TO32_LITTLE(k + 12);
75150890b0SMark Murray x->input[0] = U8TO32_LITTLE(constants + 0);
76150890b0SMark Murray x->input[1] = U8TO32_LITTLE(constants + 4);
77150890b0SMark Murray x->input[2] = U8TO32_LITTLE(constants + 8);
78150890b0SMark Murray x->input[3] = U8TO32_LITTLE(constants + 12);
79150890b0SMark Murray }
80150890b0SMark Murray
81c1e80940SXin LI LOCAL void
chacha_ivsetup(chacha_ctx * x,const u8 * iv,const u8 * counter)82150890b0SMark Murray chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
83150890b0SMark Murray {
84*7d93ab5eSConrad Meyer #ifndef CHACHA_NONCE0_CTR128
85150890b0SMark Murray x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
86150890b0SMark Murray x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
87150890b0SMark Murray x->input[14] = U8TO32_LITTLE(iv + 0);
88150890b0SMark Murray x->input[15] = U8TO32_LITTLE(iv + 4);
89*7d93ab5eSConrad Meyer #else
90*7d93ab5eSConrad Meyer // CHACHA_STATELEN
91*7d93ab5eSConrad Meyer (void)iv;
92*7d93ab5eSConrad Meyer x->input[12] = U8TO32_LITTLE(counter + 0);
93*7d93ab5eSConrad Meyer x->input[13] = U8TO32_LITTLE(counter + 4);
94*7d93ab5eSConrad Meyer x->input[14] = U8TO32_LITTLE(counter + 8);
95*7d93ab5eSConrad Meyer x->input[15] = U8TO32_LITTLE(counter + 12);
96*7d93ab5eSConrad Meyer #endif
97150890b0SMark Murray }
98150890b0SMark Murray
#ifdef CHACHA_NONCE0_CTR128
/*
 * Export state words 12..15 as 16 little-endian bytes.
 *
 * x        state to read; it is not modified.
 * counter  output buffer; exactly 16 bytes are written.
 *
 * The byte layout is the one chacha_ivsetup() reads back in this
 * configuration, so the saved bytes can be passed to it as counter.
 */
LOCAL void
chacha_ctrsave(const chacha_ctx *x, u8 *counter)
{
  U32TO8_LITTLE(counter + 0, x->input[12]);
  U32TO8_LITTLE(counter + 4, x->input[13]);
  U32TO8_LITTLE(counter + 8, x->input[14]);
  U32TO8_LITTLE(counter + 12, x->input[15]);
}
#endif
109*7d93ab5eSConrad Meyer
110c1e80940SXin LI LOCAL void
chacha_encrypt_bytes(chacha_ctx * x,const u8 * m,u8 * c,u32 bytes)111150890b0SMark Murray chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
112150890b0SMark Murray {
113150890b0SMark Murray u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
114150890b0SMark Murray u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
115150890b0SMark Murray u8 *ctarget = NULL;
116150890b0SMark Murray u8 tmp[64];
117150890b0SMark Murray u_int i;
118150890b0SMark Murray
119150890b0SMark Murray if (!bytes) return;
120150890b0SMark Murray
121150890b0SMark Murray j0 = x->input[0];
122150890b0SMark Murray j1 = x->input[1];
123150890b0SMark Murray j2 = x->input[2];
124150890b0SMark Murray j3 = x->input[3];
125150890b0SMark Murray j4 = x->input[4];
126150890b0SMark Murray j5 = x->input[5];
127150890b0SMark Murray j6 = x->input[6];
128150890b0SMark Murray j7 = x->input[7];
129150890b0SMark Murray j8 = x->input[8];
130150890b0SMark Murray j9 = x->input[9];
131150890b0SMark Murray j10 = x->input[10];
132150890b0SMark Murray j11 = x->input[11];
133150890b0SMark Murray j12 = x->input[12];
134150890b0SMark Murray j13 = x->input[13];
135150890b0SMark Murray j14 = x->input[14];
136150890b0SMark Murray j15 = x->input[15];
137150890b0SMark Murray
138150890b0SMark Murray for (;;) {
139150890b0SMark Murray if (bytes < 64) {
140987733edSConrad Meyer #ifndef KEYSTREAM_ONLY
141150890b0SMark Murray for (i = 0;i < bytes;++i) tmp[i] = m[i];
142150890b0SMark Murray m = tmp;
143987733edSConrad Meyer #endif
144150890b0SMark Murray ctarget = c;
145150890b0SMark Murray c = tmp;
146150890b0SMark Murray }
147150890b0SMark Murray x0 = j0;
148150890b0SMark Murray x1 = j1;
149150890b0SMark Murray x2 = j2;
150150890b0SMark Murray x3 = j3;
151150890b0SMark Murray x4 = j4;
152150890b0SMark Murray x5 = j5;
153150890b0SMark Murray x6 = j6;
154150890b0SMark Murray x7 = j7;
155150890b0SMark Murray x8 = j8;
156150890b0SMark Murray x9 = j9;
157150890b0SMark Murray x10 = j10;
158150890b0SMark Murray x11 = j11;
159150890b0SMark Murray x12 = j12;
160150890b0SMark Murray x13 = j13;
161150890b0SMark Murray x14 = j14;
162150890b0SMark Murray x15 = j15;
163150890b0SMark Murray for (i = 20;i > 0;i -= 2) {
164150890b0SMark Murray QUARTERROUND( x0, x4, x8,x12)
165150890b0SMark Murray QUARTERROUND( x1, x5, x9,x13)
166150890b0SMark Murray QUARTERROUND( x2, x6,x10,x14)
167150890b0SMark Murray QUARTERROUND( x3, x7,x11,x15)
168150890b0SMark Murray QUARTERROUND( x0, x5,x10,x15)
169150890b0SMark Murray QUARTERROUND( x1, x6,x11,x12)
170150890b0SMark Murray QUARTERROUND( x2, x7, x8,x13)
171150890b0SMark Murray QUARTERROUND( x3, x4, x9,x14)
172150890b0SMark Murray }
173150890b0SMark Murray x0 = PLUS(x0,j0);
174150890b0SMark Murray x1 = PLUS(x1,j1);
175150890b0SMark Murray x2 = PLUS(x2,j2);
176150890b0SMark Murray x3 = PLUS(x3,j3);
177150890b0SMark Murray x4 = PLUS(x4,j4);
178150890b0SMark Murray x5 = PLUS(x5,j5);
179150890b0SMark Murray x6 = PLUS(x6,j6);
180150890b0SMark Murray x7 = PLUS(x7,j7);
181150890b0SMark Murray x8 = PLUS(x8,j8);
182150890b0SMark Murray x9 = PLUS(x9,j9);
183150890b0SMark Murray x10 = PLUS(x10,j10);
184150890b0SMark Murray x11 = PLUS(x11,j11);
185150890b0SMark Murray x12 = PLUS(x12,j12);
186150890b0SMark Murray x13 = PLUS(x13,j13);
187150890b0SMark Murray x14 = PLUS(x14,j14);
188150890b0SMark Murray x15 = PLUS(x15,j15);
189150890b0SMark Murray
190c1e80940SXin LI #ifndef KEYSTREAM_ONLY
191150890b0SMark Murray x0 = XOR(x0,U8TO32_LITTLE(m + 0));
192150890b0SMark Murray x1 = XOR(x1,U8TO32_LITTLE(m + 4));
193150890b0SMark Murray x2 = XOR(x2,U8TO32_LITTLE(m + 8));
194150890b0SMark Murray x3 = XOR(x3,U8TO32_LITTLE(m + 12));
195150890b0SMark Murray x4 = XOR(x4,U8TO32_LITTLE(m + 16));
196150890b0SMark Murray x5 = XOR(x5,U8TO32_LITTLE(m + 20));
197150890b0SMark Murray x6 = XOR(x6,U8TO32_LITTLE(m + 24));
198150890b0SMark Murray x7 = XOR(x7,U8TO32_LITTLE(m + 28));
199150890b0SMark Murray x8 = XOR(x8,U8TO32_LITTLE(m + 32));
200150890b0SMark Murray x9 = XOR(x9,U8TO32_LITTLE(m + 36));
201150890b0SMark Murray x10 = XOR(x10,U8TO32_LITTLE(m + 40));
202150890b0SMark Murray x11 = XOR(x11,U8TO32_LITTLE(m + 44));
203150890b0SMark Murray x12 = XOR(x12,U8TO32_LITTLE(m + 48));
204150890b0SMark Murray x13 = XOR(x13,U8TO32_LITTLE(m + 52));
205150890b0SMark Murray x14 = XOR(x14,U8TO32_LITTLE(m + 56));
206150890b0SMark Murray x15 = XOR(x15,U8TO32_LITTLE(m + 60));
207c1e80940SXin LI #endif
208150890b0SMark Murray
209150890b0SMark Murray j12 = PLUSONE(j12);
210150890b0SMark Murray if (!j12) {
211150890b0SMark Murray j13 = PLUSONE(j13);
212*7d93ab5eSConrad Meyer #ifndef CHACHA_NONCE0_CTR128
213150890b0SMark Murray /* stopping at 2^70 bytes per nonce is user's responsibility */
214*7d93ab5eSConrad Meyer #else
215*7d93ab5eSConrad Meyer if (!j13) {
216*7d93ab5eSConrad Meyer j14 = PLUSONE(j14);
217*7d93ab5eSConrad Meyer if (!j14) {
218*7d93ab5eSConrad Meyer j15 = PLUSONE(j15);
219*7d93ab5eSConrad Meyer }
220*7d93ab5eSConrad Meyer }
221*7d93ab5eSConrad Meyer #endif
222150890b0SMark Murray }
223150890b0SMark Murray
224150890b0SMark Murray U32TO8_LITTLE(c + 0,x0);
225150890b0SMark Murray U32TO8_LITTLE(c + 4,x1);
226150890b0SMark Murray U32TO8_LITTLE(c + 8,x2);
227150890b0SMark Murray U32TO8_LITTLE(c + 12,x3);
228150890b0SMark Murray U32TO8_LITTLE(c + 16,x4);
229150890b0SMark Murray U32TO8_LITTLE(c + 20,x5);
230150890b0SMark Murray U32TO8_LITTLE(c + 24,x6);
231150890b0SMark Murray U32TO8_LITTLE(c + 28,x7);
232150890b0SMark Murray U32TO8_LITTLE(c + 32,x8);
233150890b0SMark Murray U32TO8_LITTLE(c + 36,x9);
234150890b0SMark Murray U32TO8_LITTLE(c + 40,x10);
235150890b0SMark Murray U32TO8_LITTLE(c + 44,x11);
236150890b0SMark Murray U32TO8_LITTLE(c + 48,x12);
237150890b0SMark Murray U32TO8_LITTLE(c + 52,x13);
238150890b0SMark Murray U32TO8_LITTLE(c + 56,x14);
239150890b0SMark Murray U32TO8_LITTLE(c + 60,x15);
240150890b0SMark Murray
241150890b0SMark Murray if (bytes <= 64) {
242150890b0SMark Murray if (bytes < 64) {
243150890b0SMark Murray for (i = 0;i < bytes;++i) ctarget[i] = c[i];
244150890b0SMark Murray }
245150890b0SMark Murray x->input[12] = j12;
246150890b0SMark Murray x->input[13] = j13;
247*7d93ab5eSConrad Meyer #ifdef CHACHA_NONCE0_CTR128
248*7d93ab5eSConrad Meyer x->input[14] = j14;
249*7d93ab5eSConrad Meyer x->input[15] = j15;
250*7d93ab5eSConrad Meyer #endif
251150890b0SMark Murray return;
252150890b0SMark Murray }
253150890b0SMark Murray bytes -= 64;
254150890b0SMark Murray c += 64;
255c1e80940SXin LI #ifndef KEYSTREAM_ONLY
256150890b0SMark Murray m += 64;
257c1e80940SXin LI #endif
258150890b0SMark Murray }
259150890b0SMark Murray }
260