xref: /freebsd/crypto/openssh/chacha.c (revision 1d386b48a555f61cb7325543adbbb5c3f3407a66)
1 /* $OpenBSD: chacha.c,v 1.2 2023/07/17 05:26:38 djm Exp $ */
2 /*
3 chacha-merged.c version 20080118
4 D. J. Bernstein
5 Public domain.
6 */
7 
8 #include "includes.h"
9 
10 #include "chacha.h"
11 
/*
 * Short aliases for the byte and word types used throughout this file.
 * NOTE(review): u32 is a plain unsigned int and is assumed to be at least
 * 32 bits wide; the U32V() mask below trims results back to 32 bits.
 */
typedef unsigned char u8;
typedef unsigned int u32;

/* The struct itself is not defined in this file (see the included chacha.h). */
typedef struct chacha_ctx chacha_ctx;

/* Paste a U suffix onto a literal so the constant is unsigned. */
#define U8C(v) (v##U)
#define U32C(v) (v##U)

/* Reduce a value to its low 8 / low 32 bits. */
#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

/*
 * Rotate v left by n bits within 32 bits.  v is evaluated twice, and n must
 * be in 1..31 because the right-hand shift is by (32 - n).  Only the left
 * shift is masked, so v itself must already fit in 32 bits.
 */
#define ROTL32(v, n) \
  (U32V((v) << (n)) | ((v) >> (32 - (n))))

/*
 * Assemble a u32 from the four bytes at p, least significant byte first.
 * p is only indexed, never cast, so no alignment is required.  If p points
 * at plain char and char is signed, bytes >= 0x80 would sign-extend; the
 * only char sources in this file are the ASCII constants sigma and tau.
 */
#define U8TO32_LITTLE(p) \
  (((u32)((p)[0])      ) | \
   ((u32)((p)[1]) <<  8) | \
   ((u32)((p)[2]) << 16) | \
   ((u32)((p)[3]) << 24))

/*
 * Store v into the four bytes at p, least significant byte first.
 * Both p and v are evaluated four times.
 */
#define U32TO8_LITTLE(p, v) \
  do { \
    (p)[0] = U8V((v)      ); \
    (p)[1] = U8V((v) >>  8); \
    (p)[2] = U8V((v) >> 16); \
    (p)[3] = U8V((v) >> 24); \
  } while (0)

/*
 * Word primitives used by the round function.  PLUS masks its sum with
 * U32V(), i.e. addition is modulo 2^32.
 */
#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

/*
 * One quarter round over four state words: four add / xor / rotate steps
 * with rotation amounts 16, 12, 8 and 7.  All four arguments must be
 * lvalues.  The expansion is a bare sequence of statements that already
 * ends in ';', so it is invoked without a trailing semicolon and must not
 * be used as the unbraced body of an if/else or loop.
 */
#define QUARTERROUND(a,b,c,d) \
  a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
  a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);

/*
 * 16-byte constants loaded into state words 0..3 by chacha_keysetup():
 * sigma when kbits == 256, tau otherwise.  Each literal is exactly 16
 * characters, so the arrays hold no terminating NUL and must not be
 * passed to string functions.
 */
static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";
53 
54 void
55 chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits)
56 {
57   const char *constants;
58 
59   x->input[4] = U8TO32_LITTLE(k + 0);
60   x->input[5] = U8TO32_LITTLE(k + 4);
61   x->input[6] = U8TO32_LITTLE(k + 8);
62   x->input[7] = U8TO32_LITTLE(k + 12);
63   if (kbits == 256) { /* recommended */
64     k += 16;
65     constants = sigma;
66   } else { /* kbits == 128 */
67     constants = tau;
68   }
69   x->input[8] = U8TO32_LITTLE(k + 0);
70   x->input[9] = U8TO32_LITTLE(k + 4);
71   x->input[10] = U8TO32_LITTLE(k + 8);
72   x->input[11] = U8TO32_LITTLE(k + 12);
73   x->input[0] = U8TO32_LITTLE(constants + 0);
74   x->input[1] = U8TO32_LITTLE(constants + 4);
75   x->input[2] = U8TO32_LITTLE(constants + 8);
76   x->input[3] = U8TO32_LITTLE(constants + 12);
77 }
78 
79 void
80 chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
81 {
82   x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
83   x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
84   x->input[14] = U8TO32_LITTLE(iv + 0);
85   x->input[15] = U8TO32_LITTLE(iv + 4);
86 }
87 
88 void
89 chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
90 {
91   u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
92   u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
93   u8 *ctarget = NULL;
94   u8 tmp[64];
95   u_int i;
96 
97   if (!bytes) return;
98 
99   j0 = x->input[0];
100   j1 = x->input[1];
101   j2 = x->input[2];
102   j3 = x->input[3];
103   j4 = x->input[4];
104   j5 = x->input[5];
105   j6 = x->input[6];
106   j7 = x->input[7];
107   j8 = x->input[8];
108   j9 = x->input[9];
109   j10 = x->input[10];
110   j11 = x->input[11];
111   j12 = x->input[12];
112   j13 = x->input[13];
113   j14 = x->input[14];
114   j15 = x->input[15];
115 
116   for (;;) {
117     if (bytes < 64) {
118       for (i = 0;i < bytes;++i) tmp[i] = m[i];
119       m = tmp;
120       ctarget = c;
121       c = tmp;
122     }
123     x0 = j0;
124     x1 = j1;
125     x2 = j2;
126     x3 = j3;
127     x4 = j4;
128     x5 = j5;
129     x6 = j6;
130     x7 = j7;
131     x8 = j8;
132     x9 = j9;
133     x10 = j10;
134     x11 = j11;
135     x12 = j12;
136     x13 = j13;
137     x14 = j14;
138     x15 = j15;
139     for (i = 20;i > 0;i -= 2) {
140       QUARTERROUND( x0, x4, x8,x12)
141       QUARTERROUND( x1, x5, x9,x13)
142       QUARTERROUND( x2, x6,x10,x14)
143       QUARTERROUND( x3, x7,x11,x15)
144       QUARTERROUND( x0, x5,x10,x15)
145       QUARTERROUND( x1, x6,x11,x12)
146       QUARTERROUND( x2, x7, x8,x13)
147       QUARTERROUND( x3, x4, x9,x14)
148     }
149     x0 = PLUS(x0,j0);
150     x1 = PLUS(x1,j1);
151     x2 = PLUS(x2,j2);
152     x3 = PLUS(x3,j3);
153     x4 = PLUS(x4,j4);
154     x5 = PLUS(x5,j5);
155     x6 = PLUS(x6,j6);
156     x7 = PLUS(x7,j7);
157     x8 = PLUS(x8,j8);
158     x9 = PLUS(x9,j9);
159     x10 = PLUS(x10,j10);
160     x11 = PLUS(x11,j11);
161     x12 = PLUS(x12,j12);
162     x13 = PLUS(x13,j13);
163     x14 = PLUS(x14,j14);
164     x15 = PLUS(x15,j15);
165 
166     x0 = XOR(x0,U8TO32_LITTLE(m + 0));
167     x1 = XOR(x1,U8TO32_LITTLE(m + 4));
168     x2 = XOR(x2,U8TO32_LITTLE(m + 8));
169     x3 = XOR(x3,U8TO32_LITTLE(m + 12));
170     x4 = XOR(x4,U8TO32_LITTLE(m + 16));
171     x5 = XOR(x5,U8TO32_LITTLE(m + 20));
172     x6 = XOR(x6,U8TO32_LITTLE(m + 24));
173     x7 = XOR(x7,U8TO32_LITTLE(m + 28));
174     x8 = XOR(x8,U8TO32_LITTLE(m + 32));
175     x9 = XOR(x9,U8TO32_LITTLE(m + 36));
176     x10 = XOR(x10,U8TO32_LITTLE(m + 40));
177     x11 = XOR(x11,U8TO32_LITTLE(m + 44));
178     x12 = XOR(x12,U8TO32_LITTLE(m + 48));
179     x13 = XOR(x13,U8TO32_LITTLE(m + 52));
180     x14 = XOR(x14,U8TO32_LITTLE(m + 56));
181     x15 = XOR(x15,U8TO32_LITTLE(m + 60));
182 
183     j12 = PLUSONE(j12);
184     if (!j12) {
185       j13 = PLUSONE(j13);
186       /* stopping at 2^70 bytes per nonce is user's responsibility */
187     }
188 
189     U32TO8_LITTLE(c + 0,x0);
190     U32TO8_LITTLE(c + 4,x1);
191     U32TO8_LITTLE(c + 8,x2);
192     U32TO8_LITTLE(c + 12,x3);
193     U32TO8_LITTLE(c + 16,x4);
194     U32TO8_LITTLE(c + 20,x5);
195     U32TO8_LITTLE(c + 24,x6);
196     U32TO8_LITTLE(c + 28,x7);
197     U32TO8_LITTLE(c + 32,x8);
198     U32TO8_LITTLE(c + 36,x9);
199     U32TO8_LITTLE(c + 40,x10);
200     U32TO8_LITTLE(c + 44,x11);
201     U32TO8_LITTLE(c + 48,x12);
202     U32TO8_LITTLE(c + 52,x13);
203     U32TO8_LITTLE(c + 56,x14);
204     U32TO8_LITTLE(c + 60,x15);
205 
206     if (bytes <= 64) {
207       if (bytes < 64) {
208         for (i = 0;i < bytes;++i) ctarget[i] = c[i];
209       }
210       x->input[12] = j12;
211       x->input[13] = j13;
212       return;
213     }
214     bytes -= 64;
215     c += 64;
216     m += 64;
217   }
218 }
219