1 /* 2 chacha-merged.c version 20080118 3 D. J. Bernstein 4 Public domain. 5 */ 6 7 /* $OpenBSD: chacha.c,v 1.1 2013/11/21 00:45:44 djm Exp $ */ 8 9 #include <sys/param.h> 10 #include <sys/types.h> 11 12 #include <crypto/chacha20/chacha.h> 13 14 typedef uint8_t u8; 15 typedef uint32_t u32; 16 17 typedef struct chacha_ctx chacha_ctx; 18 19 #define U8C(v) (v##U) 20 #define U32C(v) (v##U) 21 22 #define U8V(v) ((u8)(v) & U8C(0xFF)) 23 #define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF)) 24 25 #define ROTL32(v, n) \ 26 (U32V((v) << (n)) | ((v) >> (32 - (n)))) 27 28 #define U8TO32_LITTLE(p) \ 29 (((u32)((p)[0]) ) | \ 30 ((u32)((p)[1]) << 8) | \ 31 ((u32)((p)[2]) << 16) | \ 32 ((u32)((p)[3]) << 24)) 33 34 #define U32TO8_LITTLE(p, v) \ 35 do { \ 36 (p)[0] = U8V((v) ); \ 37 (p)[1] = U8V((v) >> 8); \ 38 (p)[2] = U8V((v) >> 16); \ 39 (p)[3] = U8V((v) >> 24); \ 40 } while (0) 41 42 #define ROTATE(v,c) (ROTL32(v,c)) 43 #define XOR(v,w) ((v) ^ (w)) 44 #define PLUS(v,w) (U32V((v) + (w))) 45 #define PLUSONE(v) (PLUS((v),1)) 46 47 #define QUARTERROUND(a,b,c,d) \ 48 a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \ 49 c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \ 50 a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \ 51 c = PLUS(c,d); b = ROTATE(XOR(b,c), 7); 52 53 static const char sigma[16] = "expand 32-byte k"; 54 static const char tau[16] = "expand 16-byte k"; 55 56 LOCAL void 57 chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits) 58 { 59 const char *constants; 60 61 x->input[4] = U8TO32_LITTLE(k + 0); 62 x->input[5] = U8TO32_LITTLE(k + 4); 63 x->input[6] = U8TO32_LITTLE(k + 8); 64 x->input[7] = U8TO32_LITTLE(k + 12); 65 if (kbits == 256) { /* recommended */ 66 k += 16; 67 constants = sigma; 68 } else { /* kbits == 128 */ 69 constants = tau; 70 } 71 x->input[8] = U8TO32_LITTLE(k + 0); 72 x->input[9] = U8TO32_LITTLE(k + 4); 73 x->input[10] = U8TO32_LITTLE(k + 8); 74 x->input[11] = U8TO32_LITTLE(k + 12); 75 x->input[0] = U8TO32_LITTLE(constants + 0); 76 x->input[1] = U8TO32_LITTLE(constants + 4); 77 x->input[2] = U8TO32_LITTLE(constants + 8); 78 x->input[3] = U8TO32_LITTLE(constants + 12); 79 } 80 81 LOCAL void 82 chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter) 83 { 84 #ifndef CHACHA_NONCE0_CTR128 85 x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0); 86 x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4); 87 x->input[14] = U8TO32_LITTLE(iv + 0); 88 x->input[15] = U8TO32_LITTLE(iv + 4); 89 #else 90 // CHACHA_STATELEN 91 (void)iv; 92 x->input[12] = U8TO32_LITTLE(counter + 0); 93 x->input[13] = U8TO32_LITTLE(counter + 4); 94 x->input[14] = U8TO32_LITTLE(counter + 8); 95 x->input[15] = U8TO32_LITTLE(counter + 12); 96 #endif 97 } 98 99 #ifdef CHACHA_NONCE0_CTR128 100 LOCAL void 101 chacha_ctrsave(const chacha_ctx *x, u8 *counter) 102 { 103 U32TO8_LITTLE(counter + 0, x->input[12]); 104 U32TO8_LITTLE(counter + 4, x->input[13]); 105 U32TO8_LITTLE(counter + 8, x->input[14]); 106 U32TO8_LITTLE(counter + 12, x->input[15]); 107 } 108 #endif 109 110 LOCAL void 111 chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes) 112 { 113 u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; 114 u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; 115 u8 *ctarget = NULL; 116 u8 tmp[64]; 117 u_int i; 118 119 if (!bytes) return; 120 121 j0 = x->input[0]; 122 j1 = x->input[1]; 123 j2 = x->input[2]; 124 j3 = x->input[3]; 125 j4 = x->input[4]; 126 j5 = x->input[5]; 127 j6 = x->input[6]; 128 j7 = x->input[7]; 129 j8 = x->input[8]; 130 j9 = x->input[9]; 131 j10 = x->input[10]; 132 j11 = x->input[11]; 133 j12 = x->input[12]; 134 j13 = x->input[13]; 135 j14 = x->input[14]; 136 j15 = x->input[15]; 137 138 for (;;) { 139 if (bytes < 64) { 140 #ifndef KEYSTREAM_ONLY 141 for (i = 0;i < bytes;++i) tmp[i] = m[i]; 142 m = tmp; 143 #endif 144 ctarget = c; 145 c = tmp; 146 } 147 x0 = j0; 148 x1 = j1; 149 x2 = j2; 150 x3 = j3; 151 x4 = j4; 152 x5 = j5; 153 x6 = j6; 154 x7 = j7; 155 x8 = j8; 156 x9 = j9; 157 x10 = j10; 158 x11 = j11; 159 x12 = j12; 160 x13 = j13; 161 x14 = j14; 162 x15 = j15; 163 for (i = 20;i > 0;i -= 2) { 164 QUARTERROUND( x0, x4, x8,x12) 165 QUARTERROUND( x1, x5, x9,x13) 166 QUARTERROUND( x2, x6,x10,x14) 167 QUARTERROUND( x3, x7,x11,x15) 168 QUARTERROUND( x0, x5,x10,x15) 169 QUARTERROUND( x1, x6,x11,x12) 170 QUARTERROUND( x2, x7, x8,x13) 171 QUARTERROUND( x3, x4, x9,x14) 172 } 173 x0 = PLUS(x0,j0); 174 x1 = PLUS(x1,j1); 175 x2 = PLUS(x2,j2); 176 x3 = PLUS(x3,j3); 177 x4 = PLUS(x4,j4); 178 x5 = PLUS(x5,j5); 179 x6 = PLUS(x6,j6); 180 x7 = PLUS(x7,j7); 181 x8 = PLUS(x8,j8); 182 x9 = PLUS(x9,j9); 183 x10 = PLUS(x10,j10); 184 x11 = PLUS(x11,j11); 185 x12 = PLUS(x12,j12); 186 x13 = PLUS(x13,j13); 187 x14 = PLUS(x14,j14); 188 x15 = PLUS(x15,j15); 189 190 #ifndef KEYSTREAM_ONLY 191 x0 = XOR(x0,U8TO32_LITTLE(m + 0)); 192 x1 = XOR(x1,U8TO32_LITTLE(m + 4)); 193 x2 = XOR(x2,U8TO32_LITTLE(m + 8)); 194 x3 = XOR(x3,U8TO32_LITTLE(m + 12)); 195 x4 = XOR(x4,U8TO32_LITTLE(m + 16)); 196 x5 = XOR(x5,U8TO32_LITTLE(m + 20)); 197 x6 = XOR(x6,U8TO32_LITTLE(m + 24)); 198 x7 = XOR(x7,U8TO32_LITTLE(m + 28)); 199 x8 = XOR(x8,U8TO32_LITTLE(m + 32)); 200 x9 = XOR(x9,U8TO32_LITTLE(m + 36)); 201 x10 = XOR(x10,U8TO32_LITTLE(m + 40)); 202 x11 = XOR(x11,U8TO32_LITTLE(m + 44)); 203 x12 = XOR(x12,U8TO32_LITTLE(m + 48)); 204 x13 = XOR(x13,U8TO32_LITTLE(m + 52)); 205 x14 = XOR(x14,U8TO32_LITTLE(m + 56)); 206 x15 = XOR(x15,U8TO32_LITTLE(m + 60)); 207 #endif 208 209 j12 = PLUSONE(j12); 210 if (!j12) { 211 j13 = PLUSONE(j13); 212 #ifndef CHACHA_NONCE0_CTR128 213 /* stopping at 2^70 bytes per nonce is user's responsibility */ 214 #else 215 if (!j13) { 216 j14 = PLUSONE(j14); 217 if (!j14) { 218 j15 = PLUSONE(j15); 219 } 220 } 221 #endif 222 } 223 224 U32TO8_LITTLE(c + 0,x0); 225 U32TO8_LITTLE(c + 4,x1); 226 U32TO8_LITTLE(c + 8,x2); 227 U32TO8_LITTLE(c + 12,x3); 228 U32TO8_LITTLE(c + 16,x4); 229 U32TO8_LITTLE(c + 20,x5); 230 U32TO8_LITTLE(c + 24,x6); 231 U32TO8_LITTLE(c + 28,x7); 232 U32TO8_LITTLE(c + 32,x8); 233 U32TO8_LITTLE(c + 36,x9); 234 U32TO8_LITTLE(c + 40,x10); 235 U32TO8_LITTLE(c + 44,x11); 236 U32TO8_LITTLE(c + 48,x12); 237 U32TO8_LITTLE(c + 52,x13); 238 U32TO8_LITTLE(c + 56,x14); 239 U32TO8_LITTLE(c + 60,x15); 240 241 if (bytes <= 64) { 242 if (bytes < 64) { 243 for (i = 0;i < bytes;++i) ctarget[i] = c[i]; 244 } 245 x->input[12] = j12; 246 x->input[13] = j13; 247 #ifdef CHACHA_NONCE0_CTR128 248 x->input[14] = j14; 249 x->input[15] = j15; 250 #endif 251 return; 252 } 253 bytes -= 64; 254 c += 64; 255 #ifndef KEYSTREAM_ONLY 256 m += 64; 257 #endif 258 } 259 } 260