1 /* 2 chacha-merged.c version 20080118 3 D. J. Bernstein 4 Public domain. 5 */ 6 7 /* $OpenBSD: chacha_private.h,v 1.2 2013/10/04 07:02:27 djm Exp $ */ 8 9 #include <chacha.h> 10 #include <stddef.h> 11 12 typedef unsigned char u8; 13 typedef unsigned int u32; 14 typedef unsigned int u_int; 15 16 #define U8C(v) (v##U) 17 #define U32C(v) (v##U) 18 19 #define U8V(v) ((u8)(v) & U8C(0xFF)) 20 #define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF)) 21 22 #define ROTL32(v, n) \ 23 (U32V((v) << (n)) | ((v) >> (32 - (n)))) 24 25 #define U8TO32_LITTLE(p) \ 26 (((u32)((p)[0]) ) | \ 27 ((u32)((p)[1]) << 8) | \ 28 ((u32)((p)[2]) << 16) | \ 29 ((u32)((p)[3]) << 24)) 30 31 #define U32TO8_LITTLE(p, v) \ 32 do { \ 33 (p)[0] = U8V((v) ); \ 34 (p)[1] = U8V((v) >> 8); \ 35 (p)[2] = U8V((v) >> 16); \ 36 (p)[3] = U8V((v) >> 24); \ 37 } while (0) 38 39 #define ROTATE(v,c) (ROTL32(v,c)) 40 #define XOR(v,w) ((v) ^ (w)) 41 #define PLUS(v,w) (U32V((v) + (w))) 42 #define PLUSONE(v) (PLUS((v),1)) 43 44 #define QUARTERROUND(a,b,c,d) \ 45 a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \ 46 c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \ 47 a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \ 48 c = PLUS(c,d); b = ROTATE(XOR(b,c), 7); 49 50 static const char sigma[16] = "expand 32-byte k"; 51 static const char tau[16] = "expand 16-byte k"; 52 53 void 54 chacha_keysetup(chacha_ctx_t *x, const u8 *k, u32 kbits, u32 ivbits __unused) 55 { 56 const char *constants; 57 58 x->chacha_input[4] = U8TO32_LITTLE(k + 0); 59 x->chacha_input[5] = U8TO32_LITTLE(k + 4); 60 x->chacha_input[6] = U8TO32_LITTLE(k + 8); 61 x->chacha_input[7] = U8TO32_LITTLE(k + 12); 62 if (kbits == 256) { /* recommended */ 63 k += 16; 64 constants = sigma; 65 } else { /* kbits == 128 */ 66 constants = tau; 67 } 68 x->chacha_input[8] = U8TO32_LITTLE(k + 0); 69 x->chacha_input[9] = U8TO32_LITTLE(k + 4); 70 x->chacha_input[10] = U8TO32_LITTLE(k + 8); 71 x->chacha_input[11] = U8TO32_LITTLE(k + 12); 72 x->chacha_input[0] = U8TO32_LITTLE(constants + 0); 73 x->chacha_input[1] = U8TO32_LITTLE(constants + 4); 74 x->chacha_input[2] = U8TO32_LITTLE(constants + 8); 75 x->chacha_input[3] = U8TO32_LITTLE(constants + 12); 76 } 77 78 void 79 chacha_ivsetup(chacha_ctx_t *x, const u8 *iv) 80 { 81 x->chacha_input[12] = 0; 82 x->chacha_input[13] = 0; 83 x->chacha_input[14] = U8TO32_LITTLE(iv + 0); 84 x->chacha_input[15] = U8TO32_LITTLE(iv + 4); 85 } 86 87 void 88 chacha_encrypt_bytes(chacha_ctx_t *x, const u8 *m, u8 *c, u32 bytes) 89 { 90 u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; 91 u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; 92 u8 *ctarget = NULL; 93 u8 tmp[64]; 94 u_int i; 95 96 if (!bytes) return; 97 98 j0 = x->chacha_input[0]; 99 j1 = x->chacha_input[1]; 100 j2 = x->chacha_input[2]; 101 j3 = x->chacha_input[3]; 102 j4 = x->chacha_input[4]; 103 j5 = x->chacha_input[5]; 104 j6 = x->chacha_input[6]; 105 j7 = x->chacha_input[7]; 106 j8 = x->chacha_input[8]; 107 j9 = x->chacha_input[9]; 108 j10 = x->chacha_input[10]; 109 j11 = x->chacha_input[11]; 110 j12 = x->chacha_input[12]; 111 j13 = x->chacha_input[13]; 112 j14 = x->chacha_input[14]; 113 j15 = x->chacha_input[15]; 114 115 for (;;) { 116 if (bytes < 64) { 117 for (i = 0;i < bytes;++i) tmp[i] = m[i]; 118 m = tmp; 119 ctarget = c; 120 c = tmp; 121 } 122 x0 = j0; 123 x1 = j1; 124 x2 = j2; 125 x3 = j3; 126 x4 = j4; 127 x5 = j5; 128 x6 = j6; 129 x7 = j7; 130 x8 = j8; 131 x9 = j9; 132 x10 = j10; 133 x11 = j11; 134 x12 = j12; 135 x13 = j13; 136 x14 = j14; 137 x15 = j15; 138 for (i = 20;i > 0;i -= 2) { 139 QUARTERROUND( x0, x4, x8,x12) 140 QUARTERROUND( x1, x5, x9,x13) 141 QUARTERROUND( x2, x6,x10,x14) 142 QUARTERROUND( x3, x7,x11,x15) 143 QUARTERROUND( x0, x5,x10,x15) 144 QUARTERROUND( x1, x6,x11,x12) 145 QUARTERROUND( x2, x7, x8,x13) 146 QUARTERROUND( x3, x4, x9,x14) 147 } 148 x0 = PLUS(x0,j0); 149 x1 = PLUS(x1,j1); 150 x2 = PLUS(x2,j2); 151 x3 = PLUS(x3,j3); 152 x4 = PLUS(x4,j4); 153 x5 = PLUS(x5,j5); 154 x6 = PLUS(x6,j6); 155 x7 = PLUS(x7,j7); 156 x8 = PLUS(x8,j8); 157 x9 = PLUS(x9,j9); 158 x10 = PLUS(x10,j10); 159 x11 = PLUS(x11,j11); 160 x12 = PLUS(x12,j12); 161 x13 = PLUS(x13,j13); 162 x14 = PLUS(x14,j14); 163 x15 = PLUS(x15,j15); 164 165 #ifndef KEYSTREAM_ONLY 166 x0 = XOR(x0,U8TO32_LITTLE(m + 0)); 167 x1 = XOR(x1,U8TO32_LITTLE(m + 4)); 168 x2 = XOR(x2,U8TO32_LITTLE(m + 8)); 169 x3 = XOR(x3,U8TO32_LITTLE(m + 12)); 170 x4 = XOR(x4,U8TO32_LITTLE(m + 16)); 171 x5 = XOR(x5,U8TO32_LITTLE(m + 20)); 172 x6 = XOR(x6,U8TO32_LITTLE(m + 24)); 173 x7 = XOR(x7,U8TO32_LITTLE(m + 28)); 174 x8 = XOR(x8,U8TO32_LITTLE(m + 32)); 175 x9 = XOR(x9,U8TO32_LITTLE(m + 36)); 176 x10 = XOR(x10,U8TO32_LITTLE(m + 40)); 177 x11 = XOR(x11,U8TO32_LITTLE(m + 44)); 178 x12 = XOR(x12,U8TO32_LITTLE(m + 48)); 179 x13 = XOR(x13,U8TO32_LITTLE(m + 52)); 180 x14 = XOR(x14,U8TO32_LITTLE(m + 56)); 181 x15 = XOR(x15,U8TO32_LITTLE(m + 60)); 182 #endif 183 184 j12 = PLUSONE(j12); 185 if (!j12) { 186 j13 = PLUSONE(j13); 187 /* stopping at 2^70 bytes per nonce is user's responsibility */ 188 } 189 190 U32TO8_LITTLE(c + 0,x0); 191 U32TO8_LITTLE(c + 4,x1); 192 U32TO8_LITTLE(c + 8,x2); 193 U32TO8_LITTLE(c + 12,x3); 194 U32TO8_LITTLE(c + 16,x4); 195 U32TO8_LITTLE(c + 20,x5); 196 U32TO8_LITTLE(c + 24,x6); 197 U32TO8_LITTLE(c + 28,x7); 198 U32TO8_LITTLE(c + 32,x8); 199 U32TO8_LITTLE(c + 36,x9); 200 U32TO8_LITTLE(c + 40,x10); 201 U32TO8_LITTLE(c + 44,x11); 202 U32TO8_LITTLE(c + 48,x12); 203 U32TO8_LITTLE(c + 52,x13); 204 U32TO8_LITTLE(c + 56,x14); 205 U32TO8_LITTLE(c + 60,x15); 206 207 if (bytes <= 64) { 208 if (bytes < 64) { 209 for (i = 0;i < bytes;++i) ctarget[i] = c[i]; 210 } 211 x->chacha_input[12] = j12; 212 x->chacha_input[13] = j13; 213 return; 214 } 215 bytes -= 64; 216 c += 64; 217 #ifndef KEYSTREAM_ONLY 218 m += 64; 219 #endif 220 } 221 } 222