1 /* 2 chacha-merged.c version 20080118 3 D. J. Bernstein 4 Public domain. 5 */ 6 7 /* $OpenBSD: chacha.c,v 1.1 2013/11/21 00:45:44 djm Exp $ */ 8 9 #include <sys/cdefs.h> 10 __FBSDID("$FreeBSD$"); 11 12 #include <sys/param.h> 13 #include <sys/types.h> 14 15 #include <crypto/chacha20/chacha.h> 16 17 typedef uint8_t u8; 18 typedef uint32_t u32; 19 20 typedef struct chacha_ctx chacha_ctx; 21 22 #define U8C(v) (v##U) 23 #define U32C(v) (v##U) 24 25 #define U8V(v) ((u8)(v) & U8C(0xFF)) 26 #define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF)) 27 28 #define ROTL32(v, n) \ 29 (U32V((v) << (n)) | ((v) >> (32 - (n)))) 30 31 #define U8TO32_LITTLE(p) \ 32 (((u32)((p)[0]) ) | \ 33 ((u32)((p)[1]) << 8) | \ 34 ((u32)((p)[2]) << 16) | \ 35 ((u32)((p)[3]) << 24)) 36 37 #define U32TO8_LITTLE(p, v) \ 38 do { \ 39 (p)[0] = U8V((v) ); \ 40 (p)[1] = U8V((v) >> 8); \ 41 (p)[2] = U8V((v) >> 16); \ 42 (p)[3] = U8V((v) >> 24); \ 43 } while (0) 44 45 #define ROTATE(v,c) (ROTL32(v,c)) 46 #define XOR(v,w) ((v) ^ (w)) 47 #define PLUS(v,w) (U32V((v) + (w))) 48 #define PLUSONE(v) (PLUS((v),1)) 49 50 #define QUARTERROUND(a,b,c,d) \ 51 a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \ 52 c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \ 53 a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \ 54 c = PLUS(c,d); b = ROTATE(XOR(b,c), 7); 55 56 static const char sigma[16] = "expand 32-byte k"; 57 static const char tau[16] = "expand 16-byte k"; 58 59 LOCAL void 60 chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits) 61 { 62 const char *constants; 63 64 x->input[4] = U8TO32_LITTLE(k + 0); 65 x->input[5] = U8TO32_LITTLE(k + 4); 66 x->input[6] = U8TO32_LITTLE(k + 8); 67 x->input[7] = U8TO32_LITTLE(k + 12); 68 if (kbits == 256) { /* recommended */ 69 k += 16; 70 constants = sigma; 71 } else { /* kbits == 128 */ 72 constants = tau; 73 } 74 x->input[8] = U8TO32_LITTLE(k + 0); 75 x->input[9] = U8TO32_LITTLE(k + 4); 76 x->input[10] = U8TO32_LITTLE(k + 8); 77 x->input[11] = U8TO32_LITTLE(k + 12); 78 x->input[0] = U8TO32_LITTLE(constants + 0); 79 x->input[1] = U8TO32_LITTLE(constants + 4); 80 x->input[2] = U8TO32_LITTLE(constants + 8); 81 x->input[3] = U8TO32_LITTLE(constants + 12); 82 } 83 84 LOCAL void 85 chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter) 86 { 87 x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0); 88 x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4); 89 x->input[14] = U8TO32_LITTLE(iv + 0); 90 x->input[15] = U8TO32_LITTLE(iv + 4); 91 } 92 93 LOCAL void 94 chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes) 95 { 96 u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; 97 u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; 98 u8 *ctarget = NULL; 99 u8 tmp[64]; 100 u_int i; 101 102 if (!bytes) return; 103 104 j0 = x->input[0]; 105 j1 = x->input[1]; 106 j2 = x->input[2]; 107 j3 = x->input[3]; 108 j4 = x->input[4]; 109 j5 = x->input[5]; 110 j6 = x->input[6]; 111 j7 = x->input[7]; 112 j8 = x->input[8]; 113 j9 = x->input[9]; 114 j10 = x->input[10]; 115 j11 = x->input[11]; 116 j12 = x->input[12]; 117 j13 = x->input[13]; 118 j14 = x->input[14]; 119 j15 = x->input[15]; 120 121 for (;;) { 122 if (bytes < 64) { 123 #ifndef KEYSTREAM_ONLY 124 for (i = 0;i < bytes;++i) tmp[i] = m[i]; 125 m = tmp; 126 #endif 127 ctarget = c; 128 c = tmp; 129 } 130 x0 = j0; 131 x1 = j1; 132 x2 = j2; 133 x3 = j3; 134 x4 = j4; 135 x5 = j5; 136 x6 = j6; 137 x7 = j7; 138 x8 = j8; 139 x9 = j9; 140 x10 = j10; 141 x11 = j11; 142 x12 = j12; 143 x13 = j13; 144 x14 = j14; 145 x15 = j15; 146 for (i = 20;i > 0;i -= 2) { 147 QUARTERROUND( x0, x4, x8,x12) 148 QUARTERROUND( x1, x5, x9,x13) 149 QUARTERROUND( x2, x6,x10,x14) 150 QUARTERROUND( x3, x7,x11,x15) 151 QUARTERROUND( x0, x5,x10,x15) 152 QUARTERROUND( x1, x6,x11,x12) 153 QUARTERROUND( x2, x7, x8,x13) 154 QUARTERROUND( x3, x4, x9,x14) 155 } 156 x0 = PLUS(x0,j0); 157 x1 = PLUS(x1,j1); 158 x2 = PLUS(x2,j2); 159 x3 = PLUS(x3,j3); 160 x4 = PLUS(x4,j4); 161 x5 = PLUS(x5,j5); 162 x6 = PLUS(x6,j6); 163 x7 = PLUS(x7,j7); 164 x8 = PLUS(x8,j8); 165 x9 = PLUS(x9,j9); 166 x10 = PLUS(x10,j10); 167 x11 = PLUS(x11,j11); 168 x12 = PLUS(x12,j12); 169 x13 = PLUS(x13,j13); 170 x14 = PLUS(x14,j14); 171 x15 = PLUS(x15,j15); 172 173 #ifndef KEYSTREAM_ONLY 174 x0 = XOR(x0,U8TO32_LITTLE(m + 0)); 175 x1 = XOR(x1,U8TO32_LITTLE(m + 4)); 176 x2 = XOR(x2,U8TO32_LITTLE(m + 8)); 177 x3 = XOR(x3,U8TO32_LITTLE(m + 12)); 178 x4 = XOR(x4,U8TO32_LITTLE(m + 16)); 179 x5 = XOR(x5,U8TO32_LITTLE(m + 20)); 180 x6 = XOR(x6,U8TO32_LITTLE(m + 24)); 181 x7 = XOR(x7,U8TO32_LITTLE(m + 28)); 182 x8 = XOR(x8,U8TO32_LITTLE(m + 32)); 183 x9 = XOR(x9,U8TO32_LITTLE(m + 36)); 184 x10 = XOR(x10,U8TO32_LITTLE(m + 40)); 185 x11 = XOR(x11,U8TO32_LITTLE(m + 44)); 186 x12 = XOR(x12,U8TO32_LITTLE(m + 48)); 187 x13 = XOR(x13,U8TO32_LITTLE(m + 52)); 188 x14 = XOR(x14,U8TO32_LITTLE(m + 56)); 189 x15 = XOR(x15,U8TO32_LITTLE(m + 60)); 190 #endif 191 192 j12 = PLUSONE(j12); 193 if (!j12) { 194 j13 = PLUSONE(j13); 195 /* stopping at 2^70 bytes per nonce is user's responsibility */ 196 } 197 198 U32TO8_LITTLE(c + 0,x0); 199 U32TO8_LITTLE(c + 4,x1); 200 U32TO8_LITTLE(c + 8,x2); 201 U32TO8_LITTLE(c + 12,x3); 202 U32TO8_LITTLE(c + 16,x4); 203 U32TO8_LITTLE(c + 20,x5); 204 U32TO8_LITTLE(c + 24,x6); 205 U32TO8_LITTLE(c + 28,x7); 206 U32TO8_LITTLE(c + 32,x8); 207 U32TO8_LITTLE(c + 36,x9); 208 U32TO8_LITTLE(c + 40,x10); 209 U32TO8_LITTLE(c + 44,x11); 210 U32TO8_LITTLE(c + 48,x12); 211 U32TO8_LITTLE(c + 52,x13); 212 U32TO8_LITTLE(c + 56,x14); 213 U32TO8_LITTLE(c + 60,x15); 214 215 if (bytes <= 64) { 216 if (bytes < 64) { 217 for (i = 0;i < bytes;++i) ctarget[i] = c[i]; 218 } 219 x->input[12] = j12; 220 x->input[13] = j13; 221 return; 222 } 223 bytes -= 64; 224 c += 64; 225 #ifndef KEYSTREAM_ONLY 226 m += 64; 227 #endif 228 } 229 } 230