1 /* 2 chacha-merged.c version 20080118 3 D. J. Bernstein 4 Public domain. 5 */ 6 7 /* $OpenBSD: chacha.c,v 1.1 2013/11/21 00:45:44 djm Exp $ */ 8 9 #include <sys/cdefs.h> 10 __FBSDID("$FreeBSD$"); 11 12 #include <sys/param.h> 13 #include <sys/types.h> 14 15 #include <crypto/chacha20/chacha.h> 16 17 typedef uint8_t u8; 18 typedef uint32_t u32; 19 20 typedef struct chacha_ctx chacha_ctx; 21 22 #define U8C(v) (v##U) 23 #define U32C(v) (v##U) 24 25 #define U8V(v) ((u8)(v) & U8C(0xFF)) 26 #define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF)) 27 28 #define ROTL32(v, n) \ 29 (U32V((v) << (n)) | ((v) >> (32 - (n)))) 30 31 #define U8TO32_LITTLE(p) \ 32 (((u32)((p)[0]) ) | \ 33 ((u32)((p)[1]) << 8) | \ 34 ((u32)((p)[2]) << 16) | \ 35 ((u32)((p)[3]) << 24)) 36 37 #define U32TO8_LITTLE(p, v) \ 38 do { \ 39 (p)[0] = U8V((v) ); \ 40 (p)[1] = U8V((v) >> 8); \ 41 (p)[2] = U8V((v) >> 16); \ 42 (p)[3] = U8V((v) >> 24); \ 43 } while (0) 44 45 #define ROTATE(v,c) (ROTL32(v,c)) 46 #define XOR(v,w) ((v) ^ (w)) 47 #define PLUS(v,w) (U32V((v) + (w))) 48 #define PLUSONE(v) (PLUS((v),1)) 49 50 #define QUARTERROUND(a,b,c,d) \ 51 a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \ 52 c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \ 53 a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \ 54 c = PLUS(c,d); b = ROTATE(XOR(b,c), 7); 55 56 static const char sigma[16] = "expand 32-byte k"; 57 static const char tau[16] = "expand 16-byte k"; 58 59 LOCAL void 60 chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits) 61 { 62 const char *constants; 63 64 x->input[4] = U8TO32_LITTLE(k + 0); 65 x->input[5] = U8TO32_LITTLE(k + 4); 66 x->input[6] = U8TO32_LITTLE(k + 8); 67 x->input[7] = U8TO32_LITTLE(k + 12); 68 if (kbits == 256) { /* recommended */ 69 k += 16; 70 constants = sigma; 71 } else { /* kbits == 128 */ 72 constants = tau; 73 } 74 x->input[8] = U8TO32_LITTLE(k + 0); 75 x->input[9] = U8TO32_LITTLE(k + 4); 76 x->input[10] = U8TO32_LITTLE(k + 8); 77 x->input[11] = U8TO32_LITTLE(k + 12); 78 x->input[0] = U8TO32_LITTLE(constants + 0); 79 x->input[1] = U8TO32_LITTLE(constants + 4); 80 x->input[2] = U8TO32_LITTLE(constants + 8); 81 x->input[3] = U8TO32_LITTLE(constants + 12); 82 } 83 84 LOCAL void 85 chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter) 86 { 87 x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0); 88 x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4); 89 x->input[14] = U8TO32_LITTLE(iv + 0); 90 x->input[15] = U8TO32_LITTLE(iv + 4); 91 } 92 93 LOCAL void 94 chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes) 95 { 96 u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; 97 u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; 98 u8 *ctarget = NULL; 99 u8 tmp[64]; 100 u_int i; 101 102 if (!bytes) return; 103 104 j0 = x->input[0]; 105 j1 = x->input[1]; 106 j2 = x->input[2]; 107 j3 = x->input[3]; 108 j4 = x->input[4]; 109 j5 = x->input[5]; 110 j6 = x->input[6]; 111 j7 = x->input[7]; 112 j8 = x->input[8]; 113 j9 = x->input[9]; 114 j10 = x->input[10]; 115 j11 = x->input[11]; 116 j12 = x->input[12]; 117 j13 = x->input[13]; 118 j14 = x->input[14]; 119 j15 = x->input[15]; 120 121 for (;;) { 122 if (bytes < 64) { 123 for (i = 0;i < bytes;++i) tmp[i] = m[i]; 124 m = tmp; 125 ctarget = c; 126 c = tmp; 127 } 128 x0 = j0; 129 x1 = j1; 130 x2 = j2; 131 x3 = j3; 132 x4 = j4; 133 x5 = j5; 134 x6 = j6; 135 x7 = j7; 136 x8 = j8; 137 x9 = j9; 138 x10 = j10; 139 x11 = j11; 140 x12 = j12; 141 x13 = j13; 142 x14 = j14; 143 x15 = j15; 144 for (i = 20;i > 0;i -= 2) { 145 QUARTERROUND( x0, x4, x8,x12) 146 QUARTERROUND( x1, x5, x9,x13) 147 QUARTERROUND( x2, x6,x10,x14) 148 QUARTERROUND( x3, x7,x11,x15) 149 QUARTERROUND( x0, x5,x10,x15) 150 QUARTERROUND( x1, x6,x11,x12) 151 QUARTERROUND( x2, x7, x8,x13) 152 QUARTERROUND( x3, x4, x9,x14) 153 } 154 x0 = PLUS(x0,j0); 155 x1 = PLUS(x1,j1); 156 x2 = PLUS(x2,j2); 157 x3 = PLUS(x3,j3); 158 x4 = PLUS(x4,j4); 159 x5 = PLUS(x5,j5); 160 x6 = PLUS(x6,j6); 161 x7 = PLUS(x7,j7); 162 x8 = PLUS(x8,j8); 163 x9 = PLUS(x9,j9); 164 x10 = PLUS(x10,j10); 165 x11 = PLUS(x11,j11); 166 x12 = PLUS(x12,j12); 167 x13 = PLUS(x13,j13); 168 x14 = PLUS(x14,j14); 169 x15 = PLUS(x15,j15); 170 171 #ifndef KEYSTREAM_ONLY 172 x0 = XOR(x0,U8TO32_LITTLE(m + 0)); 173 x1 = XOR(x1,U8TO32_LITTLE(m + 4)); 174 x2 = XOR(x2,U8TO32_LITTLE(m + 8)); 175 x3 = XOR(x3,U8TO32_LITTLE(m + 12)); 176 x4 = XOR(x4,U8TO32_LITTLE(m + 16)); 177 x5 = XOR(x5,U8TO32_LITTLE(m + 20)); 178 x6 = XOR(x6,U8TO32_LITTLE(m + 24)); 179 x7 = XOR(x7,U8TO32_LITTLE(m + 28)); 180 x8 = XOR(x8,U8TO32_LITTLE(m + 32)); 181 x9 = XOR(x9,U8TO32_LITTLE(m + 36)); 182 x10 = XOR(x10,U8TO32_LITTLE(m + 40)); 183 x11 = XOR(x11,U8TO32_LITTLE(m + 44)); 184 x12 = XOR(x12,U8TO32_LITTLE(m + 48)); 185 x13 = XOR(x13,U8TO32_LITTLE(m + 52)); 186 x14 = XOR(x14,U8TO32_LITTLE(m + 56)); 187 x15 = XOR(x15,U8TO32_LITTLE(m + 60)); 188 #endif 189 190 j12 = PLUSONE(j12); 191 if (!j12) { 192 j13 = PLUSONE(j13); 193 /* stopping at 2^70 bytes per nonce is user's responsibility */ 194 } 195 196 U32TO8_LITTLE(c + 0,x0); 197 U32TO8_LITTLE(c + 4,x1); 198 U32TO8_LITTLE(c + 8,x2); 199 U32TO8_LITTLE(c + 12,x3); 200 U32TO8_LITTLE(c + 16,x4); 201 U32TO8_LITTLE(c + 20,x5); 202 U32TO8_LITTLE(c + 24,x6); 203 U32TO8_LITTLE(c + 28,x7); 204 U32TO8_LITTLE(c + 32,x8); 205 U32TO8_LITTLE(c + 36,x9); 206 U32TO8_LITTLE(c + 40,x10); 207 U32TO8_LITTLE(c + 44,x11); 208 U32TO8_LITTLE(c + 48,x12); 209 U32TO8_LITTLE(c + 52,x13); 210 U32TO8_LITTLE(c + 56,x14); 211 U32TO8_LITTLE(c + 60,x15); 212 213 if (bytes <= 64) { 214 if (bytes < 64) { 215 for (i = 0;i < bytes;++i) ctarget[i] = c[i]; 216 } 217 x->input[12] = j12; 218 x->input[13] = j13; 219 return; 220 } 221 bytes -= 64; 222 c += 64; 223 #ifndef KEYSTREAM_ONLY 224 m += 64; 225 #endif 226 } 227 } 228