1 /* 2 chacha-merged.c version 20080118 3 D. J. Bernstein 4 Public domain. 5 */ 6 7 /* $OpenBSD: chacha.c,v 1.1 2013/11/21 00:45:44 djm Exp $ */ 8 9 #include <sys/cdefs.h> 10 __FBSDID("$FreeBSD$"); 11 12 #include <sys/param.h> 13 #include <sys/types.h> 14 15 #include <crypto/chacha20/chacha.h> 16 17 18 typedef uint8_t u8; 19 typedef uint32_t u32; 20 21 typedef struct chacha_ctx chacha_ctx; 22 23 #define U8C(v) (v##U) 24 #define U32C(v) (v##U) 25 26 #define U8V(v) ((u8)(v) & U8C(0xFF)) 27 #define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF)) 28 29 #define ROTL32(v, n) \ 30 (U32V((v) << (n)) | ((v) >> (32 - (n)))) 31 32 #define U8TO32_LITTLE(p) \ 33 (((u32)((p)[0]) ) | \ 34 ((u32)((p)[1]) << 8) | \ 35 ((u32)((p)[2]) << 16) | \ 36 ((u32)((p)[3]) << 24)) 37 38 #define U32TO8_LITTLE(p, v) \ 39 do { \ 40 (p)[0] = U8V((v) ); \ 41 (p)[1] = U8V((v) >> 8); \ 42 (p)[2] = U8V((v) >> 16); \ 43 (p)[3] = U8V((v) >> 24); \ 44 } while (0) 45 46 #define ROTATE(v,c) (ROTL32(v,c)) 47 #define XOR(v,w) ((v) ^ (w)) 48 #define PLUS(v,w) (U32V((v) + (w))) 49 #define PLUSONE(v) (PLUS((v),1)) 50 51 #define QUARTERROUND(a,b,c,d) \ 52 a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \ 53 c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \ 54 a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \ 55 c = PLUS(c,d); b = ROTATE(XOR(b,c), 7); 56 57 static const char sigma[16] = "expand 32-byte k"; 58 static const char tau[16] = "expand 16-byte k"; 59 60 void 61 chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits) 62 { 63 const char *constants; 64 65 x->input[4] = U8TO32_LITTLE(k + 0); 66 x->input[5] = U8TO32_LITTLE(k + 4); 67 x->input[6] = U8TO32_LITTLE(k + 8); 68 x->input[7] = U8TO32_LITTLE(k + 12); 69 if (kbits == 256) { /* recommended */ 70 k += 16; 71 constants = sigma; 72 } else { /* kbits == 128 */ 73 constants = tau; 74 } 75 x->input[8] = U8TO32_LITTLE(k + 0); 76 x->input[9] = U8TO32_LITTLE(k + 4); 77 x->input[10] = U8TO32_LITTLE(k + 8); 78 x->input[11] = U8TO32_LITTLE(k + 12); 79 x->input[0] = U8TO32_LITTLE(constants + 0); 80 x->input[1] = U8TO32_LITTLE(constants + 4); 81 x->input[2] = U8TO32_LITTLE(constants + 8); 82 x->input[3] = U8TO32_LITTLE(constants + 12); 83 } 84 85 void 86 chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter) 87 { 88 x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0); 89 x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4); 90 x->input[14] = U8TO32_LITTLE(iv + 0); 91 x->input[15] = U8TO32_LITTLE(iv + 4); 92 } 93 94 void 95 chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes) 96 { 97 u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; 98 u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; 99 u8 *ctarget = NULL; 100 u8 tmp[64]; 101 u_int i; 102 103 if (!bytes) return; 104 105 j0 = x->input[0]; 106 j1 = x->input[1]; 107 j2 = x->input[2]; 108 j3 = x->input[3]; 109 j4 = x->input[4]; 110 j5 = x->input[5]; 111 j6 = x->input[6]; 112 j7 = x->input[7]; 113 j8 = x->input[8]; 114 j9 = x->input[9]; 115 j10 = x->input[10]; 116 j11 = x->input[11]; 117 j12 = x->input[12]; 118 j13 = x->input[13]; 119 j14 = x->input[14]; 120 j15 = x->input[15]; 121 122 for (;;) { 123 if (bytes < 64) { 124 for (i = 0;i < bytes;++i) tmp[i] = m[i]; 125 m = tmp; 126 ctarget = c; 127 c = tmp; 128 } 129 x0 = j0; 130 x1 = j1; 131 x2 = j2; 132 x3 = j3; 133 x4 = j4; 134 x5 = j5; 135 x6 = j6; 136 x7 = j7; 137 x8 = j8; 138 x9 = j9; 139 x10 = j10; 140 x11 = j11; 141 x12 = j12; 142 x13 = j13; 143 x14 = j14; 144 x15 = j15; 145 for (i = 20;i > 0;i -= 2) { 146 QUARTERROUND( x0, x4, x8,x12) 147 QUARTERROUND( x1, x5, x9,x13) 148 QUARTERROUND( x2, x6,x10,x14) 149 QUARTERROUND( x3, x7,x11,x15) 150 QUARTERROUND( x0, x5,x10,x15) 151 QUARTERROUND( x1, x6,x11,x12) 152 QUARTERROUND( x2, x7, x8,x13) 153 QUARTERROUND( x3, x4, x9,x14) 154 } 155 x0 = PLUS(x0,j0); 156 x1 = PLUS(x1,j1); 157 x2 = PLUS(x2,j2); 158 x3 = PLUS(x3,j3); 159 x4 = PLUS(x4,j4); 160 x5 = PLUS(x5,j5); 161 x6 = PLUS(x6,j6); 162 x7 = PLUS(x7,j7); 163 x8 = PLUS(x8,j8); 164 x9 = PLUS(x9,j9); 165 x10 = PLUS(x10,j10); 166 x11 = PLUS(x11,j11); 167 x12 = PLUS(x12,j12); 168 x13 = PLUS(x13,j13); 169 x14 = PLUS(x14,j14); 170 x15 = PLUS(x15,j15); 171 172 x0 = XOR(x0,U8TO32_LITTLE(m + 0)); 173 x1 = XOR(x1,U8TO32_LITTLE(m + 4)); 174 x2 = XOR(x2,U8TO32_LITTLE(m + 8)); 175 x3 = XOR(x3,U8TO32_LITTLE(m + 12)); 176 x4 = XOR(x4,U8TO32_LITTLE(m + 16)); 177 x5 = XOR(x5,U8TO32_LITTLE(m + 20)); 178 x6 = XOR(x6,U8TO32_LITTLE(m + 24)); 179 x7 = XOR(x7,U8TO32_LITTLE(m + 28)); 180 x8 = XOR(x8,U8TO32_LITTLE(m + 32)); 181 x9 = XOR(x9,U8TO32_LITTLE(m + 36)); 182 x10 = XOR(x10,U8TO32_LITTLE(m + 40)); 183 x11 = XOR(x11,U8TO32_LITTLE(m + 44)); 184 x12 = XOR(x12,U8TO32_LITTLE(m + 48)); 185 x13 = XOR(x13,U8TO32_LITTLE(m + 52)); 186 x14 = XOR(x14,U8TO32_LITTLE(m + 56)); 187 x15 = XOR(x15,U8TO32_LITTLE(m + 60)); 188 189 j12 = PLUSONE(j12); 190 if (!j12) { 191 j13 = PLUSONE(j13); 192 /* stopping at 2^70 bytes per nonce is user's responsibility */ 193 } 194 195 U32TO8_LITTLE(c + 0,x0); 196 U32TO8_LITTLE(c + 4,x1); 197 U32TO8_LITTLE(c + 8,x2); 198 U32TO8_LITTLE(c + 12,x3); 199 U32TO8_LITTLE(c + 16,x4); 200 U32TO8_LITTLE(c + 20,x5); 201 U32TO8_LITTLE(c + 24,x6); 202 U32TO8_LITTLE(c + 28,x7); 203 U32TO8_LITTLE(c + 32,x8); 204 U32TO8_LITTLE(c + 36,x9); 205 U32TO8_LITTLE(c + 40,x10); 206 U32TO8_LITTLE(c + 44,x11); 207 U32TO8_LITTLE(c + 48,x12); 208 U32TO8_LITTLE(c + 52,x13); 209 U32TO8_LITTLE(c + 56,x14); 210 U32TO8_LITTLE(c + 60,x15); 211 212 if (bytes <= 64) { 213 if (bytes < 64) { 214 for (i = 0;i < bytes;++i) ctarget[i] = c[i]; 215 } 216 x->input[12] = j12; 217 x->input[13] = j13; 218 return; 219 } 220 bytes -= 64; 221 c += 64; 222 m += 64; 223 } 224 } 225