1 /* 2 * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining 5 * a copy of this software and associated documentation files (the 6 * "Software"), to deal in the Software without restriction, including 7 * without limitation the rights to use, copy, modify, merge, publish, 8 * distribute, sublicense, and/or sell copies of the Software, and to 9 * permit persons to whom the Software is furnished to do so, subject to 10 * the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 25 #include "inner.h" 26 27 /* 28 * Parameters for the field: 29 * - field modulus p = 2^255-19 30 * - R^2 mod p (R = 2^(15k) for the smallest k such that R >= p) 31 */ 32 33 static const uint16_t C255_P[] = { 34 0x0110, 35 0x7FED, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 36 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 37 0x7FFF 38 }; 39 40 #define P0I 0x4A1B 41 42 static const uint16_t C255_R2[] = { 43 0x0110, 44 0x0169, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 45 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 46 0x0000 47 }; 48 49 /* obsolete 50 #include <stdio.h> 51 #include <stdlib.h> 52 static void 53 print_int_mont(const char *name, const uint16_t *x) 54 { 55 uint16_t y[18]; 56 unsigned char tmp[32]; 57 size_t u; 58 59 printf("%s = ", name); 60 memcpy(y, x, sizeof y); 61 br_i15_from_monty(y, C255_P, P0I); 62 br_i15_encode(tmp, sizeof tmp, y); 63 for (u = 0; u < sizeof tmp; u ++) { 64 printf("%02X", tmp[u]); 65 } 66 printf("\n"); 67 } 68 */ 69 70 static const uint16_t C255_A24[] = { 71 0x0110, 72 0x45D3, 0x0046, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 73 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 74 0x0000 75 }; 76 77 static const unsigned char GEN[] = { 78 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 79 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 80 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 81 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 82 }; 83 84 static const unsigned char ORDER[] = { 85 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 86 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 87 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 88 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF 89 }; 90 91 static const unsigned char * 92 api_generator(int curve, size_t *len) 93 { 94 (void)curve; 95 *len = 32; 96 return GEN; 97 } 98 99 static const unsigned char * 100 api_order(int curve, size_t *len) 101 { 102 (void)curve; 103 *len = 32; 104 return ORDER; 105 } 106 107 static size_t 108 api_xoff(int curve, size_t *len) 109 { 110 (void)curve; 111 *len = 32; 112 return 0; 113 } 114 115 static void 116 cswap(uint16_t *a, uint16_t *b, uint32_t ctl) 117 { 118 int i; 119 120 ctl = -ctl; 121 for (i = 0; i < 18; i ++) { 122 uint32_t aw, bw, tw; 123 124 aw = a[i]; 125 bw = b[i]; 126 tw = ctl & (aw ^ bw); 127 a[i] = aw ^ tw; 128 b[i] = bw ^ tw; 129 } 130 } 131 132 static void 133 c255_add(uint16_t *d, const uint16_t *a, const uint16_t *b) 134 { 135 uint32_t ctl; 136 uint16_t t[18]; 137 138 memcpy(t, a, sizeof t); 139 ctl = br_i15_add(t, b, 1); 140 ctl |= NOT(br_i15_sub(t, C255_P, 0)); 141 br_i15_sub(t, C255_P, ctl); 142 memcpy(d, t, sizeof t); 143 } 144 145 static void 146 c255_sub(uint16_t *d, const uint16_t *a, const uint16_t *b) 147 { 148 uint16_t t[18]; 149 150 memcpy(t, a, sizeof t); 151 br_i15_add(t, C255_P, br_i15_sub(t, b, 1)); 152 memcpy(d, t, sizeof t); 153 } 154 155 static void 156 c255_mul(uint16_t *d, const uint16_t *a, const uint16_t *b) 157 { 158 uint16_t t[18]; 159 160 br_i15_montymul(t, a, b, C255_P, P0I); 161 memcpy(d, t, sizeof t); 162 } 163 164 static void 165 byteswap(unsigned char *G) 166 { 167 int i; 168 169 for (i = 0; i < 16; i ++) { 170 unsigned char t; 171 172 t = G[i]; 173 G[i] = G[31 - i]; 174 G[31 - i] = t; 175 } 176 } 177 178 static uint32_t 179 api_mul(unsigned char *G, size_t Glen, 180 const unsigned char *kb, size_t kblen, int curve) 181 { 182 #define ILEN (18 * sizeof(uint16_t)) 183 184 /* 185 * The a[] and b[] arrays have an extra word to allow for 186 * decoding without using br_i15_decode_reduce(). 187 */ 188 uint16_t x1[18], x2[18], x3[18], z2[18], z3[18]; 189 uint16_t a[19], aa[18], b[19], bb[18]; 190 uint16_t c[18], d[18], e[18], da[18], cb[18]; 191 unsigned char k[32]; 192 uint32_t swap; 193 int i; 194 195 (void)curve; 196 197 /* 198 * Points are encoded over exactly 32 bytes. Multipliers must fit 199 * in 32 bytes as well. 200 * RFC 7748 mandates that the high bit of the last point byte must 201 * be ignored/cleared. 202 */ 203 if (Glen != 32 || kblen > 32) { 204 return 0; 205 } 206 G[31] &= 0x7F; 207 208 /* 209 * Byteswap the point encoding, because it uses little-endian, and 210 * the generic decoding routine uses big-endian. 211 */ 212 byteswap(G); 213 214 /* 215 * Decode the point ('u' coordinate). This should be reduced 216 * modulo p, but we prefer to avoid the dependency on 217 * br_i15_decode_reduce(). Instead, we use br_i15_decode_mod() 218 * with a synthetic modulus of value 2^255 (this must work 219 * since G was truncated to 255 bits), then use a conditional 220 * subtraction. We use br_i15_decode_mod() and not 221 * br_i15_decode(), because the ec_prime_i15 implementation uses 222 * the former but not the latter. 223 * br_i15_decode_reduce(a, G, 32, C255_P); 224 */ 225 br_i15_zero(b, 0x111); 226 b[18] = 1; 227 br_i15_decode_mod(a, G, 32, b); 228 a[0] = 0x110; 229 br_i15_sub(a, C255_P, NOT(br_i15_sub(a, C255_P, 0))); 230 231 /* 232 * Initialise variables x1, x2, z2, x3 and z3. We set all of them 233 * into Montgomery representation. 234 */ 235 br_i15_montymul(x1, a, C255_R2, C255_P, P0I); 236 memcpy(x3, x1, ILEN); 237 br_i15_zero(z2, C255_P[0]); 238 memcpy(x2, z2, ILEN); 239 x2[1] = 19; 240 memcpy(z3, x2, ILEN); 241 242 memset(k, 0, (sizeof k) - kblen); 243 memcpy(k + (sizeof k) - kblen, kb, kblen); 244 k[31] &= 0xF8; 245 k[0] &= 0x7F; 246 k[0] |= 0x40; 247 248 /* obsolete 249 print_int_mont("x1", x1); 250 */ 251 252 swap = 0; 253 for (i = 254; i >= 0; i --) { 254 uint32_t kt; 255 256 kt = (k[31 - (i >> 3)] >> (i & 7)) & 1; 257 swap ^= kt; 258 cswap(x2, x3, swap); 259 cswap(z2, z3, swap); 260 swap = kt; 261 262 /* obsolete 263 print_int_mont("x2", x2); 264 print_int_mont("z2", z2); 265 print_int_mont("x3", x3); 266 print_int_mont("z3", z3); 267 */ 268 269 c255_add(a, x2, z2); 270 c255_mul(aa, a, a); 271 c255_sub(b, x2, z2); 272 c255_mul(bb, b, b); 273 c255_sub(e, aa, bb); 274 c255_add(c, x3, z3); 275 c255_sub(d, x3, z3); 276 c255_mul(da, d, a); 277 c255_mul(cb, c, b); 278 279 /* obsolete 280 print_int_mont("a ", a); 281 print_int_mont("aa", aa); 282 print_int_mont("b ", b); 283 print_int_mont("bb", bb); 284 print_int_mont("e ", e); 285 print_int_mont("c ", c); 286 print_int_mont("d ", d); 287 print_int_mont("da", da); 288 print_int_mont("cb", cb); 289 */ 290 291 c255_add(x3, da, cb); 292 c255_mul(x3, x3, x3); 293 c255_sub(z3, da, cb); 294 c255_mul(z3, z3, z3); 295 c255_mul(z3, z3, x1); 296 c255_mul(x2, aa, bb); 297 c255_mul(z2, C255_A24, e); 298 c255_add(z2, z2, aa); 299 c255_mul(z2, e, z2); 300 301 /* obsolete 302 print_int_mont("x2", x2); 303 print_int_mont("z2", z2); 304 print_int_mont("x3", x3); 305 print_int_mont("z3", z3); 306 */ 307 } 308 cswap(x2, x3, swap); 309 cswap(z2, z3, swap); 310 311 /* 312 * Inverse z2 with a modular exponentiation. This is a simple 313 * square-and-multiply algorithm; we mutualise most non-squarings 314 * since the exponent contains almost only ones. 315 */ 316 memcpy(a, z2, ILEN); 317 for (i = 0; i < 15; i ++) { 318 c255_mul(a, a, a); 319 c255_mul(a, a, z2); 320 } 321 memcpy(b, a, ILEN); 322 for (i = 0; i < 14; i ++) { 323 int j; 324 325 for (j = 0; j < 16; j ++) { 326 c255_mul(b, b, b); 327 } 328 c255_mul(b, b, a); 329 } 330 for (i = 14; i >= 0; i --) { 331 c255_mul(b, b, b); 332 if ((0xFFEB >> i) & 1) { 333 c255_mul(b, z2, b); 334 } 335 } 336 c255_mul(b, x2, b); 337 338 /* 339 * To avoid a dependency on br_i15_from_monty(), we use a 340 * Montgomery multiplication with 1. 341 * memcpy(x2, b, ILEN); 342 * br_i15_from_monty(x2, C255_P, P0I); 343 */ 344 br_i15_zero(a, C255_P[0]); 345 a[1] = 1; 346 br_i15_montymul(x2, a, b, C255_P, P0I); 347 348 br_i15_encode(G, 32, x2); 349 byteswap(G); 350 return 1; 351 352 #undef ILEN 353 } 354 355 static size_t 356 api_mulgen(unsigned char *R, 357 const unsigned char *x, size_t xlen, int curve) 358 { 359 const unsigned char *G; 360 size_t Glen; 361 362 G = api_generator(curve, &Glen); 363 memcpy(R, G, Glen); 364 api_mul(R, Glen, x, xlen, curve); 365 return Glen; 366 } 367 368 static uint32_t 369 api_muladd(unsigned char *A, const unsigned char *B, size_t len, 370 const unsigned char *x, size_t xlen, 371 const unsigned char *y, size_t ylen, int curve) 372 { 373 /* 374 * We don't implement this method, since it is used for ECDSA 375 * only, and there is no ECDSA over Curve25519 (which instead 376 * uses EdDSA). 377 */ 378 (void)A; 379 (void)B; 380 (void)len; 381 (void)x; 382 (void)xlen; 383 (void)y; 384 (void)ylen; 385 (void)curve; 386 return 0; 387 } 388 389 /* see bearssl_ec.h */ 390 const br_ec_impl br_ec_c25519_i15 = { 391 (uint32_t)0x20000000, 392 &api_generator, 393 &api_order, 394 &api_xoff, 395 &api_mul, 396 &api_mulgen, 397 &api_muladd 398 }; 399