1 /* 2 * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining 5 * a copy of this software and associated documentation files (the 6 * "Software"), to deal in the Software without restriction, including 7 * without limitation the rights to use, copy, modify, merge, publish, 8 * distribute, sublicense, and/or sell copies of the Software, and to 9 * permit persons to whom the Software is furnished to do so, subject to 10 * the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 25 #include "inner.h" 26 27 /* 28 * Parameters for the field: 29 * - field modulus p = 2^255-19 30 * - R^2 mod p (R = 2^(31k) for the smallest k such that R >= p) 31 */ 32 33 static const uint32_t C255_P[] = { 34 0x00000107, 35 0x7FFFFFED, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 36 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x0000007F 37 }; 38 39 #define P0I 0x286BCA1B 40 41 static const uint32_t C255_R2[] = { 42 0x00000107, 43 0x00000000, 0x02D20000, 0x00000000, 0x00000000, 0x00000000, 44 0x00000000, 0x00000000, 0x00000000, 0x00000000 45 }; 46 47 static const uint32_t C255_A24[] = { 48 0x00000107, 49 0x53000000, 0x0000468B, 0x00000000, 0x00000000, 0x00000000, 50 0x00000000, 0x00000000, 0x00000000, 0x00000000 51 }; 52 53 /* obsolete 54 #include <stdio.h> 55 #include <stdlib.h> 56 static void 57 print_int_mont(const char *name, const uint32_t *x) 58 { 59 uint32_t y[10]; 60 unsigned char tmp[32]; 61 size_t u; 62 63 printf("%s = ", name); 64 memcpy(y, x, sizeof y); 65 br_i31_from_monty(y, C255_P, P0I); 66 br_i31_encode(tmp, sizeof tmp, y); 67 for (u = 0; u < sizeof tmp; u ++) { 68 printf("%02X", tmp[u]); 69 } 70 printf("\n"); 71 } 72 */ 73 74 static const unsigned char GEN[] = { 75 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 76 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 77 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 78 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 79 }; 80 81 static const unsigned char ORDER[] = { 82 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 83 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 84 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 85 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF 86 }; 87 88 static const unsigned char * 89 api_generator(int curve, size_t *len) 90 { 91 (void)curve; 92 *len = 32; 93 return GEN; 94 } 95 96 static const unsigned char * 97 api_order(int curve, size_t *len) 98 { 99 (void)curve; 100 *len = 32; 101 return ORDER; 102 } 103 104 static size_t 105 api_xoff(int curve, size_t *len) 106 { 107 (void)curve; 108 *len = 32; 109 return 0; 110 } 111 112 static void 113 cswap(uint32_t *a, uint32_t *b, uint32_t ctl) 114 { 115 int i; 116 117 ctl = -ctl; 118 for (i = 0; i < 10; i ++) { 119 uint32_t aw, bw, tw; 120 121 aw = a[i]; 122 bw = b[i]; 123 tw = ctl & (aw ^ bw); 124 a[i] = aw ^ tw; 125 b[i] = bw ^ tw; 126 } 127 } 128 129 static void 130 c255_add(uint32_t *d, const uint32_t *a, const uint32_t *b) 131 { 132 uint32_t ctl; 133 uint32_t t[10]; 134 135 memcpy(t, a, sizeof t); 136 ctl = br_i31_add(t, b, 1); 137 ctl |= NOT(br_i31_sub(t, C255_P, 0)); 138 br_i31_sub(t, C255_P, ctl); 139 memcpy(d, t, sizeof t); 140 } 141 142 static void 143 c255_sub(uint32_t *d, const uint32_t *a, const uint32_t *b) 144 { 145 uint32_t t[10]; 146 147 memcpy(t, a, sizeof t); 148 br_i31_add(t, C255_P, br_i31_sub(t, b, 1)); 149 memcpy(d, t, sizeof t); 150 } 151 152 static void 153 c255_mul(uint32_t *d, const uint32_t *a, const uint32_t *b) 154 { 155 uint32_t t[10]; 156 157 br_i31_montymul(t, a, b, C255_P, P0I); 158 memcpy(d, t, sizeof t); 159 } 160 161 static void 162 byteswap(unsigned char *G) 163 { 164 int i; 165 166 for (i = 0; i < 16; i ++) { 167 unsigned char t; 168 169 t = G[i]; 170 G[i] = G[31 - i]; 171 G[31 - i] = t; 172 } 173 } 174 175 static uint32_t 176 api_mul(unsigned char *G, size_t Glen, 177 const unsigned char *kb, size_t kblen, int curve) 178 { 179 uint32_t x1[10], x2[10], x3[10], z2[10], z3[10]; 180 uint32_t a[10], aa[10], b[10], bb[10]; 181 uint32_t c[10], d[10], e[10], da[10], cb[10]; 182 unsigned char k[32]; 183 uint32_t swap; 184 int i; 185 186 (void)curve; 187 188 /* 189 * Points are encoded over exactly 32 bytes. Multipliers must fit 190 * in 32 bytes as well. 191 * RFC 7748 mandates that the high bit of the last point byte must 192 * be ignored/cleared. 193 */ 194 if (Glen != 32 || kblen > 32) { 195 return 0; 196 } 197 G[31] &= 0x7F; 198 199 /* 200 * Byteswap the point encoding, because it uses little-endian, and 201 * the generic decoding routine uses big-endian. 202 */ 203 byteswap(G); 204 205 /* 206 * Decode the point ('u' coordinate). This should be reduced 207 * modulo p, but we prefer to avoid the dependency on 208 * br_i31_decode_reduce(). Instead, we use br_i31_decode_mod() 209 * with a synthetic modulus of value 2^255 (this must work 210 * since G was truncated to 255 bits), then use a conditional 211 * subtraction. We use br_i31_decode_mod() and not 212 * br_i31_decode(), because the ec_prime_i31 implementation uses 213 * the former but not the latter. 214 * br_i31_decode_reduce(a, G, 32, C255_P); 215 */ 216 br_i31_zero(b, 0x108); 217 b[9] = 0x0080; 218 br_i31_decode_mod(a, G, 32, b); 219 a[0] = 0x107; 220 br_i31_sub(a, C255_P, NOT(br_i31_sub(a, C255_P, 0))); 221 222 /* 223 * Initialise variables x1, x2, z2, x3 and z3. We set all of them 224 * into Montgomery representation. 225 */ 226 br_i31_montymul(x1, a, C255_R2, C255_P, P0I); 227 memcpy(x3, x1, sizeof x1); 228 br_i31_zero(z2, C255_P[0]); 229 memcpy(x2, z2, sizeof z2); 230 x2[1] = 0x13000000; 231 memcpy(z3, x2, sizeof x2); 232 233 /* 234 * kb[] is in big-endian notation, but possibly shorter than k[]. 235 */ 236 memset(k, 0, (sizeof k) - kblen); 237 memcpy(k + (sizeof k) - kblen, kb, kblen); 238 k[31] &= 0xF8; 239 k[0] &= 0x7F; 240 k[0] |= 0x40; 241 242 /* obsolete 243 print_int_mont("x1", x1); 244 */ 245 246 swap = 0; 247 for (i = 254; i >= 0; i --) { 248 uint32_t kt; 249 250 kt = (k[31 - (i >> 3)] >> (i & 7)) & 1; 251 swap ^= kt; 252 cswap(x2, x3, swap); 253 cswap(z2, z3, swap); 254 swap = kt; 255 256 /* obsolete 257 print_int_mont("x2", x2); 258 print_int_mont("z2", z2); 259 print_int_mont("x3", x3); 260 print_int_mont("z3", z3); 261 */ 262 263 c255_add(a, x2, z2); 264 c255_mul(aa, a, a); 265 c255_sub(b, x2, z2); 266 c255_mul(bb, b, b); 267 c255_sub(e, aa, bb); 268 c255_add(c, x3, z3); 269 c255_sub(d, x3, z3); 270 c255_mul(da, d, a); 271 c255_mul(cb, c, b); 272 273 /* obsolete 274 print_int_mont("a ", a); 275 print_int_mont("aa", aa); 276 print_int_mont("b ", b); 277 print_int_mont("bb", bb); 278 print_int_mont("e ", e); 279 print_int_mont("c ", c); 280 print_int_mont("d ", d); 281 print_int_mont("da", da); 282 print_int_mont("cb", cb); 283 */ 284 285 c255_add(x3, da, cb); 286 c255_mul(x3, x3, x3); 287 c255_sub(z3, da, cb); 288 c255_mul(z3, z3, z3); 289 c255_mul(z3, z3, x1); 290 c255_mul(x2, aa, bb); 291 c255_mul(z2, C255_A24, e); 292 c255_add(z2, z2, aa); 293 c255_mul(z2, e, z2); 294 295 /* obsolete 296 print_int_mont("x2", x2); 297 print_int_mont("z2", z2); 298 print_int_mont("x3", x3); 299 print_int_mont("z3", z3); 300 */ 301 } 302 cswap(x2, x3, swap); 303 cswap(z2, z3, swap); 304 305 /* 306 * Inverse z2 with a modular exponentiation. This is a simple 307 * square-and-multiply algorithm; we mutualise most non-squarings 308 * since the exponent contains almost only ones. 309 */ 310 memcpy(a, z2, sizeof z2); 311 for (i = 0; i < 15; i ++) { 312 c255_mul(a, a, a); 313 c255_mul(a, a, z2); 314 } 315 memcpy(b, a, sizeof a); 316 for (i = 0; i < 14; i ++) { 317 int j; 318 319 for (j = 0; j < 16; j ++) { 320 c255_mul(b, b, b); 321 } 322 c255_mul(b, b, a); 323 } 324 for (i = 14; i >= 0; i --) { 325 c255_mul(b, b, b); 326 if ((0xFFEB >> i) & 1) { 327 c255_mul(b, z2, b); 328 } 329 } 330 c255_mul(b, x2, b); 331 332 /* 333 * To avoid a dependency on br_i31_from_monty(), we use 334 * a Montgomery multiplication with 1. 335 * memcpy(x2, b, sizeof b); 336 * br_i31_from_monty(x2, C255_P, P0I); 337 */ 338 br_i31_zero(a, C255_P[0]); 339 a[1] = 1; 340 br_i31_montymul(x2, a, b, C255_P, P0I); 341 342 br_i31_encode(G, 32, x2); 343 byteswap(G); 344 return 1; 345 } 346 347 static size_t 348 api_mulgen(unsigned char *R, 349 const unsigned char *x, size_t xlen, int curve) 350 { 351 const unsigned char *G; 352 size_t Glen; 353 354 G = api_generator(curve, &Glen); 355 memcpy(R, G, Glen); 356 api_mul(R, Glen, x, xlen, curve); 357 return Glen; 358 } 359 360 static uint32_t 361 api_muladd(unsigned char *A, const unsigned char *B, size_t len, 362 const unsigned char *x, size_t xlen, 363 const unsigned char *y, size_t ylen, int curve) 364 { 365 /* 366 * We don't implement this method, since it is used for ECDSA 367 * only, and there is no ECDSA over Curve25519 (which instead 368 * uses EdDSA). 369 */ 370 (void)A; 371 (void)B; 372 (void)len; 373 (void)x; 374 (void)xlen; 375 (void)y; 376 (void)ylen; 377 (void)curve; 378 return 0; 379 } 380 381 /* see bearssl_ec.h */ 382 const br_ec_impl br_ec_c25519_i31 = { 383 (uint32_t)0x20000000, 384 &api_generator, 385 &api_order, 386 &api_xoff, 387 &api_mul, 388 &api_mulgen, 389 &api_muladd 390 }; 391