1 /* 2 * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining 5 * a copy of this software and associated documentation files (the 6 * "Software"), to deal in the Software without restriction, including 7 * without limitation the rights to use, copy, modify, merge, publish, 8 * distribute, sublicense, and/or sell copies of the Software, and to 9 * permit persons to whom the Software is furnished to do so, subject to 10 * the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 25 #include "inner.h" 26 27 /* 28 * Parameters for supported curves: 29 * - field modulus p 30 * - R^2 mod p (R = 2^(15k) for the smallest k such that R >= p) 31 * - b*R mod p (b is the second curve equation parameter) 32 */ 33 34 static const uint16_t P256_P[] = { 35 0x0111, 36 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x003F, 0x0000, 37 0x0000, 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x4000, 0x7FFF, 38 0x7FFF, 0x0001 39 }; 40 41 static const uint16_t P256_R2[] = { 42 0x0111, 43 0x0000, 0x6000, 0x0000, 0x0000, 0x0000, 0x0000, 0x7FFC, 0x7FFF, 44 0x7FBF, 0x7FFF, 0x7FBF, 0x7FFF, 0x7FFF, 0x7FFF, 0x77FF, 0x7FFF, 45 0x4FFF, 0x0000 46 }; 47 48 static const uint16_t P256_B[] = { 49 0x0111, 50 0x770C, 0x5EEF, 0x29C4, 0x3EC4, 0x6273, 0x0486, 0x4543, 0x3993, 51 0x3C01, 0x6B56, 0x212E, 0x57EE, 0x4882, 0x204B, 0x7483, 0x3C16, 52 0x0187, 0x0000 53 }; 54 55 static const uint16_t P384_P[] = { 56 0x0199, 57 0x7FFF, 0x7FFF, 0x0003, 0x0000, 0x0000, 0x0000, 0x7FC0, 0x7FFF, 58 0x7EFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 59 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 60 0x7FFF, 0x01FF 61 }; 62 63 static const uint16_t P384_R2[] = { 64 0x0199, 65 0x1000, 0x0000, 0x0000, 0x7FFF, 0x7FFF, 0x0001, 0x0000, 0x0010, 66 0x0000, 0x0000, 0x0000, 0x7F00, 0x7FFF, 0x01FF, 0x0000, 0x1000, 67 0x0000, 0x2000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 68 0x0000, 0x0000 69 }; 70 71 static const uint16_t P384_B[] = { 72 0x0199, 73 0x7333, 0x2096, 0x70D1, 0x2310, 0x3020, 0x6197, 0x1464, 0x35BB, 74 0x70CA, 0x0117, 0x1920, 0x4136, 0x5FC8, 0x5713, 0x4938, 0x7DD2, 75 0x4DD2, 0x4A71, 0x0220, 0x683E, 0x2C87, 0x4DB1, 0x7BFF, 0x6C09, 76 0x0452, 0x0084 77 }; 78 79 static const uint16_t P521_P[] = { 80 0x022B, 81 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 82 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 83 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 84 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 85 0x7FFF, 0x7FFF, 0x07FF 86 }; 87 88 static const uint16_t P521_R2[] = { 89 0x022B, 90 0x0100, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 91 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 92 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 93 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 94 0x0000, 0x0000, 0x0000 95 }; 96 97 static const uint16_t P521_B[] = { 98 0x022B, 99 0x7002, 0x6A07, 0x751A, 0x228F, 0x71EF, 0x5869, 0x20F4, 0x1EFC, 100 0x7357, 0x37E0, 0x4EEC, 0x605E, 0x1652, 0x26F6, 0x31FA, 0x4A8F, 101 0x6193, 0x3C2A, 0x3C42, 0x48C7, 0x3489, 0x6771, 0x4C57, 0x5CCD, 102 0x2725, 0x545B, 0x503B, 0x5B42, 0x21A0, 0x2534, 0x687E, 0x70E4, 103 0x1618, 0x27D7, 0x0465 104 }; 105 106 typedef struct { 107 const uint16_t *p; 108 const uint16_t *b; 109 const uint16_t *R2; 110 uint16_t p0i; 111 size_t point_len; 112 } curve_params; 113 114 static inline const curve_params * 115 id_to_curve(int curve) 116 { 117 static const curve_params pp[] = { 118 { P256_P, P256_B, P256_R2, 0x0001, 65 }, 119 { P384_P, P384_B, P384_R2, 0x0001, 97 }, 120 { P521_P, P521_B, P521_R2, 0x0001, 133 } 121 }; 122 123 return &pp[curve - BR_EC_secp256r1]; 124 } 125 126 #define I15_LEN ((BR_MAX_EC_SIZE + 29) / 15) 127 128 /* 129 * Type for a point in Jacobian coordinates: 130 * -- three values, x, y and z, in Montgomery representation 131 * -- affine coordinates are X = x / z^2 and Y = y / z^3 132 * -- for the point at infinity, z = 0 133 */ 134 typedef struct { 135 uint16_t c[3][I15_LEN]; 136 } jacobian; 137 138 /* 139 * We use a custom interpreter that uses a dozen registers, and 140 * only six operations: 141 * MSET(d, a) copy a into d 142 * MADD(d, a) d = d+a (modular) 143 * MSUB(d, a) d = d-a (modular) 144 * MMUL(d, a, b) d = a*b (Montgomery multiplication) 145 * MINV(d, a, b) invert d modulo p; a and b are used as scratch registers 146 * MTZ(d) clear return value if d = 0 147 * Destination of MMUL (d) must be distinct from operands (a and b). 148 * There is no such constraint for MSUB and MADD. 149 * 150 * Registers include the operand coordinates, and temporaries. 151 */ 152 #define MSET(d, a) (0x0000 + ((d) << 8) + ((a) << 4)) 153 #define MADD(d, a) (0x1000 + ((d) << 8) + ((a) << 4)) 154 #define MSUB(d, a) (0x2000 + ((d) << 8) + ((a) << 4)) 155 #define MMUL(d, a, b) (0x3000 + ((d) << 8) + ((a) << 4) + (b)) 156 #define MINV(d, a, b) (0x4000 + ((d) << 8) + ((a) << 4) + (b)) 157 #define MTZ(d) (0x5000 + ((d) << 8)) 158 #define ENDCODE 0 159 160 /* 161 * Registers for the input operands. 162 */ 163 #define P1x 0 164 #define P1y 1 165 #define P1z 2 166 #define P2x 3 167 #define P2y 4 168 #define P2z 5 169 170 /* 171 * Alternate names for the first input operand. 172 */ 173 #define Px 0 174 #define Py 1 175 #define Pz 2 176 177 /* 178 * Temporaries. 179 */ 180 #define t1 6 181 #define t2 7 182 #define t3 8 183 #define t4 9 184 #define t5 10 185 #define t6 11 186 #define t7 12 187 188 /* 189 * Extra scratch registers available when there is no second operand (e.g. 190 * for "double" and "affine"). 191 */ 192 #define t8 3 193 #define t9 4 194 #define t10 5 195 196 /* 197 * Doubling formulas are: 198 * 199 * s = 4*x*y^2 200 * m = 3*(x + z^2)*(x - z^2) 201 * x' = m^2 - 2*s 202 * y' = m*(s - x') - 8*y^4 203 * z' = 2*y*z 204 * 205 * If y = 0 (P has order 2) then this yields infinity (z' = 0), as it 206 * should. This case should not happen anyway, because our curves have 207 * prime order, and thus do not contain any point of order 2. 208 * 209 * If P is infinity (z = 0), then again the formulas yield infinity, 210 * which is correct. Thus, this code works for all points. 211 * 212 * Cost: 8 multiplications 213 */ 214 static const uint16_t code_double[] = { 215 /* 216 * Compute z^2 (in t1). 217 */ 218 MMUL(t1, Pz, Pz), 219 220 /* 221 * Compute x-z^2 (in t2) and then x+z^2 (in t1). 222 */ 223 MSET(t2, Px), 224 MSUB(t2, t1), 225 MADD(t1, Px), 226 227 /* 228 * Compute m = 3*(x+z^2)*(x-z^2) (in t1). 229 */ 230 MMUL(t3, t1, t2), 231 MSET(t1, t3), 232 MADD(t1, t3), 233 MADD(t1, t3), 234 235 /* 236 * Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3). 237 */ 238 MMUL(t3, Py, Py), 239 MADD(t3, t3), 240 MMUL(t2, Px, t3), 241 MADD(t2, t2), 242 243 /* 244 * Compute x' = m^2 - 2*s. 245 */ 246 MMUL(Px, t1, t1), 247 MSUB(Px, t2), 248 MSUB(Px, t2), 249 250 /* 251 * Compute z' = 2*y*z. 252 */ 253 MMUL(t4, Py, Pz), 254 MSET(Pz, t4), 255 MADD(Pz, t4), 256 257 /* 258 * Compute y' = m*(s - x') - 8*y^4. Note that we already have 259 * 2*y^2 in t3. 260 */ 261 MSUB(t2, Px), 262 MMUL(Py, t1, t2), 263 MMUL(t4, t3, t3), 264 MSUB(Py, t4), 265 MSUB(Py, t4), 266 267 ENDCODE 268 }; 269 270 /* 271 * Addtions formulas are: 272 * 273 * u1 = x1 * z2^2 274 * u2 = x2 * z1^2 275 * s1 = y1 * z2^3 276 * s2 = y2 * z1^3 277 * h = u2 - u1 278 * r = s2 - s1 279 * x3 = r^2 - h^3 - 2 * u1 * h^2 280 * y3 = r * (u1 * h^2 - x3) - s1 * h^3 281 * z3 = h * z1 * z2 282 * 283 * If both P1 and P2 are infinity, then z1 == 0 and z2 == 0, implying that 284 * z3 == 0, so the result is correct. 285 * If either of P1 or P2 is infinity, but not both, then z3 == 0, which is 286 * not correct. 287 * h == 0 only if u1 == u2; this happens in two cases: 288 * -- if s1 == s2 then P1 and/or P2 is infinity, or P1 == P2 289 * -- if s1 != s2 then P1 + P2 == infinity (but neither P1 or P2 is infinity) 290 * 291 * Thus, the following situations are not handled correctly: 292 * -- P1 = 0 and P2 != 0 293 * -- P1 != 0 and P2 = 0 294 * -- P1 = P2 295 * All other cases are properly computed. However, even in "incorrect" 296 * situations, the three coordinates still are properly formed field 297 * elements. 298 * 299 * The returned flag is cleared if r == 0. This happens in the following 300 * cases: 301 * -- Both points are on the same horizontal line (same Y coordinate). 302 * -- Both points are infinity. 303 * -- One point is infinity and the other is on line Y = 0. 304 * The third case cannot happen with our curves (there is no valid point 305 * on line Y = 0 since that would be a point of order 2). If the two 306 * source points are non-infinity, then remains only the case where the 307 * two points are on the same horizontal line. 308 * 309 * This allows us to detect the "P1 == P2" case, assuming that P1 != 0 and 310 * P2 != 0: 311 * -- If the returned value is not the point at infinity, then it was properly 312 * computed. 313 * -- Otherwise, if the returned flag is 1, then P1+P2 = 0, and the result 314 * is indeed the point at infinity. 315 * -- Otherwise (result is infinity, flag is 0), then P1 = P2 and we should 316 * use the 'double' code. 317 * 318 * Cost: 16 multiplications 319 */ 320 static const uint16_t code_add[] = { 321 /* 322 * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3). 323 */ 324 MMUL(t3, P2z, P2z), 325 MMUL(t1, P1x, t3), 326 MMUL(t4, P2z, t3), 327 MMUL(t3, P1y, t4), 328 329 /* 330 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4). 331 */ 332 MMUL(t4, P1z, P1z), 333 MMUL(t2, P2x, t4), 334 MMUL(t5, P1z, t4), 335 MMUL(t4, P2y, t5), 336 337 /* 338 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4). 339 */ 340 MSUB(t2, t1), 341 MSUB(t4, t3), 342 343 /* 344 * Report cases where r = 0 through the returned flag. 345 */ 346 MTZ(t4), 347 348 /* 349 * Compute u1*h^2 (in t6) and h^3 (in t5). 350 */ 351 MMUL(t7, t2, t2), 352 MMUL(t6, t1, t7), 353 MMUL(t5, t7, t2), 354 355 /* 356 * Compute x3 = r^2 - h^3 - 2*u1*h^2. 357 * t1 and t7 can be used as scratch registers. 358 */ 359 MMUL(P1x, t4, t4), 360 MSUB(P1x, t5), 361 MSUB(P1x, t6), 362 MSUB(P1x, t6), 363 364 /* 365 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3. 366 */ 367 MSUB(t6, P1x), 368 MMUL(P1y, t4, t6), 369 MMUL(t1, t5, t3), 370 MSUB(P1y, t1), 371 372 /* 373 * Compute z3 = h*z1*z2. 374 */ 375 MMUL(t1, P1z, P2z), 376 MMUL(P1z, t1, t2), 377 378 ENDCODE 379 }; 380 381 /* 382 * Check that the point is on the curve. This code snippet assumes the 383 * following conventions: 384 * -- Coordinates x and y have been freshly decoded in P1 (but not 385 * converted to Montgomery coordinates yet). 386 * -- P2x, P2y and P2z are set to, respectively, R^2, b*R and 1. 387 */ 388 static const uint16_t code_check[] = { 389 390 /* Convert x and y to Montgomery representation. */ 391 MMUL(t1, P1x, P2x), 392 MMUL(t2, P1y, P2x), 393 MSET(P1x, t1), 394 MSET(P1y, t2), 395 396 /* Compute x^3 in t1. */ 397 MMUL(t2, P1x, P1x), 398 MMUL(t1, P1x, t2), 399 400 /* Subtract 3*x from t1. */ 401 MSUB(t1, P1x), 402 MSUB(t1, P1x), 403 MSUB(t1, P1x), 404 405 /* Add b. */ 406 MADD(t1, P2y), 407 408 /* Compute y^2 in t2. */ 409 MMUL(t2, P1y, P1y), 410 411 /* Compare y^2 with x^3 - 3*x + b; they must match. */ 412 MSUB(t1, t2), 413 MTZ(t1), 414 415 /* Set z to 1 (in Montgomery representation). */ 416 MMUL(P1z, P2x, P2z), 417 418 ENDCODE 419 }; 420 421 /* 422 * Conversion back to affine coordinates. This code snippet assumes that 423 * the z coordinate of P2 is set to 1 (not in Montgomery representation). 424 */ 425 static const uint16_t code_affine[] = { 426 427 /* Save z*R in t1. */ 428 MSET(t1, P1z), 429 430 /* Compute z^3 in t2. */ 431 MMUL(t2, P1z, P1z), 432 MMUL(t3, P1z, t2), 433 MMUL(t2, t3, P2z), 434 435 /* Invert to (1/z^3) in t2. */ 436 MINV(t2, t3, t4), 437 438 /* Compute y. */ 439 MSET(t3, P1y), 440 MMUL(P1y, t2, t3), 441 442 /* Compute (1/z^2) in t3. */ 443 MMUL(t3, t2, t1), 444 445 /* Compute x. */ 446 MSET(t2, P1x), 447 MMUL(P1x, t2, t3), 448 449 ENDCODE 450 }; 451 452 static uint32_t 453 run_code(jacobian *P1, const jacobian *P2, 454 const curve_params *cc, const uint16_t *code) 455 { 456 uint32_t r; 457 uint16_t t[13][I15_LEN]; 458 size_t u; 459 460 r = 1; 461 462 /* 463 * Copy the two operands in the dedicated registers. 464 */ 465 memcpy(t[P1x], P1->c, 3 * I15_LEN * sizeof(uint16_t)); 466 memcpy(t[P2x], P2->c, 3 * I15_LEN * sizeof(uint16_t)); 467 468 /* 469 * Run formulas. 470 */ 471 for (u = 0;; u ++) { 472 unsigned op, d, a, b; 473 474 op = code[u]; 475 if (op == 0) { 476 break; 477 } 478 d = (op >> 8) & 0x0F; 479 a = (op >> 4) & 0x0F; 480 b = op & 0x0F; 481 op >>= 12; 482 switch (op) { 483 uint32_t ctl; 484 size_t plen; 485 unsigned char tp[(BR_MAX_EC_SIZE + 7) >> 3]; 486 487 case 0: 488 memcpy(t[d], t[a], I15_LEN * sizeof(uint16_t)); 489 break; 490 case 1: 491 ctl = br_i15_add(t[d], t[a], 1); 492 ctl |= NOT(br_i15_sub(t[d], cc->p, 0)); 493 br_i15_sub(t[d], cc->p, ctl); 494 break; 495 case 2: 496 br_i15_add(t[d], cc->p, br_i15_sub(t[d], t[a], 1)); 497 break; 498 case 3: 499 br_i15_montymul(t[d], t[a], t[b], cc->p, cc->p0i); 500 break; 501 case 4: 502 plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3; 503 br_i15_encode(tp, plen, cc->p); 504 tp[plen - 1] -= 2; 505 br_i15_modpow(t[d], tp, plen, 506 cc->p, cc->p0i, t[a], t[b]); 507 break; 508 default: 509 r &= ~br_i15_iszero(t[d]); 510 break; 511 } 512 } 513 514 /* 515 * Copy back result. 516 */ 517 memcpy(P1->c, t[P1x], 3 * I15_LEN * sizeof(uint16_t)); 518 return r; 519 } 520 521 static void 522 set_one(uint16_t *x, const uint16_t *p) 523 { 524 size_t plen; 525 526 plen = (p[0] + 31) >> 4; 527 memset(x, 0, plen * sizeof *x); 528 x[0] = p[0]; 529 x[1] = 0x0001; 530 } 531 532 static void 533 point_zero(jacobian *P, const curve_params *cc) 534 { 535 memset(P, 0, sizeof *P); 536 P->c[0][0] = P->c[1][0] = P->c[2][0] = cc->p[0]; 537 } 538 539 static inline void 540 point_double(jacobian *P, const curve_params *cc) 541 { 542 run_code(P, P, cc, code_double); 543 } 544 545 static inline uint32_t 546 point_add(jacobian *P1, const jacobian *P2, const curve_params *cc) 547 { 548 return run_code(P1, P2, cc, code_add); 549 } 550 551 static void 552 point_mul(jacobian *P, const unsigned char *x, size_t xlen, 553 const curve_params *cc) 554 { 555 /* 556 * We do a simple double-and-add ladder with a 2-bit window 557 * to make only one add every two doublings. We thus first 558 * precompute 2P and 3P in some local buffers. 559 * 560 * We always perform two doublings and one addition; the 561 * addition is with P, 2P and 3P and is done in a temporary 562 * array. 563 * 564 * The addition code cannot handle cases where one of the 565 * operands is infinity, which is the case at the start of the 566 * ladder. We therefore need to maintain a flag that controls 567 * this situation. 568 */ 569 uint32_t qz; 570 jacobian P2, P3, Q, T, U; 571 572 memcpy(&P2, P, sizeof P2); 573 point_double(&P2, cc); 574 memcpy(&P3, P, sizeof P3); 575 point_add(&P3, &P2, cc); 576 577 point_zero(&Q, cc); 578 qz = 1; 579 while (xlen -- > 0) { 580 int k; 581 582 for (k = 6; k >= 0; k -= 2) { 583 uint32_t bits; 584 uint32_t bnz; 585 586 point_double(&Q, cc); 587 point_double(&Q, cc); 588 memcpy(&T, P, sizeof T); 589 memcpy(&U, &Q, sizeof U); 590 bits = (*x >> k) & (uint32_t)3; 591 bnz = NEQ(bits, 0); 592 CCOPY(EQ(bits, 2), &T, &P2, sizeof T); 593 CCOPY(EQ(bits, 3), &T, &P3, sizeof T); 594 point_add(&U, &T, cc); 595 CCOPY(bnz & qz, &Q, &T, sizeof Q); 596 CCOPY(bnz & ~qz, &Q, &U, sizeof Q); 597 qz &= ~bnz; 598 } 599 x ++; 600 } 601 memcpy(P, &Q, sizeof Q); 602 } 603 604 /* 605 * Decode point into Jacobian coordinates. This function does not support 606 * the point at infinity. If the point is invalid then this returns 0, but 607 * the coordinates are still set to properly formed field elements. 608 */ 609 static uint32_t 610 point_decode(jacobian *P, const void *src, size_t len, const curve_params *cc) 611 { 612 /* 613 * Points must use uncompressed format: 614 * -- first byte is 0x04; 615 * -- coordinates X and Y use unsigned big-endian, with the same 616 * length as the field modulus. 617 * 618 * We don't support hybrid format (uncompressed, but first byte 619 * has value 0x06 or 0x07, depending on the least significant bit 620 * of Y) because it is rather useless, and explicitly forbidden 621 * by PKIX (RFC 5480, section 2.2). 622 * 623 * We don't support compressed format either, because it is not 624 * much used in practice (there are or were patent-related 625 * concerns about point compression, which explains the lack of 626 * generalised support). Also, point compression support would 627 * need a bit more code. 628 */ 629 const unsigned char *buf; 630 size_t plen, zlen; 631 uint32_t r; 632 jacobian Q; 633 634 buf = src; 635 point_zero(P, cc); 636 plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3; 637 if (len != 1 + (plen << 1)) { 638 return 0; 639 } 640 r = br_i15_decode_mod(P->c[0], buf + 1, plen, cc->p); 641 r &= br_i15_decode_mod(P->c[1], buf + 1 + plen, plen, cc->p); 642 643 /* 644 * Check first byte. 645 */ 646 r &= EQ(buf[0], 0x04); 647 /* obsolete 648 r &= EQ(buf[0], 0x04) | (EQ(buf[0] & 0xFE, 0x06) 649 & ~(uint32_t)(buf[0] ^ buf[plen << 1])); 650 */ 651 652 /* 653 * Convert coordinates and check that the point is valid. 654 */ 655 zlen = ((cc->p[0] + 31) >> 4) * sizeof(uint16_t); 656 memcpy(Q.c[0], cc->R2, zlen); 657 memcpy(Q.c[1], cc->b, zlen); 658 set_one(Q.c[2], cc->p); 659 r &= ~run_code(P, &Q, cc, code_check); 660 return r; 661 } 662 663 /* 664 * Encode a point. This method assumes that the point is correct and is 665 * not the point at infinity. Encoded size is always 1+2*plen, where 666 * plen is the field modulus length, in bytes. 667 */ 668 static void 669 point_encode(void *dst, const jacobian *P, const curve_params *cc) 670 { 671 unsigned char *buf; 672 size_t plen; 673 jacobian Q, T; 674 675 buf = dst; 676 plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3; 677 buf[0] = 0x04; 678 memcpy(&Q, P, sizeof *P); 679 set_one(T.c[2], cc->p); 680 run_code(&Q, &T, cc, code_affine); 681 br_i15_encode(buf + 1, plen, Q.c[0]); 682 br_i15_encode(buf + 1 + plen, plen, Q.c[1]); 683 } 684 685 static const br_ec_curve_def * 686 id_to_curve_def(int curve) 687 { 688 switch (curve) { 689 case BR_EC_secp256r1: 690 return &br_secp256r1; 691 case BR_EC_secp384r1: 692 return &br_secp384r1; 693 case BR_EC_secp521r1: 694 return &br_secp521r1; 695 } 696 return NULL; 697 } 698 699 static const unsigned char * 700 api_generator(int curve, size_t *len) 701 { 702 const br_ec_curve_def *cd; 703 704 cd = id_to_curve_def(curve); 705 *len = cd->generator_len; 706 return cd->generator; 707 } 708 709 static const unsigned char * 710 api_order(int curve, size_t *len) 711 { 712 const br_ec_curve_def *cd; 713 714 cd = id_to_curve_def(curve); 715 *len = cd->order_len; 716 return cd->order; 717 } 718 719 static size_t 720 api_xoff(int curve, size_t *len) 721 { 722 api_generator(curve, len); 723 *len >>= 1; 724 return 1; 725 } 726 727 static uint32_t 728 api_mul(unsigned char *G, size_t Glen, 729 const unsigned char *x, size_t xlen, int curve) 730 { 731 uint32_t r; 732 const curve_params *cc; 733 jacobian P; 734 735 cc = id_to_curve(curve); 736 r = point_decode(&P, G, Glen, cc); 737 point_mul(&P, x, xlen, cc); 738 if (Glen == cc->point_len) { 739 point_encode(G, &P, cc); 740 } 741 return r; 742 } 743 744 static size_t 745 api_mulgen(unsigned char *R, 746 const unsigned char *x, size_t xlen, int curve) 747 { 748 const unsigned char *G; 749 size_t Glen; 750 751 G = api_generator(curve, &Glen); 752 memcpy(R, G, Glen); 753 api_mul(R, Glen, x, xlen, curve); 754 return Glen; 755 } 756 757 static uint32_t 758 api_muladd(unsigned char *A, const unsigned char *B, size_t len, 759 const unsigned char *x, size_t xlen, 760 const unsigned char *y, size_t ylen, int curve) 761 { 762 uint32_t r, t, z; 763 const curve_params *cc; 764 jacobian P, Q; 765 766 /* 767 * TODO: see about merging the two ladders. Right now, we do 768 * two independent point multiplications, which is a bit 769 * wasteful of CPU resources (but yields short code). 770 */ 771 772 cc = id_to_curve(curve); 773 r = point_decode(&P, A, len, cc); 774 if (B == NULL) { 775 size_t Glen; 776 777 B = api_generator(curve, &Glen); 778 } 779 r &= point_decode(&Q, B, len, cc); 780 point_mul(&P, x, xlen, cc); 781 point_mul(&Q, y, ylen, cc); 782 783 /* 784 * We want to compute P+Q. Since the base points A and B are distinct 785 * from infinity, and the multipliers are non-zero and lower than the 786 * curve order, then we know that P and Q are non-infinity. This 787 * leaves two special situations to test for: 788 * -- If P = Q then we must use point_double(). 789 * -- If P+Q = 0 then we must report an error. 790 */ 791 t = point_add(&P, &Q, cc); 792 point_double(&Q, cc); 793 z = br_i15_iszero(P.c[2]); 794 795 /* 796 * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we 797 * have the following: 798 * 799 * z = 0, t = 0 return P (normal addition) 800 * z = 0, t = 1 return P (normal addition) 801 * z = 1, t = 0 return Q (a 'double' case) 802 * z = 1, t = 1 report an error (P+Q = 0) 803 */ 804 CCOPY(z & ~t, &P, &Q, sizeof Q); 805 point_encode(A, &P, cc); 806 r &= ~(z & t); 807 808 return r; 809 } 810 811 /* see bearssl_ec.h */ 812 const br_ec_impl br_ec_prime_i15 = { 813 (uint32_t)0x03800000, 814 &api_generator, 815 &api_order, 816 &api_xoff, 817 &api_mul, 818 &api_mulgen, 819 &api_muladd 820 }; 821