1 /* crypto/ec/ecp_nistp521.c */ 2 /* 3 * Written by Adam Langley (Google) for the OpenSSL project 4 */ 5 /* Copyright 2011 Google Inc. 6 * 7 * Licensed under the Apache License, Version 2.0 (the "License"); 8 * 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 /* 22 * A 64-bit implementation of the NIST P-521 elliptic curve point multiplication 23 * 24 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c. 25 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519 26 * work which got its smarts from Daniel J. Bernstein's work on the same. 27 */ 28 29 #include <openssl/opensslconf.h> 30 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 31 32 # ifndef OPENSSL_SYS_VMS 33 # include <stdint.h> 34 # else 35 # include <inttypes.h> 36 # endif 37 38 # include <string.h> 39 # include <openssl/err.h> 40 # include "ec_lcl.h" 41 42 # if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) 43 /* even with gcc, the typedef won't work for 32-bit platforms */ 44 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit 45 * platforms */ 46 # else 47 # error "Need GCC 3.1 or later to define type uint128_t" 48 # endif 49 50 typedef uint8_t u8; 51 typedef uint64_t u64; 52 53 /* 54 * The underlying field. P521 operates over GF(2^521-1). We can serialise an 55 * element of this field into 66 bytes where the most significant byte 56 * contains only a single bit. We call this an felem_bytearray. 
*/

typedef u8 felem_bytearray[66];

/*
 * These are the parameters of P521, taken from FIPS 186-3, section D.1.2.5.
 * These values are big-endian. Order: p, a, b, Gx, Gy.
 */
static const felem_bytearray nistp521_curve_params[5] = {
    {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* p */
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff},
    {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* a = -3 */
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xfc},
    {0x00, 0x51, 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c, /* b */
     0x9a, 0x1f, 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85,
     0x40, 0xee, 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3,
     0x15, 0xf3, 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1,
     0x09, 0xe1, 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e,
     0x93, 0x7b, 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1,
     0xbf, 0x07, 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c,
     0x34, 0xf1, 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50,
     0x3f, 0x00},
    {0x00, 0xc6, 0x85, 0x8e, 0x06, 0xb7, 0x04, 0x04, /* x */
     0xe9, 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95,
     0xb4, 0x42, 0x9c, 0x64, 0x81, 0x39, 0x05, 0x3f,
     0xb5, 0x21, 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d,
     0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7,
     0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff,
     0xa8, 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a,
     0x42, 0x9b, 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5,
     0xbd, 0x66},
    {0x01, 0x18, 0x39, 0x29, 0x6a, 0x78, 0x9a, 0x3b, /* y */
     0xc0, 0x04, 0x5c, 0x8a, 0x5f, 0xb4, 0x2c, 0x7d,
     0x1b, 0xd9, 0x98, 0xf5, 0x44, 0x49, 0x57, 0x9b,
     0x44, 0x68, 0x17, 0xaf, 0xbd, 0x17, 0x27, 0x3e,
     0x66, 0x2c, 0x97, 0xee, 0x72, 0x99, 0x5e, 0xf4,
     0x26, 0x40, 0xc5, 0x50, 0xb9, 0x01, 0x3f, 0xad,
     0x07, 0x61, 0x35, 0x3c, 0x70, 0x86, 0xa2, 0x72,
     0xc2, 0x40, 0x88, 0xbe, 0x94, 0x76, 0x9f, 0xd1,
     0x66, 0x50}
};

/*-
 * The representation of field elements.
 * ------------------------------------
 *
 * We represent field elements with nine values. These values are either 64 or
 * 128 bits and the field element represented is:
 *   v[0]*2^0 + v[1]*2^58 + v[2]*2^116 + ... + v[8]*2^464 (mod p)
 * Each of the nine values is called a 'limb'. Since the limbs are spaced only
 * 58 bits apart, but are greater than 58 bits in length, the most significant
 * bits of each limb overlap with the least significant bits of the next.
 * (This redundant representation lets additions and multiplications defer
 * carry propagation until felem_reduce.)
 *
 * A field element with 64-bit limbs is an 'felem'. One with 128-bit limbs is a
 * 'largefelem' */

# define NLIMBS 9

typedef uint64_t limb;
typedef limb felem[NLIMBS];
typedef uint128_t largefelem[NLIMBS];

/* Masks for the low 57/58 bits of a limb. */
static const limb bottom57bits = 0x1ffffffffffffff;
static const limb bottom58bits = 0x3ffffffffffffff;

/*
 * bin66_to_felem takes a little-endian byte array and converts it into felem
 * form. This assumes that the CPU is little-endian.
139 */ 140 static void bin66_to_felem(felem out, const u8 in[66]) 141 { 142 out[0] = (*((limb *) & in[0])) & bottom58bits; 143 out[1] = (*((limb *) & in[7]) >> 2) & bottom58bits; 144 out[2] = (*((limb *) & in[14]) >> 4) & bottom58bits; 145 out[3] = (*((limb *) & in[21]) >> 6) & bottom58bits; 146 out[4] = (*((limb *) & in[29])) & bottom58bits; 147 out[5] = (*((limb *) & in[36]) >> 2) & bottom58bits; 148 out[6] = (*((limb *) & in[43]) >> 4) & bottom58bits; 149 out[7] = (*((limb *) & in[50]) >> 6) & bottom58bits; 150 out[8] = (*((limb *) & in[58])) & bottom57bits; 151 } 152 153 /* 154 * felem_to_bin66 takes an felem and serialises into a little endian, 66 byte 155 * array. This assumes that the CPU is little-endian. 156 */ 157 static void felem_to_bin66(u8 out[66], const felem in) 158 { 159 memset(out, 0, 66); 160 (*((limb *) & out[0])) = in[0]; 161 (*((limb *) & out[7])) |= in[1] << 2; 162 (*((limb *) & out[14])) |= in[2] << 4; 163 (*((limb *) & out[21])) |= in[3] << 6; 164 (*((limb *) & out[29])) = in[4]; 165 (*((limb *) & out[36])) |= in[5] << 2; 166 (*((limb *) & out[43])) |= in[6] << 4; 167 (*((limb *) & out[50])) |= in[7] << 6; 168 (*((limb *) & out[58])) = in[8]; 169 } 170 171 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */ 172 static void flip_endian(u8 *out, const u8 *in, unsigned len) 173 { 174 unsigned i; 175 for (i = 0; i < len; ++i) 176 out[i] = in[len - 1 - i]; 177 } 178 179 /* BN_to_felem converts an OpenSSL BIGNUM into an felem */ 180 static int BN_to_felem(felem out, const BIGNUM *bn) 181 { 182 felem_bytearray b_in; 183 felem_bytearray b_out; 184 unsigned num_bytes; 185 186 /* BN_bn2bin eats leading zeroes */ 187 memset(b_out, 0, sizeof(b_out)); 188 num_bytes = BN_num_bytes(bn); 189 if (num_bytes > sizeof(b_out)) { 190 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); 191 return 0; 192 } 193 if (BN_is_negative(bn)) { 194 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); 195 return 0; 196 } 197 num_bytes = BN_bn2bin(bn, b_in); 198 
flip_endian(b_out, b_in, num_bytes); 199 bin66_to_felem(out, b_out); 200 return 1; 201 } 202 203 /* felem_to_BN converts an felem into an OpenSSL BIGNUM */ 204 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in) 205 { 206 felem_bytearray b_in, b_out; 207 felem_to_bin66(b_in, in); 208 flip_endian(b_out, b_in, sizeof(b_out)); 209 return BN_bin2bn(b_out, sizeof(b_out), out); 210 } 211 212 /*- 213 * Field operations 214 * ---------------- 215 */ 216 217 static void felem_one(felem out) 218 { 219 out[0] = 1; 220 out[1] = 0; 221 out[2] = 0; 222 out[3] = 0; 223 out[4] = 0; 224 out[5] = 0; 225 out[6] = 0; 226 out[7] = 0; 227 out[8] = 0; 228 } 229 230 static void felem_assign(felem out, const felem in) 231 { 232 out[0] = in[0]; 233 out[1] = in[1]; 234 out[2] = in[2]; 235 out[3] = in[3]; 236 out[4] = in[4]; 237 out[5] = in[5]; 238 out[6] = in[6]; 239 out[7] = in[7]; 240 out[8] = in[8]; 241 } 242 243 /* felem_sum64 sets out = out + in. */ 244 static void felem_sum64(felem out, const felem in) 245 { 246 out[0] += in[0]; 247 out[1] += in[1]; 248 out[2] += in[2]; 249 out[3] += in[3]; 250 out[4] += in[4]; 251 out[5] += in[5]; 252 out[6] += in[6]; 253 out[7] += in[7]; 254 out[8] += in[8]; 255 } 256 257 /* felem_scalar sets out = in * scalar */ 258 static void felem_scalar(felem out, const felem in, limb scalar) 259 { 260 out[0] = in[0] * scalar; 261 out[1] = in[1] * scalar; 262 out[2] = in[2] * scalar; 263 out[3] = in[3] * scalar; 264 out[4] = in[4] * scalar; 265 out[5] = in[5] * scalar; 266 out[6] = in[6] * scalar; 267 out[7] = in[7] * scalar; 268 out[8] = in[8] * scalar; 269 } 270 271 /* felem_scalar64 sets out = out * scalar */ 272 static void felem_scalar64(felem out, limb scalar) 273 { 274 out[0] *= scalar; 275 out[1] *= scalar; 276 out[2] *= scalar; 277 out[3] *= scalar; 278 out[4] *= scalar; 279 out[5] *= scalar; 280 out[6] *= scalar; 281 out[7] *= scalar; 282 out[8] *= scalar; 283 } 284 285 /* felem_scalar128 sets out = out * scalar */ 286 static void 
felem_scalar128(largefelem out, limb scalar) 287 { 288 out[0] *= scalar; 289 out[1] *= scalar; 290 out[2] *= scalar; 291 out[3] *= scalar; 292 out[4] *= scalar; 293 out[5] *= scalar; 294 out[6] *= scalar; 295 out[7] *= scalar; 296 out[8] *= scalar; 297 } 298 299 /*- 300 * felem_neg sets |out| to |-in| 301 * On entry: 302 * in[i] < 2^59 + 2^14 303 * On exit: 304 * out[i] < 2^62 305 */ 306 static void felem_neg(felem out, const felem in) 307 { 308 /* In order to prevent underflow, we subtract from 0 mod p. */ 309 static const limb two62m3 = (((limb) 1) << 62) - (((limb) 1) << 5); 310 static const limb two62m2 = (((limb) 1) << 62) - (((limb) 1) << 4); 311 312 out[0] = two62m3 - in[0]; 313 out[1] = two62m2 - in[1]; 314 out[2] = two62m2 - in[2]; 315 out[3] = two62m2 - in[3]; 316 out[4] = two62m2 - in[4]; 317 out[5] = two62m2 - in[5]; 318 out[6] = two62m2 - in[6]; 319 out[7] = two62m2 - in[7]; 320 out[8] = two62m2 - in[8]; 321 } 322 323 /*- 324 * felem_diff64 subtracts |in| from |out| 325 * On entry: 326 * in[i] < 2^59 + 2^14 327 * On exit: 328 * out[i] < out[i] + 2^62 329 */ 330 static void felem_diff64(felem out, const felem in) 331 { 332 /* 333 * In order to prevent underflow, we add 0 mod p before subtracting. 334 */ 335 static const limb two62m3 = (((limb) 1) << 62) - (((limb) 1) << 5); 336 static const limb two62m2 = (((limb) 1) << 62) - (((limb) 1) << 4); 337 338 out[0] += two62m3 - in[0]; 339 out[1] += two62m2 - in[1]; 340 out[2] += two62m2 - in[2]; 341 out[3] += two62m2 - in[3]; 342 out[4] += two62m2 - in[4]; 343 out[5] += two62m2 - in[5]; 344 out[6] += two62m2 - in[6]; 345 out[7] += two62m2 - in[7]; 346 out[8] += two62m2 - in[8]; 347 } 348 349 /*- 350 * felem_diff_128_64 subtracts |in| from |out| 351 * On entry: 352 * in[i] < 2^62 + 2^17 353 * On exit: 354 * out[i] < out[i] + 2^63 355 */ 356 static void felem_diff_128_64(largefelem out, const felem in) 357 { 358 /* 359 * In order to prevent underflow, we add 0 mod p before subtracting. 
360 */ 361 static const limb two63m6 = (((limb) 1) << 62) - (((limb) 1) << 5); 362 static const limb two63m5 = (((limb) 1) << 62) - (((limb) 1) << 4); 363 364 out[0] += two63m6 - in[0]; 365 out[1] += two63m5 - in[1]; 366 out[2] += two63m5 - in[2]; 367 out[3] += two63m5 - in[3]; 368 out[4] += two63m5 - in[4]; 369 out[5] += two63m5 - in[5]; 370 out[6] += two63m5 - in[6]; 371 out[7] += two63m5 - in[7]; 372 out[8] += two63m5 - in[8]; 373 } 374 375 /*- 376 * felem_diff_128_64 subtracts |in| from |out| 377 * On entry: 378 * in[i] < 2^126 379 * On exit: 380 * out[i] < out[i] + 2^127 - 2^69 381 */ 382 static void felem_diff128(largefelem out, const largefelem in) 383 { 384 /* 385 * In order to prevent underflow, we add 0 mod p before subtracting. 386 */ 387 static const uint128_t two127m70 = 388 (((uint128_t) 1) << 127) - (((uint128_t) 1) << 70); 389 static const uint128_t two127m69 = 390 (((uint128_t) 1) << 127) - (((uint128_t) 1) << 69); 391 392 out[0] += (two127m70 - in[0]); 393 out[1] += (two127m69 - in[1]); 394 out[2] += (two127m69 - in[2]); 395 out[3] += (two127m69 - in[3]); 396 out[4] += (two127m69 - in[4]); 397 out[5] += (two127m69 - in[5]); 398 out[6] += (two127m69 - in[6]); 399 out[7] += (two127m69 - in[7]); 400 out[8] += (two127m69 - in[8]); 401 } 402 403 /*- 404 * felem_square sets |out| = |in|^2 405 * On entry: 406 * in[i] < 2^62 407 * On exit: 408 * out[i] < 17 * max(in[i]) * max(in[i]) 409 */ 410 static void felem_square(largefelem out, const felem in) 411 { 412 felem inx2, inx4; 413 felem_scalar(inx2, in, 2); 414 felem_scalar(inx4, in, 4); 415 416 /*- 417 * We have many cases were we want to do 418 * in[x] * in[y] + 419 * in[y] * in[x] 420 * This is obviously just 421 * 2 * in[x] * in[y] 422 * However, rather than do the doubling on the 128 bit result, we 423 * double one of the inputs to the multiplication by reading from 424 * |inx2| 425 */ 426 427 out[0] = ((uint128_t) in[0]) * in[0]; 428 out[1] = ((uint128_t) in[0]) * inx2[1]; 429 out[2] = ((uint128_t) 
in[0]) * inx2[2] + ((uint128_t) in[1]) * in[1]; 430 out[3] = ((uint128_t) in[0]) * inx2[3] + ((uint128_t) in[1]) * inx2[2]; 431 out[4] = ((uint128_t) in[0]) * inx2[4] + 432 ((uint128_t) in[1]) * inx2[3] + ((uint128_t) in[2]) * in[2]; 433 out[5] = ((uint128_t) in[0]) * inx2[5] + 434 ((uint128_t) in[1]) * inx2[4] + ((uint128_t) in[2]) * inx2[3]; 435 out[6] = ((uint128_t) in[0]) * inx2[6] + 436 ((uint128_t) in[1]) * inx2[5] + 437 ((uint128_t) in[2]) * inx2[4] + ((uint128_t) in[3]) * in[3]; 438 out[7] = ((uint128_t) in[0]) * inx2[7] + 439 ((uint128_t) in[1]) * inx2[6] + 440 ((uint128_t) in[2]) * inx2[5] + ((uint128_t) in[3]) * inx2[4]; 441 out[8] = ((uint128_t) in[0]) * inx2[8] + 442 ((uint128_t) in[1]) * inx2[7] + 443 ((uint128_t) in[2]) * inx2[6] + 444 ((uint128_t) in[3]) * inx2[5] + ((uint128_t) in[4]) * in[4]; 445 446 /* 447 * The remaining limbs fall above 2^521, with the first falling at 2^522. 448 * They correspond to locations one bit up from the limbs produced above 449 * so we would have to multiply by two to align them. Again, rather than 450 * operate on the 128-bit result, we double one of the inputs to the 451 * multiplication. If we want to double for both this reason, and the 452 * reason above, then we end up multiplying by four. 
453 */ 454 455 /* 9 */ 456 out[0] += ((uint128_t) in[1]) * inx4[8] + 457 ((uint128_t) in[2]) * inx4[7] + 458 ((uint128_t) in[3]) * inx4[6] + ((uint128_t) in[4]) * inx4[5]; 459 460 /* 10 */ 461 out[1] += ((uint128_t) in[2]) * inx4[8] + 462 ((uint128_t) in[3]) * inx4[7] + 463 ((uint128_t) in[4]) * inx4[6] + ((uint128_t) in[5]) * inx2[5]; 464 465 /* 11 */ 466 out[2] += ((uint128_t) in[3]) * inx4[8] + 467 ((uint128_t) in[4]) * inx4[7] + ((uint128_t) in[5]) * inx4[6]; 468 469 /* 12 */ 470 out[3] += ((uint128_t) in[4]) * inx4[8] + 471 ((uint128_t) in[5]) * inx4[7] + ((uint128_t) in[6]) * inx2[6]; 472 473 /* 13 */ 474 out[4] += ((uint128_t) in[5]) * inx4[8] + ((uint128_t) in[6]) * inx4[7]; 475 476 /* 14 */ 477 out[5] += ((uint128_t) in[6]) * inx4[8] + ((uint128_t) in[7]) * inx2[7]; 478 479 /* 15 */ 480 out[6] += ((uint128_t) in[7]) * inx4[8]; 481 482 /* 16 */ 483 out[7] += ((uint128_t) in[8]) * inx2[8]; 484 } 485 486 /*- 487 * felem_mul sets |out| = |in1| * |in2| 488 * On entry: 489 * in1[i] < 2^64 490 * in2[i] < 2^63 491 * On exit: 492 * out[i] < 17 * max(in1[i]) * max(in2[i]) 493 */ 494 static void felem_mul(largefelem out, const felem in1, const felem in2) 495 { 496 felem in2x2; 497 felem_scalar(in2x2, in2, 2); 498 499 out[0] = ((uint128_t) in1[0]) * in2[0]; 500 501 out[1] = ((uint128_t) in1[0]) * in2[1] + ((uint128_t) in1[1]) * in2[0]; 502 503 out[2] = ((uint128_t) in1[0]) * in2[2] + 504 ((uint128_t) in1[1]) * in2[1] + ((uint128_t) in1[2]) * in2[0]; 505 506 out[3] = ((uint128_t) in1[0]) * in2[3] + 507 ((uint128_t) in1[1]) * in2[2] + 508 ((uint128_t) in1[2]) * in2[1] + ((uint128_t) in1[3]) * in2[0]; 509 510 out[4] = ((uint128_t) in1[0]) * in2[4] + 511 ((uint128_t) in1[1]) * in2[3] + 512 ((uint128_t) in1[2]) * in2[2] + 513 ((uint128_t) in1[3]) * in2[1] + ((uint128_t) in1[4]) * in2[0]; 514 515 out[5] = ((uint128_t) in1[0]) * in2[5] + 516 ((uint128_t) in1[1]) * in2[4] + 517 ((uint128_t) in1[2]) * in2[3] + 518 ((uint128_t) in1[3]) * in2[2] + 519 ((uint128_t) in1[4]) * 
in2[1] + ((uint128_t) in1[5]) * in2[0]; 520 521 out[6] = ((uint128_t) in1[0]) * in2[6] + 522 ((uint128_t) in1[1]) * in2[5] + 523 ((uint128_t) in1[2]) * in2[4] + 524 ((uint128_t) in1[3]) * in2[3] + 525 ((uint128_t) in1[4]) * in2[2] + 526 ((uint128_t) in1[5]) * in2[1] + ((uint128_t) in1[6]) * in2[0]; 527 528 out[7] = ((uint128_t) in1[0]) * in2[7] + 529 ((uint128_t) in1[1]) * in2[6] + 530 ((uint128_t) in1[2]) * in2[5] + 531 ((uint128_t) in1[3]) * in2[4] + 532 ((uint128_t) in1[4]) * in2[3] + 533 ((uint128_t) in1[5]) * in2[2] + 534 ((uint128_t) in1[6]) * in2[1] + ((uint128_t) in1[7]) * in2[0]; 535 536 out[8] = ((uint128_t) in1[0]) * in2[8] + 537 ((uint128_t) in1[1]) * in2[7] + 538 ((uint128_t) in1[2]) * in2[6] + 539 ((uint128_t) in1[3]) * in2[5] + 540 ((uint128_t) in1[4]) * in2[4] + 541 ((uint128_t) in1[5]) * in2[3] + 542 ((uint128_t) in1[6]) * in2[2] + 543 ((uint128_t) in1[7]) * in2[1] + ((uint128_t) in1[8]) * in2[0]; 544 545 /* See comment in felem_square about the use of in2x2 here */ 546 547 out[0] += ((uint128_t) in1[1]) * in2x2[8] + 548 ((uint128_t) in1[2]) * in2x2[7] + 549 ((uint128_t) in1[3]) * in2x2[6] + 550 ((uint128_t) in1[4]) * in2x2[5] + 551 ((uint128_t) in1[5]) * in2x2[4] + 552 ((uint128_t) in1[6]) * in2x2[3] + 553 ((uint128_t) in1[7]) * in2x2[2] + ((uint128_t) in1[8]) * in2x2[1]; 554 555 out[1] += ((uint128_t) in1[2]) * in2x2[8] + 556 ((uint128_t) in1[3]) * in2x2[7] + 557 ((uint128_t) in1[4]) * in2x2[6] + 558 ((uint128_t) in1[5]) * in2x2[5] + 559 ((uint128_t) in1[6]) * in2x2[4] + 560 ((uint128_t) in1[7]) * in2x2[3] + ((uint128_t) in1[8]) * in2x2[2]; 561 562 out[2] += ((uint128_t) in1[3]) * in2x2[8] + 563 ((uint128_t) in1[4]) * in2x2[7] + 564 ((uint128_t) in1[5]) * in2x2[6] + 565 ((uint128_t) in1[6]) * in2x2[5] + 566 ((uint128_t) in1[7]) * in2x2[4] + ((uint128_t) in1[8]) * in2x2[3]; 567 568 out[3] += ((uint128_t) in1[4]) * in2x2[8] + 569 ((uint128_t) in1[5]) * in2x2[7] + 570 ((uint128_t) in1[6]) * in2x2[6] + 571 ((uint128_t) in1[7]) * in2x2[5] + 
((uint128_t) in1[8]) * in2x2[4]; 572 573 out[4] += ((uint128_t) in1[5]) * in2x2[8] + 574 ((uint128_t) in1[6]) * in2x2[7] + 575 ((uint128_t) in1[7]) * in2x2[6] + ((uint128_t) in1[8]) * in2x2[5]; 576 577 out[5] += ((uint128_t) in1[6]) * in2x2[8] + 578 ((uint128_t) in1[7]) * in2x2[7] + ((uint128_t) in1[8]) * in2x2[6]; 579 580 out[6] += ((uint128_t) in1[7]) * in2x2[8] + 581 ((uint128_t) in1[8]) * in2x2[7]; 582 583 out[7] += ((uint128_t) in1[8]) * in2x2[8]; 584 } 585 586 static const limb bottom52bits = 0xfffffffffffff; 587 588 /*- 589 * felem_reduce converts a largefelem to an felem. 590 * On entry: 591 * in[i] < 2^128 592 * On exit: 593 * out[i] < 2^59 + 2^14 594 */ 595 static void felem_reduce(felem out, const largefelem in) 596 { 597 u64 overflow1, overflow2; 598 599 out[0] = ((limb) in[0]) & bottom58bits; 600 out[1] = ((limb) in[1]) & bottom58bits; 601 out[2] = ((limb) in[2]) & bottom58bits; 602 out[3] = ((limb) in[3]) & bottom58bits; 603 out[4] = ((limb) in[4]) & bottom58bits; 604 out[5] = ((limb) in[5]) & bottom58bits; 605 out[6] = ((limb) in[6]) & bottom58bits; 606 out[7] = ((limb) in[7]) & bottom58bits; 607 out[8] = ((limb) in[8]) & bottom58bits; 608 609 /* out[i] < 2^58 */ 610 611 out[1] += ((limb) in[0]) >> 58; 612 out[1] += (((limb) (in[0] >> 64)) & bottom52bits) << 6; 613 /*- 614 * out[1] < 2^58 + 2^6 + 2^58 615 * = 2^59 + 2^6 616 */ 617 out[2] += ((limb) (in[0] >> 64)) >> 52; 618 619 out[2] += ((limb) in[1]) >> 58; 620 out[2] += (((limb) (in[1] >> 64)) & bottom52bits) << 6; 621 out[3] += ((limb) (in[1] >> 64)) >> 52; 622 623 out[3] += ((limb) in[2]) >> 58; 624 out[3] += (((limb) (in[2] >> 64)) & bottom52bits) << 6; 625 out[4] += ((limb) (in[2] >> 64)) >> 52; 626 627 out[4] += ((limb) in[3]) >> 58; 628 out[4] += (((limb) (in[3] >> 64)) & bottom52bits) << 6; 629 out[5] += ((limb) (in[3] >> 64)) >> 52; 630 631 out[5] += ((limb) in[4]) >> 58; 632 out[5] += (((limb) (in[4] >> 64)) & bottom52bits) << 6; 633 out[6] += ((limb) (in[4] >> 64)) >> 52; 634 635 out[6] 
+= ((limb) in[5]) >> 58; 636 out[6] += (((limb) (in[5] >> 64)) & bottom52bits) << 6; 637 out[7] += ((limb) (in[5] >> 64)) >> 52; 638 639 out[7] += ((limb) in[6]) >> 58; 640 out[7] += (((limb) (in[6] >> 64)) & bottom52bits) << 6; 641 out[8] += ((limb) (in[6] >> 64)) >> 52; 642 643 out[8] += ((limb) in[7]) >> 58; 644 out[8] += (((limb) (in[7] >> 64)) & bottom52bits) << 6; 645 /*- 646 * out[x > 1] < 2^58 + 2^6 + 2^58 + 2^12 647 * < 2^59 + 2^13 648 */ 649 overflow1 = ((limb) (in[7] >> 64)) >> 52; 650 651 overflow1 += ((limb) in[8]) >> 58; 652 overflow1 += (((limb) (in[8] >> 64)) & bottom52bits) << 6; 653 overflow2 = ((limb) (in[8] >> 64)) >> 52; 654 655 overflow1 <<= 1; /* overflow1 < 2^13 + 2^7 + 2^59 */ 656 overflow2 <<= 1; /* overflow2 < 2^13 */ 657 658 out[0] += overflow1; /* out[0] < 2^60 */ 659 out[1] += overflow2; /* out[1] < 2^59 + 2^6 + 2^13 */ 660 661 out[1] += out[0] >> 58; 662 out[0] &= bottom58bits; 663 /*- 664 * out[0] < 2^58 665 * out[1] < 2^59 + 2^6 + 2^13 + 2^2 666 * < 2^59 + 2^14 667 */ 668 } 669 670 static void felem_square_reduce(felem out, const felem in) 671 { 672 largefelem tmp; 673 felem_square(tmp, in); 674 felem_reduce(out, tmp); 675 } 676 677 static void felem_mul_reduce(felem out, const felem in1, const felem in2) 678 { 679 largefelem tmp; 680 felem_mul(tmp, in1, in2); 681 felem_reduce(out, tmp); 682 } 683 684 /*- 685 * felem_inv calculates |out| = |in|^{-1} 686 * 687 * Based on Fermat's Little Theorem: 688 * a^p = a (mod p) 689 * a^{p-1} = 1 (mod p) 690 * a^{p-2} = a^{-1} (mod p) 691 */ 692 static void felem_inv(felem out, const felem in) 693 { 694 felem ftmp, ftmp2, ftmp3, ftmp4; 695 largefelem tmp; 696 unsigned i; 697 698 felem_square(tmp, in); 699 felem_reduce(ftmp, tmp); /* 2^1 */ 700 felem_mul(tmp, in, ftmp); 701 felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */ 702 felem_assign(ftmp2, ftmp); 703 felem_square(tmp, ftmp); 704 felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */ 705 felem_mul(tmp, in, ftmp); 706 felem_reduce(ftmp, tmp); /* 2^3 - 2^0 */ 707 
felem_square(tmp, ftmp); 708 felem_reduce(ftmp, tmp); /* 2^4 - 2^1 */ 709 710 felem_square(tmp, ftmp2); 711 felem_reduce(ftmp3, tmp); /* 2^3 - 2^1 */ 712 felem_square(tmp, ftmp3); 713 felem_reduce(ftmp3, tmp); /* 2^4 - 2^2 */ 714 felem_mul(tmp, ftmp3, ftmp2); 715 felem_reduce(ftmp3, tmp); /* 2^4 - 2^0 */ 716 717 felem_assign(ftmp2, ftmp3); 718 felem_square(tmp, ftmp3); 719 felem_reduce(ftmp3, tmp); /* 2^5 - 2^1 */ 720 felem_square(tmp, ftmp3); 721 felem_reduce(ftmp3, tmp); /* 2^6 - 2^2 */ 722 felem_square(tmp, ftmp3); 723 felem_reduce(ftmp3, tmp); /* 2^7 - 2^3 */ 724 felem_square(tmp, ftmp3); 725 felem_reduce(ftmp3, tmp); /* 2^8 - 2^4 */ 726 felem_assign(ftmp4, ftmp3); 727 felem_mul(tmp, ftmp3, ftmp); 728 felem_reduce(ftmp4, tmp); /* 2^8 - 2^1 */ 729 felem_square(tmp, ftmp4); 730 felem_reduce(ftmp4, tmp); /* 2^9 - 2^2 */ 731 felem_mul(tmp, ftmp3, ftmp2); 732 felem_reduce(ftmp3, tmp); /* 2^8 - 2^0 */ 733 felem_assign(ftmp2, ftmp3); 734 735 for (i = 0; i < 8; i++) { 736 felem_square(tmp, ftmp3); 737 felem_reduce(ftmp3, tmp); /* 2^16 - 2^8 */ 738 } 739 felem_mul(tmp, ftmp3, ftmp2); 740 felem_reduce(ftmp3, tmp); /* 2^16 - 2^0 */ 741 felem_assign(ftmp2, ftmp3); 742 743 for (i = 0; i < 16; i++) { 744 felem_square(tmp, ftmp3); 745 felem_reduce(ftmp3, tmp); /* 2^32 - 2^16 */ 746 } 747 felem_mul(tmp, ftmp3, ftmp2); 748 felem_reduce(ftmp3, tmp); /* 2^32 - 2^0 */ 749 felem_assign(ftmp2, ftmp3); 750 751 for (i = 0; i < 32; i++) { 752 felem_square(tmp, ftmp3); 753 felem_reduce(ftmp3, tmp); /* 2^64 - 2^32 */ 754 } 755 felem_mul(tmp, ftmp3, ftmp2); 756 felem_reduce(ftmp3, tmp); /* 2^64 - 2^0 */ 757 felem_assign(ftmp2, ftmp3); 758 759 for (i = 0; i < 64; i++) { 760 felem_square(tmp, ftmp3); 761 felem_reduce(ftmp3, tmp); /* 2^128 - 2^64 */ 762 } 763 felem_mul(tmp, ftmp3, ftmp2); 764 felem_reduce(ftmp3, tmp); /* 2^128 - 2^0 */ 765 felem_assign(ftmp2, ftmp3); 766 767 for (i = 0; i < 128; i++) { 768 felem_square(tmp, ftmp3); 769 felem_reduce(ftmp3, tmp); /* 2^256 - 2^128 */ 770 } 771 
felem_mul(tmp, ftmp3, ftmp2); 772 felem_reduce(ftmp3, tmp); /* 2^256 - 2^0 */ 773 felem_assign(ftmp2, ftmp3); 774 775 for (i = 0; i < 256; i++) { 776 felem_square(tmp, ftmp3); 777 felem_reduce(ftmp3, tmp); /* 2^512 - 2^256 */ 778 } 779 felem_mul(tmp, ftmp3, ftmp2); 780 felem_reduce(ftmp3, tmp); /* 2^512 - 2^0 */ 781 782 for (i = 0; i < 9; i++) { 783 felem_square(tmp, ftmp3); 784 felem_reduce(ftmp3, tmp); /* 2^521 - 2^9 */ 785 } 786 felem_mul(tmp, ftmp3, ftmp4); 787 felem_reduce(ftmp3, tmp); /* 2^512 - 2^2 */ 788 felem_mul(tmp, ftmp3, in); 789 felem_reduce(out, tmp); /* 2^512 - 3 */ 790 } 791 792 /* This is 2^521-1, expressed as an felem */ 793 static const felem kPrime = { 794 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff, 795 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff, 796 0x03ffffffffffffff, 0x03ffffffffffffff, 0x01ffffffffffffff 797 }; 798 799 /*- 800 * felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0 801 * otherwise. 802 * On entry: 803 * in[i] < 2^59 + 2^14 804 */ 805 static limb felem_is_zero(const felem in) 806 { 807 felem ftmp; 808 limb is_zero, is_p; 809 felem_assign(ftmp, in); 810 811 ftmp[0] += ftmp[8] >> 57; 812 ftmp[8] &= bottom57bits; 813 /* ftmp[8] < 2^57 */ 814 ftmp[1] += ftmp[0] >> 58; 815 ftmp[0] &= bottom58bits; 816 ftmp[2] += ftmp[1] >> 58; 817 ftmp[1] &= bottom58bits; 818 ftmp[3] += ftmp[2] >> 58; 819 ftmp[2] &= bottom58bits; 820 ftmp[4] += ftmp[3] >> 58; 821 ftmp[3] &= bottom58bits; 822 ftmp[5] += ftmp[4] >> 58; 823 ftmp[4] &= bottom58bits; 824 ftmp[6] += ftmp[5] >> 58; 825 ftmp[5] &= bottom58bits; 826 ftmp[7] += ftmp[6] >> 58; 827 ftmp[6] &= bottom58bits; 828 ftmp[8] += ftmp[7] >> 58; 829 ftmp[7] &= bottom58bits; 830 /* ftmp[8] < 2^57 + 4 */ 831 832 /* 833 * The ninth limb of 2*(2^521-1) is 0x03ffffffffffffff, which is greater 834 * than our bound for ftmp[8]. Therefore we only have to check if the 835 * zero is zero or 2^521-1. 
836 */ 837 838 is_zero = 0; 839 is_zero |= ftmp[0]; 840 is_zero |= ftmp[1]; 841 is_zero |= ftmp[2]; 842 is_zero |= ftmp[3]; 843 is_zero |= ftmp[4]; 844 is_zero |= ftmp[5]; 845 is_zero |= ftmp[6]; 846 is_zero |= ftmp[7]; 847 is_zero |= ftmp[8]; 848 849 is_zero--; 850 /* 851 * We know that ftmp[i] < 2^63, therefore the only way that the top bit 852 * can be set is if is_zero was 0 before the decrement. 853 */ 854 is_zero = 0 - (is_zero >> 63); 855 856 is_p = ftmp[0] ^ kPrime[0]; 857 is_p |= ftmp[1] ^ kPrime[1]; 858 is_p |= ftmp[2] ^ kPrime[2]; 859 is_p |= ftmp[3] ^ kPrime[3]; 860 is_p |= ftmp[4] ^ kPrime[4]; 861 is_p |= ftmp[5] ^ kPrime[5]; 862 is_p |= ftmp[6] ^ kPrime[6]; 863 is_p |= ftmp[7] ^ kPrime[7]; 864 is_p |= ftmp[8] ^ kPrime[8]; 865 866 is_p--; 867 is_p = 0 - (is_p >> 63); 868 869 is_zero |= is_p; 870 return is_zero; 871 } 872 873 static int felem_is_zero_int(const void *in) 874 { 875 return (int)(felem_is_zero(in) & ((limb) 1)); 876 } 877 878 /*- 879 * felem_contract converts |in| to its unique, minimal representation. 880 * On entry: 881 * in[i] < 2^59 + 2^14 882 */ 883 static void felem_contract(felem out, const felem in) 884 { 885 limb is_p, is_greater, sign; 886 static const limb two58 = ((limb) 1) << 58; 887 888 felem_assign(out, in); 889 890 out[0] += out[8] >> 57; 891 out[8] &= bottom57bits; 892 /* out[8] < 2^57 */ 893 out[1] += out[0] >> 58; 894 out[0] &= bottom58bits; 895 out[2] += out[1] >> 58; 896 out[1] &= bottom58bits; 897 out[3] += out[2] >> 58; 898 out[2] &= bottom58bits; 899 out[4] += out[3] >> 58; 900 out[3] &= bottom58bits; 901 out[5] += out[4] >> 58; 902 out[4] &= bottom58bits; 903 out[6] += out[5] >> 58; 904 out[5] &= bottom58bits; 905 out[7] += out[6] >> 58; 906 out[6] &= bottom58bits; 907 out[8] += out[7] >> 58; 908 out[7] &= bottom58bits; 909 /* out[8] < 2^57 + 4 */ 910 911 /* 912 * If the value is greater than 2^521-1 then we have to subtract 2^521-1 913 * out. 
See the comments in felem_is_zero regarding why we don't test for 914 * other multiples of the prime. 915 */ 916 917 /* 918 * First, if |out| is equal to 2^521-1, we subtract it out to get zero. 919 */ 920 921 is_p = out[0] ^ kPrime[0]; 922 is_p |= out[1] ^ kPrime[1]; 923 is_p |= out[2] ^ kPrime[2]; 924 is_p |= out[3] ^ kPrime[3]; 925 is_p |= out[4] ^ kPrime[4]; 926 is_p |= out[5] ^ kPrime[5]; 927 is_p |= out[6] ^ kPrime[6]; 928 is_p |= out[7] ^ kPrime[7]; 929 is_p |= out[8] ^ kPrime[8]; 930 931 is_p--; 932 is_p &= is_p << 32; 933 is_p &= is_p << 16; 934 is_p &= is_p << 8; 935 is_p &= is_p << 4; 936 is_p &= is_p << 2; 937 is_p &= is_p << 1; 938 is_p = 0 - (is_p >> 63); 939 is_p = ~is_p; 940 941 /* is_p is 0 iff |out| == 2^521-1 and all ones otherwise */ 942 943 out[0] &= is_p; 944 out[1] &= is_p; 945 out[2] &= is_p; 946 out[3] &= is_p; 947 out[4] &= is_p; 948 out[5] &= is_p; 949 out[6] &= is_p; 950 out[7] &= is_p; 951 out[8] &= is_p; 952 953 /* 954 * In order to test that |out| >= 2^521-1 we need only test if out[8] >> 955 * 57 is greater than zero as (2^521-1) + x >= 2^522 956 */ 957 is_greater = out[8] >> 57; 958 is_greater |= is_greater << 32; 959 is_greater |= is_greater << 16; 960 is_greater |= is_greater << 8; 961 is_greater |= is_greater << 4; 962 is_greater |= is_greater << 2; 963 is_greater |= is_greater << 1; 964 is_greater = 0 - (is_greater >> 63); 965 966 out[0] -= kPrime[0] & is_greater; 967 out[1] -= kPrime[1] & is_greater; 968 out[2] -= kPrime[2] & is_greater; 969 out[3] -= kPrime[3] & is_greater; 970 out[4] -= kPrime[4] & is_greater; 971 out[5] -= kPrime[5] & is_greater; 972 out[6] -= kPrime[6] & is_greater; 973 out[7] -= kPrime[7] & is_greater; 974 out[8] -= kPrime[8] & is_greater; 975 976 /* Eliminate negative coefficients */ 977 sign = -(out[0] >> 63); 978 out[0] += (two58 & sign); 979 out[1] -= (1 & sign); 980 sign = -(out[1] >> 63); 981 out[1] += (two58 & sign); 982 out[2] -= (1 & sign); 983 sign = -(out[2] >> 63); 984 out[2] += (two58 & sign); 
985 out[3] -= (1 & sign); 986 sign = -(out[3] >> 63); 987 out[3] += (two58 & sign); 988 out[4] -= (1 & sign); 989 sign = -(out[4] >> 63); 990 out[4] += (two58 & sign); 991 out[5] -= (1 & sign); 992 sign = -(out[0] >> 63); 993 out[5] += (two58 & sign); 994 out[6] -= (1 & sign); 995 sign = -(out[6] >> 63); 996 out[6] += (two58 & sign); 997 out[7] -= (1 & sign); 998 sign = -(out[7] >> 63); 999 out[7] += (two58 & sign); 1000 out[8] -= (1 & sign); 1001 sign = -(out[5] >> 63); 1002 out[5] += (two58 & sign); 1003 out[6] -= (1 & sign); 1004 sign = -(out[6] >> 63); 1005 out[6] += (two58 & sign); 1006 out[7] -= (1 & sign); 1007 sign = -(out[7] >> 63); 1008 out[7] += (two58 & sign); 1009 out[8] -= (1 & sign); 1010 } 1011 1012 /*- 1013 * Group operations 1014 * ---------------- 1015 * 1016 * Building on top of the field operations we have the operations on the 1017 * elliptic curve group itself. Points on the curve are represented in Jacobian 1018 * coordinates */ 1019 1020 /*- 1021 * point_double calcuates 2*(x_in, y_in, z_in) 1022 * 1023 * The method is taken from: 1024 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b 1025 * 1026 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed. 1027 * while x_out == y_in is not (maybe this works, but it's not tested). 
*/
static void
point_double(felem x_out, felem y_out, felem z_out,
             const felem x_in, const felem y_in, const felem z_in)
{
    largefelem tmp, tmp2;
    felem delta, gamma, beta, alpha, ftmp, ftmp2;

    /* Work on copies so outputs may alias x_in. */
    felem_assign(ftmp, x_in);
    felem_assign(ftmp2, x_in);

    /* delta = z^2 */
    felem_square(tmp, z_in);
    felem_reduce(delta, tmp);   /* delta[i] < 2^59 + 2^14 */

    /* gamma = y^2 */
    felem_square(tmp, y_in);
    felem_reduce(gamma, tmp);   /* gamma[i] < 2^59 + 2^14 */

    /* beta = x*gamma */
    felem_mul(tmp, x_in, gamma);
    felem_reduce(beta, tmp);    /* beta[i] < 2^59 + 2^14 */

    /* alpha = 3*(x-delta)*(x+delta) */
    felem_diff64(ftmp, delta);
    /* ftmp[i] < 2^61 */
    felem_sum64(ftmp2, delta);
    /* ftmp2[i] < 2^60 + 2^15 */
    felem_scalar64(ftmp2, 3);
    /* ftmp2[i] < 3*2^60 + 3*2^15 */
    felem_mul(tmp, ftmp, ftmp2);
    /*-
     * tmp[i] < 17(3*2^121 + 3*2^76)
     *        = 61*2^121 + 61*2^76
     *        < 64*2^121 + 64*2^76
     *        = 2^127 + 2^82
     *        < 2^128
     */
    felem_reduce(alpha, tmp);

    /* x' = alpha^2 - 8*beta */
    felem_square(tmp, alpha);
    /*
     * tmp[i] < 17*2^120 < 2^125
     */
    felem_assign(ftmp, beta);
    felem_scalar64(ftmp, 8);
    /* ftmp[i] < 2^62 + 2^17 */
    felem_diff_128_64(tmp, ftmp);
    /* tmp[i] < 2^125 + 2^63 + 2^62 + 2^17 */
    felem_reduce(x_out, tmp);

    /* z' = (y + z)^2 - gamma - delta */
    felem_sum64(delta, gamma);  /* delta now holds gamma + delta */
    /* delta[i] < 2^60 + 2^15 */
    felem_assign(ftmp, y_in);
    felem_sum64(ftmp, z_in);
    /* ftmp[i] < 2^60 + 2^15 */
    felem_square(tmp, ftmp);
    /*
     * tmp[i] < 17(2^122) < 2^127
     */
    felem_diff_128_64(tmp, delta);
    /* tmp[i] < 2^127 + 2^63 */
    felem_reduce(z_out, tmp);

    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
    felem_scalar64(beta, 4);
    /* beta[i] < 2^61 + 2^16 */
    felem_diff64(beta, x_out);
    /* beta[i] < 2^61 + 2^60 + 2^16 */
    felem_mul(tmp, alpha, beta);
    /*-
     * tmp[i] < 17*((2^59 + 2^14)(2^61 + 2^60 + 2^16))
     *        = 17*(2^120 + 2^75 + 2^119 + 2^74 + 2^75 + 2^30)
     *        = 17*(2^120 + 2^119 + 2^76 + 2^74 + 2^30)
     *        < 2^128
     */
    felem_square(tmp2, gamma);
    /*-
     * tmp2[i] < 17*(2^59 + 2^14)^2
     *         = 17*(2^118 + 2^74 + 2^28)
     */
    felem_scalar128(tmp2, 8);
    /*-
     * tmp2[i] < 8*17*(2^118 + 2^74 + 2^28)
     *         = 2^125 + 2^121 + 2^81 + 2^77 + 2^35 + 2^31
     *         < 2^126
     */
    felem_diff128(tmp, tmp2);
    /*-
     * tmp[i] < 2^127 - 2^69 + 17(2^120 + 2^119 + 2^76 + 2^74 + 2^30)
     *        = 2^127 + 2^124 + 2^122 + 2^120 + 2^118 + 2^80 + 2^78 + 2^76 +
     *          2^74 + 2^69 + 2^34 + 2^30
     *        < 2^128
     */
    felem_reduce(y_out, tmp);
}

/*
 * copy_conditional copies in to out iff mask is all ones.
 * Constant-time: no branch on |mask|.
 */
static void copy_conditional(felem out, const felem in, limb mask)
{
    unsigned i;
    for (i = 0; i < NLIMBS; ++i) {
        const limb tmp = mask & (in[i] ^ out[i]);
        out[i] ^= tmp;
    }
}

/*-
 * point_add calculates (x1, y1, z1) + (x2, y2, z2)
 *
 * The method is taken from
 *   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
 *
 * This function includes a branch for checking whether the two input points
 * are equal (while not equal to the point at infinity). This case never
 * happens during single point multiplication, so there is no timing leak for
 * ECDH or ECDSA signing.
 */
static void point_add(felem x3, felem y3, felem z3,
                      const felem x1, const felem y1, const felem z1,
                      const int mixed, const felem x2, const felem y2,
                      const felem z2)
{
    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
    largefelem tmp, tmp2;
    limb x_equal, y_equal, z1_is_zero, z2_is_zero;

    z1_is_zero = felem_is_zero(z1);
    z2_is_zero = felem_is_zero(z2);

    /* ftmp = z1z1 = z1**2 */
    felem_square(tmp, z1);
    felem_reduce(ftmp, tmp);

    if (!mixed) {
        /* ftmp2 = z2z2 = z2**2 */
        felem_square(tmp, z2);
        felem_reduce(ftmp2, tmp);

        /* u1 = ftmp3 = x1*z2z2 */
        felem_mul(tmp, x1, ftmp2);
        felem_reduce(ftmp3, tmp);

        /* ftmp5 = z1 + z2 */
        felem_assign(ftmp5, z1);
        felem_sum64(ftmp5, z2);
        /* ftmp5[i] < 2^61 */

        /* ftmp5 = (z1 + z2)**2 - z1z1 - z2z2 = 2*z1z2 */
        felem_square(tmp, ftmp5);
        /* tmp[i] < 17*2^122 */
        felem_diff_128_64(tmp, ftmp);
        /* tmp[i] < 17*2^122 + 2^63 */
        felem_diff_128_64(tmp, ftmp2);
        /* tmp[i] < 17*2^122 + 2^64 */
        felem_reduce(ftmp5, tmp);

        /* ftmp2 = z2 * z2z2 */
        felem_mul(tmp, ftmp2, z2);
        felem_reduce(ftmp2, tmp);

        /* s1 = ftmp6 = y1 * z2**3 */
        felem_mul(tmp, y1, ftmp2);
        felem_reduce(ftmp6, tmp);
    } else {
        /*
         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
         */

        /* u1 = ftmp3 = x1*z2z2 */
        felem_assign(ftmp3, x1);

        /* ftmp5 = 2*z1z2 */
        felem_scalar(ftmp5, z1, 2);

        /* s1 = ftmp6 = y1 * z2**3 */
        felem_assign(ftmp6, y1);
    }

    /* u2 = x2*z1z1 */
    felem_mul(tmp, x2, ftmp);
    /* tmp[i] < 17*2^120 */

    /* h = ftmp4 = u2 - u1 */
    felem_diff_128_64(tmp, ftmp3);
    /* tmp[i] < 17*2^120 + 2^63 */
    felem_reduce(ftmp4, tmp);

    x_equal = felem_is_zero(ftmp4);

    /* z_out = ftmp5 * h */
    felem_mul(tmp, ftmp5, ftmp4);
    felem_reduce(z_out, tmp);

    /* ftmp = z1 * z1z1 */
    felem_mul(tmp, ftmp, z1);
    felem_reduce(ftmp, tmp);

    /* s2 = tmp = y2 * z1**3 */
    felem_mul(tmp, y2, ftmp);
    /* tmp[i] < 17*2^120 */

    /* r = ftmp5 = (s2 - s1)*2 */
    felem_diff_128_64(tmp, ftmp6);
    /* tmp[i] < 17*2^120 + 2^63 */
    felem_reduce(ftmp5, tmp);
    y_equal = felem_is_zero(ftmp5);
    felem_scalar64(ftmp5, 2);
    /* ftmp5[i] < 2^61 */

    /*
     * NOTE(review): this branch leaks (via timing) whether the doubling case
     * was taken.  The comment above this function explains why that is
     * acceptable for single-point multiplication; confirm that property
     * still holds before reusing this helper in any new multi-scalar path.
     */
    if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
        point_double(x3, y3, z3, x1, y1, z1);
        return;
    }

    /* I = ftmp = (2h)**2 */
    felem_assign(ftmp, ftmp4);
    felem_scalar64(ftmp, 2);
    /* ftmp[i] < 2^61 */
    felem_square(tmp, ftmp);
    /* tmp[i] < 17*2^122 */
    felem_reduce(ftmp, tmp);

    /* J = ftmp2 = h * I */
    felem_mul(tmp, ftmp4, ftmp);
    felem_reduce(ftmp2, tmp);

    /* V = ftmp4 = U1 * I */
    felem_mul(tmp, ftmp3, ftmp);
    felem_reduce(ftmp4, tmp);

    /* x_out = r**2 - J - 2V */
    felem_square(tmp, ftmp5);
    /* tmp[i] < 17*2^122 */
    felem_diff_128_64(tmp, ftmp2);
    /* tmp[i] < 17*2^122 + 2^63 */
    felem_assign(ftmp3, ftmp4);
    felem_scalar64(ftmp4, 2);
    /* ftmp4[i] < 2^61 */
    felem_diff_128_64(tmp, ftmp4);
    /* tmp[i] < 17*2^122 + 2^64 */
    felem_reduce(x_out, tmp);

    /* y_out = r(V-x_out) - 2 * s1 * J */
    felem_diff64(ftmp3, x_out);
    /*
     * ftmp3[i] < 2^60 + 2^60 = 2^61
     */
    felem_mul(tmp, ftmp5, ftmp3);
    /* tmp[i] < 17*2^122 */
    felem_mul(tmp2, ftmp6, ftmp2);
    /* tmp2[i] < 17*2^120 */
    felem_scalar128(tmp2, 2);
    /* tmp2[i] < 17*2^121 */
    felem_diff128(tmp, tmp2);
    /*-
     * tmp[i] < 2^127 - 2^69 + 17*2^122
     *        = 2^126 - 2^122 - 2^6 - 2^2 - 1
     *        < 2^127
     */
    felem_reduce(y_out, tmp);

    /*
     * Handle the z2 = 0 / z1 = 0 special cases without branching: if either
     * input was the point at infinity, overwrite the result with the other
     * input in constant time.
     */
    copy_conditional(x_out, x2, z1_is_zero);
    copy_conditional(x_out, x1, z2_is_zero);
    copy_conditional(y_out, y2, z1_is_zero);
    copy_conditional(y_out, y1, z2_is_zero);
    copy_conditional(z_out, z2, z1_is_zero);
    copy_conditional(z_out, z1, z2_is_zero);
    felem_assign(x3, x_out);
    felem_assign(y3, y_out);
    felem_assign(z3, z_out);
}

/*-
 * Base point pre computation
 * --------------------------
 *
 * Two different sorts of precomputed tables are used in the following code.
 * Each contain various points on the curve, where each point is three field
 * elements (x, y, z).
 *
 * For the base point table, z is usually 1 (0 for the point at infinity).
 * This table has 16 elements:
 * index | bits    | point
 * ------+---------+------------------------------
 *     0 | 0 0 0 0 | 0G
 *     1 | 0 0 0 1 | 1G
 *     2 | 0 0 1 0 | 2^130G
 *     3 | 0 0 1 1 | (2^130 + 1)G
 *     4 | 0 1 0 0 | 2^260G
 *     5 | 0 1 0 1 | (2^260 + 1)G
 *     6 | 0 1 1 0 | (2^260 + 2^130)G
 *     7 | 0 1 1 1 | (2^260 + 2^130 + 1)G
 *     8 | 1 0 0 0 | 2^390G
 *     9 | 1 0 0 1 | (2^390 + 1)G
 *    10 | 1 0 1 0 | (2^390 + 2^130)G
 *    11 | 1 0 1 1 | (2^390 + 2^130 + 1)G
 *    12 | 1 1 0 0 | (2^390 + 2^260)G
 *    13 | 1 1 0 1 | (2^390 + 2^260 + 1)G
 *    14 | 1 1 1 0 | (2^390 + 2^260 + 2^130)G
 *    15 | 1 1 1 1 | (2^390 + 2^260 + 2^130 + 1)G
 *
 * The reason for this is so that we can clock bits into four different
 * locations when doing simple scalar multiplies against the base point.
 *
 * Tables for other points have table[i] = iG for i in 0 .. 16.
 */

/* gmul is the table of precomputed base points */
static const felem gmul[16][3] = {
    {{0, 0, 0, 0, 0, 0, 0, 0, 0},
     {0, 0, 0, 0, 0, 0, 0, 0, 0},
     {0, 0, 0, 0, 0, 0, 0, 0, 0}},
    {{0x017e7e31c2e5bd66, 0x022cf0615a90a6fe, 0x00127a2ffa8de334,
      0x01dfbf9d64a3f877, 0x006b4d3dbaa14b5e, 0x014fed487e0a2bd8,
      0x015b4429c6481390, 0x03a73678fb2d988e, 0x00c6858e06b70404},
     {0x00be94769fd16650, 0x031c21a89cb09022, 0x039013fad0761353,
      0x02657bd099031542, 0x03273e662c97ee72, 0x01e6d11a05ebef45,
      0x03d1bd998f544495, 0x03001172297ed0b1, 0x011839296a789a3b},
     {1, 0, 0, 0, 0, 0, 0, 0, 0}},
    {{0x0373faacbc875bae, 0x00f325023721c671, 0x00f666fd3dbde5ad,
      0x01a6932363f88ea7, 0x01fc6d9e13f9c47b, 0x03bcbffc2bbf734e,
      0x013ee3c3647f3a92, 0x029409fefe75d07d, 0x00ef9199963d85e5},
     {0x011173743ad5b178, 0x02499c7c21bf7d46, 0x035beaeabb8b1a58,
      0x00f989c4752ea0a3, 0x0101e1de48a9c1a3, 0x01a20076be28ba6c,
      0x02f8052e5eb2de95, 0x01bfe8f82dea117c, 0x0160074d3c36ddb7},
     {1, 0, 0, 0, 0, 0, 0, 0, 0}},
    {{0x012f3fc373393b3b, 0x03d3d6172f1419fa, 0x02adc943c0b86873,
      0x00d475584177952b, 0x012a4d1673750ee2, 0x00512517a0f13b0c,
      0x02b184671a7b1734, 0x0315b84236f1a50a, 0x00a4afc472edbdb9},
     {0x00152a7077f385c4, 0x03044007d8d1c2ee, 0x0065829d61d52b52,
      0x00494ff6b6631d0d, 0x00a11d94d5f06bcf, 0x02d2f89474d9282e,
      0x0241c5727c06eeb9, 0x0386928710fbdb9d, 0x01f883f727b0dfbe},
     {1, 0, 0, 0, 0, 0, 0, 0, 0}},
    {{0x019b0c3c9185544d, 0x006243a37c9d97db, 0x02ee3cbe030a2ad2,
      0x00cfdd946bb51e0d, 0x0271c00932606b91, 0x03f817d1ec68c561,
      0x03f37009806a369c, 0x03c1f30baf184fd5, 0x01091022d6d2f065},
     {0x0292c583514c45ed, 0x0316fca51f9a286c, 0x00300af507c1489a,
      0x0295f69008298cf1, 0x02c0ed8274943d7b, 0x016509b9b47a431e,
      0x02bc9de9634868ce, 0x005b34929bffcb09, 0x000c1a0121681524},
     {1, 0, 0, 0, 0, 0, 0, 0, 0}},
    {{0x0286abc0292fb9f2, 0x02665eee9805b3f7, 0x01ed7455f17f26d6,
      0x0346355b83175d13, 0x006284944cd0a097, 0x0191895bcdec5e51,
      0x02e288370afda7d9, 0x03b22312bfefa67a, 0x01d104d3fc0613fe},
     {0x0092421a12f7e47f, 0x0077a83fa373c501, 0x03bd25c5f696bd0d,
      0x035c41e4d5459761, 0x01ca0d1742b24f53, 0x00aaab27863a509c,
      0x018b6de47df73917, 0x025c0b771705cd01, 0x01fd51d566d760a7},
     {1, 0, 0, 0, 0, 0, 0, 0, 0}},
    {{0x01dd92ff6b0d1dbd, 0x039c5e2e8f8afa69, 0x0261ed13242c3b27,
      0x0382c6e67026e6a0, 0x01d60b10be2089f9, 0x03c15f3dce86723f,
      0x03c764a32d2a062d, 0x017307eac0fad056, 0x018207c0b96c5256},
     {0x0196a16d60e13154, 0x03e6ce74c0267030, 0x00ddbf2b4e52a5aa,
      0x012738241bbf31c8, 0x00ebe8dc04685a28, 0x024c2ad6d380d4a2,
      0x035ee062a6e62d0e, 0x0029ed74af7d3a0f, 0x00eef32aec142ebd},
     {1, 0, 0, 0, 0, 0, 0, 0, 0}},
    {{0x00c31ec398993b39, 0x03a9f45bcda68253, 0x00ac733c24c70890,
      0x00872b111401ff01, 0x01d178c23195eafb, 0x03bca2c816b87f74,
      0x0261a9af46fbad7a, 0x0324b2a8dd3d28f9, 0x00918121d8f24e23},
     {0x032bc8c1ca983cd7, 0x00d869dfb08fc8c6, 0x01693cb61fce1516,
      0x012a5ea68f4e88a8, 0x010869cab88d7ae3, 0x009081ad277ceee1,
      0x033a77166d064cdc, 0x03955235a1fb3a95, 0x01251a4a9b25b65e},
     {1, 0, 0, 0, 0, 0, 0, 0, 0}},
    {{0x00148a3a1b27f40b, 0x0123186df1b31fdc, 0x00026e7beaad34ce,
      0x01db446ac1d3dbba, 0x0299c1a33437eaec, 0x024540610183cbb7,
      0x0173bb0e9ce92e46, 0x02b937e43921214b, 0x01ab0436a9bf01b5},
     {0x0383381640d46948, 0x008dacbf0e7f330f, 0x03602122bcc3f318,
      0x01ee596b200620d6, 0x03bd0585fda430b3, 0x014aed77fd123a83,
      0x005ace749e52f742, 0x0390fe041da2b842, 0x0189a8ceb3299242},
     {1, 0, 0, 0, 0, 0, 0, 0, 0}},
    {{0x012a19d6b3282473, 0x00c0915918b423ce, 0x023a954eb94405ae,
      0x00529f692be26158, 0x0289fa1b6fa4b2aa, 0x0198ae4ceea346ef,
      0x0047d8cdfbdedd49, 0x00cc8c8953f0f6b8, 0x001424abbff49203},
     {0x0256732a1115a03a, 0x0351bc38665c6733, 0x03f7b950fb4a6447,
      0x000afffa94c22155, 0x025763d0a4dab540, 0x000511e92d4fc283,
      0x030a7e9eda0ee96c, 0x004c3cd93a28bf0a, 0x017edb3a8719217f},
     {1, 0, 0, 0, 0, 0, 0, 0, 0}},
    {{0x011de5675a88e673, 0x031d7d0f5e567fbe, 0x0016b2062c970ae5,
      0x03f4a2be49d90aa7, 0x03cef0bd13822866, 0x03f0923dcf774a6c,
      0x0284bebc4f322f72, 0x016ab2645302bb2c, 0x01793f95dace0e2a},
     {0x010646e13527a28f, 0x01ca1babd59dc5e7, 0x01afedfd9a5595df,
      0x01f15785212ea6b1, 0x0324e5d64f6ae3f4, 0x02d680f526d00645,
      0x0127920fadf627a7, 0x03b383f75df4f684, 0x0089e0057e783b0a},
     {1, 0, 0, 0, 0, 0, 0, 0, 0}},
    {{0x00f334b9eb3c26c6, 0x0298fdaa98568dce, 0x01c2d24843a82292,
      0x020bcb24fa1b0711, 0x02cbdb3d2b1875e6, 0x0014907598f89422,
      0x03abe3aa43b26664, 0x02cbf47f720bc168, 0x0133b5e73014b79b},
     {0x034aab5dab05779d, 0x00cdc5d71fee9abb, 0x0399f16bd4bd9d30,
      0x03582fa592d82647, 0x02be1cdfb775b0e9, 0x0034f7cea32e94cb,
      0x0335a7f08f56f286, 0x03b707e9565d1c8b, 0x0015c946ea5b614f},
     {1, 0, 0, 0, 0, 0, 0, 0, 0}},
    {{0x024676f6cff72255, 0x00d14625cac96378, 0x00532b6008bc3767,
      0x01fc16721b985322, 0x023355ea1b091668, 0x029de7afdc0317c3,
      0x02fc8a7ca2da037c, 0x02de1217d74a6f30, 0x013f7173175b73bf},
     {0x0344913f441490b5, 0x0200f9e272b61eca, 0x0258a246b1dd55d2,
      0x03753db9ea496f36, 0x025e02937a09c5ef, 0x030cbd3d14012692,
      0x01793a67e70dc72a, 0x03ec1d37048a662e, 0x006550f700c32a8d},
     {1, 0, 0, 0, 0, 0, 0, 0, 0}},
    {{0x00d3f48a347eba27, 0x008e636649b61bd8, 0x00d3b93716778fb3,
      0x004d1915757bd209, 0x019d5311a3da44e0, 0x016d1afcbbe6aade,
      0x0241bf5f73265616, 0x0384672e5d50d39b, 0x005009fee522b684},
     {0x029b4fab064435fe, 0x018868ee095bbb07, 0x01ea3d6936cc92b8,
      0x000608b00f78a2f3, 0x02db911073d1c20f, 0x018205938470100a,
      0x01f1e4964cbe6ff2, 0x021a19a29eed4663, 0x01414485f42afa81},
     {1, 0, 0, 0, 0, 0, 0, 0, 0}},
    {{0x01612b3a17f63e34, 0x03813992885428e6, 0x022b3c215b5a9608,
      0x029b4057e19f2fcb, 0x0384059a587af7e6, 0x02d6400ace6fe610,
      0x029354d896e8e331, 0x00c047ee6dfba65e, 0x0037720542e9d49d},
     {0x02ce9eed7c5e9278, 0x0374ed703e79643b, 0x01316c54c4072006,
      0x005aaa09054b2ee8, 0x002824000c840d57, 0x03d4eba24771ed86,
      0x0189c50aabc3bdae, 0x0338c01541e15510, 0x00466d56e38eed42},
     {1, 0, 0, 0, 0, 0, 0, 0, 0}},
    {{0x007efd8330ad8bd6, 0x02465ed48047710b, 0x0034c6606b215e0c,
      0x016ae30c53cbf839, 0x01fa17bd37161216, 0x018ead4e61ce8ab9,
      0x005482ed5f5dee46, 0x037543755bba1d7f, 0x005e5ac7e70a9d0f},
     {0x0117e1bb2fdcb2a2, 0x03deea36249f40c4, 0x028d09b4a6246cb7,
      0x03524b8855bcf756, 0x023d7d109d5ceb58, 0x0178e43e3223ef9c,
      0x0154536a0c6e966a, 0x037964d1286ee9fe, 0x0199bcd90e125055},
     {1, 0, 0, 0, 0, 0, 0, 0, 0}}
};

/*
 * select_point selects the |idx|th point from a precomputation table and
 * copies it to out.
 *
 * Constant time: every table entry is read, and the OR-mask is all-ones
 * exactly when i == idx (the shift/OR sequence collapses i ^ idx to a
 * single bit, then mask-- turns 0 into ~0 and 1 into 0).
 */
/* pre_comp below is of the size provided in |size| */
static void select_point(const limb idx, unsigned int size,
                         const felem pre_comp[][3], felem out[3])
{
    unsigned i, j;
    limb *outlimbs = &out[0][0];
    memset(outlimbs, 0, 3 * sizeof(felem));

    for (i = 0; i < size; i++) {
        const limb *inlimbs = &pre_comp[i][0][0];
        limb mask = i ^ idx;
        mask |= mask >> 4;
        mask |= mask >> 2;
        mask |= mask >> 1;
        mask &= 1;
        mask--;
        for (j = 0; j < NLIMBS * 3; j++)
            outlimbs[j] |= inlimbs[j] & mask;
    }
}

/* get_bit returns the |i|th bit in |in| (little-endian bit order; i < 0 yields 0) */
static char get_bit(const felem_bytearray in, int i)
{
    if (i < 0)
        return 0;
    return (in[i >> 3] >> (i & 7)) & 1;
}

/*
 * Interleaved point multiplication using precomputed point multiples: The
 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
 * generator, using certain (large) precomputed multiples in g_pre_comp.
 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
 */
static void batch_mul(felem x_out, felem y_out, felem z_out,
                      const felem_bytearray scalars[],
                      const unsigned num_points, const u8 *g_scalar,
                      const int mixed, const felem pre_comp[][17][3],
                      const felem g_pre_comp[16][3])
{
    int i, skip;
    unsigned num, gen_mul = (g_scalar != NULL);
    felem nq[3], tmp[4];
    limb bits;
    u8 sign, digit;

    /* set nq to the point at infinity */
    memset(nq, 0, 3 * sizeof(felem));

    /*
     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
     * of the generator (last quarter of rounds) and additions of other
     * points multiples (every 5th round).
     */
    skip = 1;                   /* save two point operations in the first
                                 * round */
    for (i = (num_points ? 520 : 130); i >= 0; --i) {
        /* double */
        if (!skip)
            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);

        /* add multiples of the generator */
        if (gen_mul && (i <= 130)) {
            /*
             * Clock one bit from each of the four 130-bit windows of
             * g_scalar into a 4-bit index into g_pre_comp (see the base
             * point table layout comment above gmul).
             */
            bits = get_bit(g_scalar, i + 390) << 3;
            if (i < 130) {
                bits |= get_bit(g_scalar, i + 260) << 2;
                bits |= get_bit(g_scalar, i + 130) << 1;
                bits |= get_bit(g_scalar, i);
            }
            /* select the point to add, in constant time */
            select_point(bits, 16, g_pre_comp, tmp);
            if (!skip) {
                /* The 1 argument below is for "mixed" */
                point_add(nq[0], nq[1], nq[2],
                          nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
            } else {
                /* first live round: just load the selected point */
                memcpy(nq, tmp, 3 * sizeof(felem));
                skip = 0;
            }
        }

        /* do other additions every 5 doublings */
        if (num_points && (i % 5 == 0)) {
            /* loop over all scalars */
            for (num = 0; num < num_points; ++num) {
                /* 6-bit signed window centred on bit i (bits i+4 .. i-1) */
                bits = get_bit(scalars[num], i + 4) << 5;
                bits |= get_bit(scalars[num], i + 3) << 4;
                bits |= get_bit(scalars[num], i + 2) << 3;
                bits |= get_bit(scalars[num], i + 1) << 2;
                bits |= get_bit(scalars[num], i) << 1;
                bits |= get_bit(scalars[num], i - 1);
                ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);

                /*
                 * select the point to add or subtract, in constant time
                 */
                select_point(digit, 17, pre_comp[num], tmp);
                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
                                            * point */
                copy_conditional(tmp[1], tmp[3], (-(limb) sign));

                if (!skip) {
                    point_add(nq[0], nq[1], nq[2],
                              nq[0], nq[1], nq[2],
                              mixed, tmp[0], tmp[1], tmp[2]);
                } else {
                    memcpy(nq, tmp, 3 * sizeof(felem));
                    skip = 0;
                }
            }
        }
    }
    felem_assign(x_out, nq[0]);
    felem_assign(y_out, nq[1]);
    felem_assign(z_out, nq[2]);
}

/* Precomputation for the group generator. */
typedef struct {
    felem g_pre_comp[16][3];    /* see the table layout comment above gmul */
    int references;             /* reference count, managed via CRYPTO_add */
} NISTP521_PRE_COMP;

const EC_METHOD *EC_GFp_nistp521_method(void)
{
    static const EC_METHOD ret = {
        EC_FLAGS_DEFAULT_OCT,
        NID_X9_62_prime_field,
        ec_GFp_nistp521_group_init,
        ec_GFp_simple_group_finish,
        ec_GFp_simple_group_clear_finish,
        ec_GFp_nist_group_copy,
        ec_GFp_nistp521_group_set_curve,
        ec_GFp_simple_group_get_curve,
        ec_GFp_simple_group_get_degree,
        ec_GFp_simple_group_check_discriminant,
        ec_GFp_simple_point_init,
        ec_GFp_simple_point_finish,
        ec_GFp_simple_point_clear_finish,
        ec_GFp_simple_point_copy,
        ec_GFp_simple_point_set_to_infinity,
        ec_GFp_simple_set_Jprojective_coordinates_GFp,
        ec_GFp_simple_get_Jprojective_coordinates_GFp,
        ec_GFp_simple_point_set_affine_coordinates,
        ec_GFp_nistp521_point_get_affine_coordinates,
        0 /* point_set_compressed_coordinates */ ,
        0 /* point2oct */ ,
        0 /* oct2point */ ,
        ec_GFp_simple_add,
        ec_GFp_simple_dbl,
        ec_GFp_simple_invert,
        ec_GFp_simple_is_at_infinity,
        ec_GFp_simple_is_on_curve,
        ec_GFp_simple_cmp,
        ec_GFp_simple_make_affine,
ec_GFp_simple_points_make_affine, 1609 ec_GFp_nistp521_points_mul, 1610 ec_GFp_nistp521_precompute_mult, 1611 ec_GFp_nistp521_have_precompute_mult, 1612 ec_GFp_nist_field_mul, 1613 ec_GFp_nist_field_sqr, 1614 0 /* field_div */ , 1615 0 /* field_encode */ , 1616 0 /* field_decode */ , 1617 0 /* field_set_to_one */ 1618 }; 1619 1620 return &ret; 1621 } 1622 1623 /******************************************************************************/ 1624 /* 1625 * FUNCTIONS TO MANAGE PRECOMPUTATION 1626 */ 1627 1628 static NISTP521_PRE_COMP *nistp521_pre_comp_new() 1629 { 1630 NISTP521_PRE_COMP *ret = NULL; 1631 ret = (NISTP521_PRE_COMP *) OPENSSL_malloc(sizeof(NISTP521_PRE_COMP)); 1632 if (!ret) { 1633 ECerr(EC_F_NISTP521_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE); 1634 return ret; 1635 } 1636 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp)); 1637 ret->references = 1; 1638 return ret; 1639 } 1640 1641 static void *nistp521_pre_comp_dup(void *src_) 1642 { 1643 NISTP521_PRE_COMP *src = src_; 1644 1645 /* no need to actually copy, these objects never change! 
     */
    CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);

    return src_;
}

/* Drop one reference; frees the object when the count reaches zero. */
static void nistp521_pre_comp_free(void *pre_)
{
    int i;
    NISTP521_PRE_COMP *pre = pre_;

    if (!pre)
        return;

    i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
    if (i > 0)
        return;

    OPENSSL_free(pre);
}

/*
 * Like nistp521_pre_comp_free, but scrubs the memory with OPENSSL_cleanse
 * before releasing it.
 */
static void nistp521_pre_comp_clear_free(void *pre_)
{
    int i;
    NISTP521_PRE_COMP *pre = pre_;

    if (!pre)
        return;

    i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
    if (i > 0)
        return;

    OPENSSL_cleanse(pre, sizeof(*pre));
    OPENSSL_free(pre);
}

/******************************************************************************/
/*
 * OPENSSL EC_METHOD FUNCTIONS
 */

int ec_GFp_nistp521_group_init(EC_GROUP *group)
{
    int ret;
    ret = ec_GFp_simple_group_init(group);
    /* P-521 has a = -3, which enables the a_is_minus3 fast paths */
    group->a_is_minus3 = 1;
    return ret;
}

/*
 * Accepts only the genuine NIST P-521 parameters: p, a and b are compared
 * against the built-in nistp521_curve_params and the call fails with
 * EC_R_WRONG_CURVE_PARAMETERS on any mismatch.
 */
int ec_GFp_nistp521_group_set_curve(EC_GROUP *group, const BIGNUM *p,
                                    const BIGNUM *a, const BIGNUM *b,
                                    BN_CTX *ctx)
{
    int ret = 0;
    BN_CTX *new_ctx = NULL;
    BIGNUM *curve_p, *curve_a, *curve_b;

    if (ctx == NULL)
        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
            return 0;
    BN_CTX_start(ctx);
    if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
        ((curve_a = BN_CTX_get(ctx)) == NULL) ||
        ((curve_b = BN_CTX_get(ctx)) == NULL))
        goto err;
    BN_bin2bn(nistp521_curve_params[0], sizeof(felem_bytearray), curve_p);
    BN_bin2bn(nistp521_curve_params[1], sizeof(felem_bytearray), curve_a);
    BN_bin2bn(nistp521_curve_params[2], sizeof(felem_bytearray), curve_b);
    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
        ECerr(EC_F_EC_GFP_NISTP521_GROUP_SET_CURVE,
              EC_R_WRONG_CURVE_PARAMETERS);
        goto err;
    }
    group->field_mod_func = BN_nist_mod_521;
    ret
        = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
 err:
    BN_CTX_end(ctx);
    if (new_ctx != NULL)
        BN_CTX_free(new_ctx);
    return ret;
}

/*
 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
 * (X/Z^2, Y/Z^3)
 */
int ec_GFp_nistp521_point_get_affine_coordinates(const EC_GROUP *group,
                                                 const EC_POINT *point,
                                                 BIGNUM *x, BIGNUM *y,
                                                 BN_CTX *ctx)
{
    felem z1, z2, x_in, y_in, x_out, y_out;
    largefelem tmp;

    /* the point at infinity has no affine representation */
    if (EC_POINT_is_at_infinity(group, point)) {
        ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES,
              EC_R_POINT_AT_INFINITY);
        return 0;
    }
    if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
        (!BN_to_felem(z1, &point->Z)))
        return 0;
    /* z2 = 1/Z; x = X * (1/Z)^2, then y = Y * (1/Z)^3 */
    felem_inv(z2, z1);
    felem_square(tmp, z2);
    felem_reduce(z1, tmp);
    felem_mul(tmp, x_in, z1);
    felem_reduce(x_in, tmp);
    felem_contract(x_out, x_in);
    if (x != NULL) {
        if (!felem_to_BN(x, x_out)) {
            ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES,
                  ERR_R_BN_LIB);
            return 0;
        }
    }
    felem_mul(tmp, z1, z2);
    felem_reduce(z1, tmp);
    felem_mul(tmp, y_in, z1);
    felem_reduce(y_in, tmp);
    felem_contract(y_out, y_in);
    if (y != NULL) {
        if (!felem_to_BN(y, y_out)) {
            ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES,
                  ERR_R_BN_LIB);
            return 0;
        }
    }
    return 1;
}

/* points below is of size |num|, and tmp_felems is of size |num+1| */
static void make_points_affine(size_t num, felem points[][3],
                               felem tmp_felems[])
{
    /*
     * Runs in constant time, unless an input is the point at infinity (which
     * normally shouldn't happen).
     */
    ec_GFp_nistp_points_make_affine_internal(num,
                                             points,
                                             sizeof(felem),
                                             tmp_felems,
                                             (void (*)(void *))felem_one,
                                             felem_is_zero_int,
                                             (void (*)(void *, const void *))
                                             felem_assign,
                                             (void (*)(void *, const void *))
                                             felem_square_reduce,
                                             (void (*)
                                              (void *, const void *,
                                               const void *))
                                             felem_mul_reduce,
                                             (void (*)(void *, const void *))
                                             felem_inv,
                                             (void (*)(void *, const void *))
                                             felem_contract);
}

/*
 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
 * values. Result is stored in r (r can equal one of the inputs).
 */
int ec_GFp_nistp521_points_mul(const EC_GROUP *group, EC_POINT *r,
                               const BIGNUM *scalar, size_t num,
                               const EC_POINT *points[],
                               const BIGNUM *scalars[], BN_CTX *ctx)
{
    int ret = 0;
    int j;
    int mixed = 0;
    BN_CTX *new_ctx = NULL;
    BIGNUM *x, *y, *z, *tmp_scalar;
    felem_bytearray g_secret;
    felem_bytearray *secrets = NULL;
    felem(*pre_comp)[17][3] = NULL;
    felem *tmp_felems = NULL;
    felem_bytearray tmp;
    unsigned i, num_bytes;
    int have_pre_comp = 0;
    size_t num_points = num;
    felem x_in, y_in, z_in, x_out, y_out, z_out;
    NISTP521_PRE_COMP *pre = NULL;
    felem(*g_pre_comp)[3] = NULL;
    EC_POINT *generator = NULL;
    const EC_POINT *p = NULL;
    const BIGNUM *p_scalar = NULL;

    if (ctx == NULL)
        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
            return 0;
    BN_CTX_start(ctx);
    if (((x = BN_CTX_get(ctx)) == NULL) ||
        ((y = BN_CTX_get(ctx)) == NULL) ||
        ((z = BN_CTX_get(ctx)) == NULL) ||
        ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
        goto err;

    if (scalar != NULL) {
        pre = EC_EX_DATA_get_data(group->extra_data,
                                  nistp521_pre_comp_dup,
                                  nistp521_pre_comp_free,
                                  nistp521_pre_comp_clear_free);
        if (pre)
            /* we have precomputation, try to use it */
            g_pre_comp = &pre->g_pre_comp[0];
        else
            /* try to use the standard precomputation */
            g_pre_comp = (felem(*)[3]) gmul;
        generator = EC_POINT_new(group);
        if (generator == NULL)
            goto err;
        /* get the generator from precomputation */
        if (!felem_to_BN(x, g_pre_comp[1][0]) ||
            !felem_to_BN(y, g_pre_comp[1][1]) ||
            !felem_to_BN(z, g_pre_comp[1][2])) {
            ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
            goto err;
        }
        if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
                                                      generator, x, y, z,
                                                      ctx))
            goto err;
        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
            /* precomputation matches generator */
            have_pre_comp = 1;
        else
            /*
             * we don't have valid precomputation: treat the generator as a
             * random point
             */
            num_points++;
    }

    if (num_points > 0) {
        if (num_points >= 2) {
            /*
             * unless we precompute multiples for just one point, converting
             * those into affine form is time well spent
             */
            mixed = 1;
        }
        secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
        pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
        if (mixed)
            tmp_felems =
                OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
        if ((secrets == NULL) || (pre_comp == NULL)
            || (mixed && (tmp_felems == NULL))) {
            ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_MALLOC_FAILURE);
            goto err;
        }

        /*
         * we treat NULL scalars as 0, and NULL points as points at infinity,
         * i.e., they contribute nothing to the linear combination
         */
        memset(secrets, 0, num_points * sizeof(felem_bytearray));
        memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
        for (i = 0; i < num_points; ++i) {
            if (i == num)
                /*
                 * we didn't have a valid precomputation, so we pick the
                 * generator
                 */
            {
                p = EC_GROUP_get0_generator(group);
                p_scalar = scalar;
            } else
                /* the i^th point */
            {
                p = points[i];
                p_scalar = scalars[i];
            }
            if ((p_scalar != NULL) && (p != NULL)) {
                /* reduce scalar to 0 <= scalar < 2^521 */
                if ((BN_num_bits(p_scalar) > 521)
                    || (BN_is_negative(p_scalar))) {
                    /*
                     * this is an unusual input, and we don't guarantee
                     * constant-timeness
                     */
                    if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) {
                        ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
                        goto err;
                    }
                    num_bytes = BN_bn2bin(tmp_scalar, tmp);
                } else
                    num_bytes = BN_bn2bin(p_scalar, tmp);
                /* batch_mul consumes scalars in little-endian byte order */
                flip_endian(secrets[i], tmp, num_bytes);
                /* precompute multiples */
                if ((!BN_to_felem(x_out, &p->X)) ||
                    (!BN_to_felem(y_out, &p->Y)) ||
                    (!BN_to_felem(z_out, &p->Z)))
                    goto err;
                memcpy(pre_comp[i][1][0], x_out, sizeof(felem));
                memcpy(pre_comp[i][1][1], y_out, sizeof(felem));
                memcpy(pre_comp[i][1][2], z_out, sizeof(felem));
                for (j = 2; j <= 16; ++j) {
                    /* odd j: (j-1)*P + P; even j: double (j/2)*P */
                    if (j & 1) {
                        point_add(pre_comp[i][j][0], pre_comp[i][j][1],
                                  pre_comp[i][j][2], pre_comp[i][1][0],
                                  pre_comp[i][1][1], pre_comp[i][1][2], 0,
                                  pre_comp[i][j - 1][0],
                                  pre_comp[i][j - 1][1],
                                  pre_comp[i][j - 1][2]);
                    } else {
                        point_double(pre_comp[i][j][0], pre_comp[i][j][1],
                                     pre_comp[i][j][2], pre_comp[i][j / 2][0],
                                     pre_comp[i][j / 2][1],
                                     pre_comp[i][j / 2][2]);
                    }
                }
            }
        }
        if (mixed)
            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
    }

    /* the scalar for the generator */
    if ((scalar != NULL) && (have_pre_comp)) {
        memset(g_secret, 0, sizeof(g_secret));
        /* reduce scalar to 0 <= scalar < 2^521 */
        if ((BN_num_bits(scalar) > 521) || (BN_is_negative(scalar))) {
            /*
             * this is an unusual input, and we don't guarantee
             * constant-timeness
             */
            if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
                ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
                goto err;
            }
            num_bytes = BN_bn2bin(tmp_scalar, tmp);
        } else
            num_bytes = BN_bn2bin(scalar, tmp);
        flip_endian(g_secret, tmp, num_bytes);
        /* do the multiplication with generator precomputation */
        batch_mul(x_out, y_out, z_out,
                  (const felem_bytearray(*))secrets, num_points,
                  g_secret,
                  mixed, (const felem(*)[17][3])pre_comp,
                  (const felem(*)[3])g_pre_comp);
    } else
        /* do the multiplication without generator precomputation */
        batch_mul(x_out, y_out, z_out,
                  (const felem_bytearray(*))secrets, num_points,
                  NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
    /* reduce the output to its unique minimal representation */
    felem_contract(x_in, x_out);
    felem_contract(y_in, y_out);
    felem_contract(z_in, z_out);
    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
        (!felem_to_BN(z, z_in))) {
        ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
        goto err;
    }
    ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);

 err:
    BN_CTX_end(ctx);
    if (generator != NULL)
        EC_POINT_free(generator);
    if (new_ctx != NULL)
        BN_CTX_free(new_ctx);
    if (secrets != NULL)
        OPENSSL_free(secrets);
    if (pre_comp != NULL)
        OPENSSL_free(pre_comp);
    if (tmp_felems != NULL)
        OPENSSL_free(tmp_felems);
    return ret;
}

/*
 * Build and cache the 16-entry generator table (see the layout comment
 * above gmul) in the group's extra data.
 */
int ec_GFp_nistp521_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
{
    int ret = 0;
    NISTP521_PRE_COMP *pre = NULL;
    int i, j;
    BN_CTX *new_ctx = NULL;
    BIGNUM *x, *y;
    EC_POINT *generator = NULL;
    felem tmp_felems[16];

    /* throw away old precomputation */
    EC_EX_DATA_free_data(&group->extra_data, nistp521_pre_comp_dup,
                         nistp521_pre_comp_free,
                         nistp521_pre_comp_clear_free);
    if (ctx == NULL)
        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
            return 0;
    BN_CTX_start(ctx);
    if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
        goto err;
    /* get the generator */
    if (group->generator == NULL)
        goto err;
    generator = EC_POINT_new(group);
    if (generator == NULL)
        goto err;
    BN_bin2bn(nistp521_curve_params[3], sizeof(felem_bytearray), x);
    BN_bin2bn(nistp521_curve_params[4], sizeof(felem_bytearray), y);
    if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
        goto err;
    if ((pre = nistp521_pre_comp_new()) == NULL)
        goto err;
    /*
     * if the generator is the standard one, use built-in precomputation
     */
    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
        goto done;
    }
    if ((!BN_to_felem(pre->g_pre_comp[1][0], &group->generator->X)) ||
        (!BN_to_felem(pre->g_pre_comp[1][1], &group->generator->Y)) ||
        (!BN_to_felem(pre->g_pre_comp[1][2], &group->generator->Z)))
        goto err;
    /* compute 2^130*G, 2^260*G, 2^390*G */
    for (i = 1; i <= 4; i <<= 1) {
        /* entry 2*i = 2^130 * (entry i): one initial doubling plus 129 more */
        point_double(pre->g_pre_comp[2 * i][0], pre->g_pre_comp[2 * i][1],
                     pre->g_pre_comp[2 * i][2], pre->g_pre_comp[i][0],
                     pre->g_pre_comp[i][1], pre->g_pre_comp[i][2]);
        for (j = 0; j < 129; ++j) {
            point_double(pre->g_pre_comp[2 * i][0],
                         pre->g_pre_comp[2 * i][1],
                         pre->g_pre_comp[2 * i][2],
                         pre->g_pre_comp[2 * i][0],
                         pre->g_pre_comp[2 * i][1],
                         pre->g_pre_comp[2 * i][2]);
        }
    }
    /* g_pre_comp[0] is the point at infinity */
    memset(pre->g_pre_comp[0], 0, sizeof(pre->g_pre_comp[0]));
    /* the remaining multiples */
    /* 2^130*G + 2^260*G */
    point_add(pre->g_pre_comp[6][0], pre->g_pre_comp[6][1],
              pre->g_pre_comp[6][2], pre->g_pre_comp[4][0],
              pre->g_pre_comp[4][1], pre->g_pre_comp[4][2],
              0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
              pre->g_pre_comp[2][2]);
    /* 2^130*G + 2^390*G */
    point_add(pre->g_pre_comp[10][0], pre->g_pre_comp[10][1],
              pre->g_pre_comp[10][2], pre->g_pre_comp[8][0],
              pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
              0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
              pre->g_pre_comp[2][2]);
    /* 2^260*G + 2^390*G */
    point_add(pre->g_pre_comp[12][0], pre->g_pre_comp[12][1],
              pre->g_pre_comp[12][2], pre->g_pre_comp[8][0],
              pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
              0, pre->g_pre_comp[4][0], pre->g_pre_comp[4][1],
              pre->g_pre_comp[4][2]);
    /* 2^130*G + 2^260*G + 2^390*G */
    point_add(pre->g_pre_comp[14][0], pre->g_pre_comp[14][1],
              pre->g_pre_comp[14][2], pre->g_pre_comp[12][0],
              pre->g_pre_comp[12][1], pre->g_pre_comp[12][2],
              0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
              pre->g_pre_comp[2][2]);
    for (i = 1; i < 8; ++i) {
        /* odd multiples: add G */
        point_add(pre->g_pre_comp[2 * i + 1][0],
                  pre->g_pre_comp[2 * i + 1][1],
                  pre->g_pre_comp[2 * i + 1][2], pre->g_pre_comp[2 * i][0],
                  pre->g_pre_comp[2 * i][1], pre->g_pre_comp[2 * i][2], 0,
                  pre->g_pre_comp[1][0], pre->g_pre_comp[1][1],
                  pre->g_pre_comp[1][2]);
    }
    /* entries 1..15; entry 0 (infinity) must stay all-zero */
    make_points_affine(15, &(pre->g_pre_comp[1]), tmp_felems);

 done:
    if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp521_pre_comp_dup,
                             nistp521_pre_comp_free,
                             nistp521_pre_comp_clear_free))
        goto err;
    ret = 1;
    pre = NULL;                 /* ownership transferred to group->extra_data */
 err:
    BN_CTX_end(ctx);
    if (generator != NULL)
        EC_POINT_free(generator);
    if (new_ctx != NULL)
        BN_CTX_free(new_ctx);
    if (pre)
        nistp521_pre_comp_free(pre);
    return ret;
}

int ec_GFp_nistp521_have_precompute_mult(const EC_GROUP *group)
{
    if (EC_EX_DATA_get_data(group->extra_data, nistp521_pre_comp_dup,
                            nistp521_pre_comp_free,
                            nistp521_pre_comp_clear_free)
        != NULL)
        return 1;
    else
        return 0;
}

#else
static void *dummy = &dummy;
#endif
2147