1 /*
2 * Copyright 2023-2025 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License 2.0 (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10 /* Copyright 2023 IBM Corp.
11 *
12 * Licensed under the Apache License, Version 2.0 (the "License");
13 *
14 * you may not use this file except in compliance with the License.
15 * You may obtain a copy of the License at
16 *
17 * http://www.apache.org/licenses/LICENSE-2.0
18 *
19 * Unless required by applicable law or agreed to in writing, software
20 * distributed under the License is distributed on an "AS IS" BASIS,
21 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 * See the License for the specific language governing permissions and
23 * limitations under the License.
24 */
25
26 /*
27 * Designed for 56-bit limbs by Rohan McLure <rohan.mclure@linux.ibm.com>.
28 * The layout is based on that of ecp_nistp{224,521}.c, allowing even for asm
29 * acceleration of felem_{square,mul} as supported in these files.
30 */
31
32 #include <openssl/e_os2.h>
33
34 #include <string.h>
35 #include <openssl/err.h>
36 #include "ec_local.h"
37
38 #include "internal/numbers.h"
39
40 #ifndef INT128_MAX
41 # error "Your compiler doesn't appear to support 128-bit integer types"
42 #endif
43
44 typedef uint8_t u8;
45 typedef uint64_t u64;
46
47 /*
48 * The underlying field. P384 operates over GF(2^384-2^128-2^96+2^32-1). We
49 * can serialize an element of this field into 48 bytes. We call this an
50 * felem_bytearray.
51 */
52
53 typedef u8 felem_bytearray[48];
54
55 /*
56 * These are the parameters of P384, taken from FIPS 186-3, section D.1.2.4.
57 * These values are big-endian.
58 */
59 static const felem_bytearray nistp384_curve_params[5] = {
60 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
61 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
62 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
63 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF},
64 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a = -3 */
65 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
66 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
67 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFC},
68 {0xB3, 0x31, 0x2F, 0xA7, 0xE2, 0x3E, 0xE7, 0xE4, 0x98, 0x8E, 0x05, 0x6B, /* b */
69 0xE3, 0xF8, 0x2D, 0x19, 0x18, 0x1D, 0x9C, 0x6E, 0xFE, 0x81, 0x41, 0x12,
70 0x03, 0x14, 0x08, 0x8F, 0x50, 0x13, 0x87, 0x5A, 0xC6, 0x56, 0x39, 0x8D,
71 0x8A, 0x2E, 0xD1, 0x9D, 0x2A, 0x85, 0xC8, 0xED, 0xD3, 0xEC, 0x2A, 0xEF},
72 {0xAA, 0x87, 0xCA, 0x22, 0xBE, 0x8B, 0x05, 0x37, 0x8E, 0xB1, 0xC7, 0x1E, /* x */
73 0xF3, 0x20, 0xAD, 0x74, 0x6E, 0x1D, 0x3B, 0x62, 0x8B, 0xA7, 0x9B, 0x98,
74 0x59, 0xF7, 0x41, 0xE0, 0x82, 0x54, 0x2A, 0x38, 0x55, 0x02, 0xF2, 0x5D,
75 0xBF, 0x55, 0x29, 0x6C, 0x3A, 0x54, 0x5E, 0x38, 0x72, 0x76, 0x0A, 0xB7},
76 {0x36, 0x17, 0xDE, 0x4A, 0x96, 0x26, 0x2C, 0x6F, 0x5D, 0x9E, 0x98, 0xBF, /* y */
77 0x92, 0x92, 0xDC, 0x29, 0xF8, 0xF4, 0x1D, 0xBD, 0x28, 0x9A, 0x14, 0x7C,
78 0xE9, 0xDA, 0x31, 0x13, 0xB5, 0xF0, 0xB8, 0xC0, 0x0A, 0x60, 0xB1, 0xCE,
79 0x1D, 0x7E, 0x81, 0x9D, 0x7A, 0x43, 0x1D, 0x7C, 0x90, 0xEA, 0x0E, 0x5F},
80 };
81
82 /*-
83 * The representation of field elements.
84 * ------------------------------------
85 *
86 * We represent field elements with seven values. These values are either 64 or
87 * 128 bits and the field element represented is:
88 * v[0]*2^0 + v[1]*2^56 + v[2]*2^112 + ... + v[6]*2^336 (mod p)
89 * Each of the seven values is called a 'limb'. Since the limbs are spaced only
90 * 56 bits apart, but are greater than 56 bits in length, the most significant
91 * bits of each limb overlap with the least significant bits of the next
92 *
93 * This representation is considered to be 'redundant' in the sense that
94 * intermediate values can each contain more than a 56-bit value in each limb.
95 * Reduction causes all but the final limb to be reduced to contain a value less
96 * than 2^56, with the final value represented allowed to be larger than 2^384,
97 * inasmuch as we can be sure that arithmetic overflow remains impossible. The
98 * reduced value must of course be congruent to the unreduced value.
99 *
100 * A field element with 64-bit limbs is an 'felem'. One with 128-bit limbs is a
101 * 'widefelem', featuring enough bits to store the result of a multiplication
102 * and even some further arithmetic without need for immediate reduction.
103 */
104
105 #define NLIMBS 7
106
107 typedef uint64_t limb;
108 typedef uint128_t widelimb;
109 typedef limb limb_aX __attribute((__aligned__(1)));
110 typedef limb felem[NLIMBS];
111 typedef widelimb widefelem[2*NLIMBS-1];
112
113 static const limb bottom56bits = 0xffffffffffffff;
114
115 /* Helper functions (de)serialising reduced field elements in little endian */
bin48_to_felem(felem out,const u8 in[48])116 static void bin48_to_felem(felem out, const u8 in[48])
117 {
118 memset(out, 0, 56);
119 out[0] = (*((limb *) & in[0])) & bottom56bits;
120 out[1] = (*((limb_aX *) & in[7])) & bottom56bits;
121 out[2] = (*((limb_aX *) & in[14])) & bottom56bits;
122 out[3] = (*((limb_aX *) & in[21])) & bottom56bits;
123 out[4] = (*((limb_aX *) & in[28])) & bottom56bits;
124 out[5] = (*((limb_aX *) & in[35])) & bottom56bits;
125 memmove(&out[6], &in[42], 6);
126 }
127
felem_to_bin48(u8 out[48],const felem in)128 static void felem_to_bin48(u8 out[48], const felem in)
129 {
130 memset(out, 0, 48);
131 (*((limb *) & out[0])) |= (in[0] & bottom56bits);
132 (*((limb_aX *) & out[7])) |= (in[1] & bottom56bits);
133 (*((limb_aX *) & out[14])) |= (in[2] & bottom56bits);
134 (*((limb_aX *) & out[21])) |= (in[3] & bottom56bits);
135 (*((limb_aX *) & out[28])) |= (in[4] & bottom56bits);
136 (*((limb_aX *) & out[35])) |= (in[5] & bottom56bits);
137 memmove(&out[42], &in[6], 6);
138 }
139
140 /* BN_to_felem converts an OpenSSL BIGNUM into an felem */
BN_to_felem(felem out,const BIGNUM * bn)141 static int BN_to_felem(felem out, const BIGNUM *bn)
142 {
143 felem_bytearray b_out;
144 int num_bytes;
145
146 if (BN_is_negative(bn)) {
147 ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
148 return 0;
149 }
150 num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
151 if (num_bytes < 0) {
152 ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
153 return 0;
154 }
155 bin48_to_felem(out, b_out);
156 return 1;
157 }
158
159 /* felem_to_BN converts an felem into an OpenSSL BIGNUM */
felem_to_BN(BIGNUM * out,const felem in)160 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
161 {
162 felem_bytearray b_out;
163
164 felem_to_bin48(b_out, in);
165 return BN_lebin2bn(b_out, sizeof(b_out), out);
166 }
167
168 /*-
169 * Field operations
170 * ----------------
171 */
172
felem_one(felem out)173 static void felem_one(felem out)
174 {
175 out[0] = 1;
176 memset(&out[1], 0, sizeof(limb) * (NLIMBS-1));
177 }
178
felem_assign(felem out,const felem in)179 static void felem_assign(felem out, const felem in)
180 {
181 memcpy(out, in, sizeof(felem));
182 }
183
184 /* felem_sum64 sets out = out + in. */
felem_sum64(felem out,const felem in)185 static void felem_sum64(felem out, const felem in)
186 {
187 unsigned int i;
188
189 for (i = 0; i < NLIMBS; i++)
190 out[i] += in[i];
191 }
192
193 /* felem_scalar sets out = in * scalar */
felem_scalar(felem out,const felem in,limb scalar)194 static void felem_scalar(felem out, const felem in, limb scalar)
195 {
196 unsigned int i;
197
198 for (i = 0; i < NLIMBS; i++)
199 out[i] = in[i] * scalar;
200 }
201
202 /* felem_scalar64 sets out = out * scalar */
felem_scalar64(felem out,limb scalar)203 static void felem_scalar64(felem out, limb scalar)
204 {
205 unsigned int i;
206
207 for (i = 0; i < NLIMBS; i++)
208 out[i] *= scalar;
209 }
210
211 /* felem_scalar128 sets out = out * scalar */
felem_scalar128(widefelem out,limb scalar)212 static void felem_scalar128(widefelem out, limb scalar)
213 {
214 unsigned int i;
215
216 for (i = 0; i < 2*NLIMBS-1; i++)
217 out[i] *= scalar;
218 }
219
220 /*-
221 * felem_neg sets |out| to |-in|
222 * On entry:
223 * in[i] < 2^60 - 2^29
224 * On exit:
225 * out[i] < 2^60
226 */
felem_neg(felem out,const felem in)227 static void felem_neg(felem out, const felem in)
228 {
229 /*
230 * In order to prevent underflow, we add a multiple of p before subtracting.
231 * Use telescopic sums to represent 2^12 * p redundantly with each limb
232 * of the form 2^60 + ...
233 */
234 static const limb two60m52m4 = (((limb) 1) << 60)
235 - (((limb) 1) << 52)
236 - (((limb) 1) << 4);
237 static const limb two60p44m12 = (((limb) 1) << 60)
238 + (((limb) 1) << 44)
239 - (((limb) 1) << 12);
240 static const limb two60m28m4 = (((limb) 1) << 60)
241 - (((limb) 1) << 28)
242 - (((limb) 1) << 4);
243 static const limb two60m4 = (((limb) 1) << 60)
244 - (((limb) 1) << 4);
245
246 out[0] = two60p44m12 - in[0];
247 out[1] = two60m52m4 - in[1];
248 out[2] = two60m28m4 - in[2];
249 out[3] = two60m4 - in[3];
250 out[4] = two60m4 - in[4];
251 out[5] = two60m4 - in[5];
252 out[6] = two60m4 - in[6];
253 }
254
255 #if defined(ECP_NISTP384_ASM)
256 void p384_felem_diff64(felem out, const felem in);
257 void p384_felem_diff128(widefelem out, const widefelem in);
258 void p384_felem_diff_128_64(widefelem out, const felem in);
259
260 # define felem_diff64 p384_felem_diff64
261 # define felem_diff128 p384_felem_diff128
262 # define felem_diff_128_64 p384_felem_diff_128_64
263
264 #else
265 /*-
266 * felem_diff64 subtracts |in| from |out|
267 * On entry:
268 * in[i] < 2^60 - 2^52 - 2^4
269 * On exit:
270 * out[i] < out_orig[i] + 2^60 + 2^44
271 */
felem_diff64(felem out,const felem in)272 static void felem_diff64(felem out, const felem in)
273 {
274 /*
275 * In order to prevent underflow, we add a multiple of p before subtracting.
276 * Use telescopic sums to represent 2^12 * p redundantly with each limb
277 * of the form 2^60 + ...
278 */
279
280 static const limb two60m52m4 = (((limb) 1) << 60)
281 - (((limb) 1) << 52)
282 - (((limb) 1) << 4);
283 static const limb two60p44m12 = (((limb) 1) << 60)
284 + (((limb) 1) << 44)
285 - (((limb) 1) << 12);
286 static const limb two60m28m4 = (((limb) 1) << 60)
287 - (((limb) 1) << 28)
288 - (((limb) 1) << 4);
289 static const limb two60m4 = (((limb) 1) << 60)
290 - (((limb) 1) << 4);
291
292 out[0] += two60p44m12 - in[0];
293 out[1] += two60m52m4 - in[1];
294 out[2] += two60m28m4 - in[2];
295 out[3] += two60m4 - in[3];
296 out[4] += two60m4 - in[4];
297 out[5] += two60m4 - in[5];
298 out[6] += two60m4 - in[6];
299 }
300
301 /*
302 * in[i] < 2^63
303 * out[i] < out_orig[i] + 2^64 + 2^48
304 */
felem_diff_128_64(widefelem out,const felem in)305 static void felem_diff_128_64(widefelem out, const felem in)
306 {
307 /*
308 * In order to prevent underflow, we add a multiple of p before subtracting.
309 * Use telescopic sums to represent 2^16 * p redundantly with each limb
310 * of the form 2^64 + ...
311 */
312
313 static const widelimb two64m56m8 = (((widelimb) 1) << 64)
314 - (((widelimb) 1) << 56)
315 - (((widelimb) 1) << 8);
316 static const widelimb two64m32m8 = (((widelimb) 1) << 64)
317 - (((widelimb) 1) << 32)
318 - (((widelimb) 1) << 8);
319 static const widelimb two64m8 = (((widelimb) 1) << 64)
320 - (((widelimb) 1) << 8);
321 static const widelimb two64p48m16 = (((widelimb) 1) << 64)
322 + (((widelimb) 1) << 48)
323 - (((widelimb) 1) << 16);
324 unsigned int i;
325
326 out[0] += two64p48m16;
327 out[1] += two64m56m8;
328 out[2] += two64m32m8;
329 out[3] += two64m8;
330 out[4] += two64m8;
331 out[5] += two64m8;
332 out[6] += two64m8;
333
334 for (i = 0; i < NLIMBS; i++)
335 out[i] -= in[i];
336 }
337
338 /*
339 * in[i] < 2^127 - 2^119 - 2^71
340 * out[i] < out_orig[i] + 2^127 + 2^111
341 */
felem_diff128(widefelem out,const widefelem in)342 static void felem_diff128(widefelem out, const widefelem in)
343 {
344 /*
345 * In order to prevent underflow, we add a multiple of p before subtracting.
346 * Use telescopic sums to represent 2^415 * p redundantly with each limb
347 * of the form 2^127 + ...
348 */
349
350 static const widelimb two127 = ((widelimb) 1) << 127;
351 static const widelimb two127m71 = (((widelimb) 1) << 127)
352 - (((widelimb) 1) << 71);
353 static const widelimb two127p111m79m71 = (((widelimb) 1) << 127)
354 + (((widelimb) 1) << 111)
355 - (((widelimb) 1) << 79)
356 - (((widelimb) 1) << 71);
357 static const widelimb two127m119m71 = (((widelimb) 1) << 127)
358 - (((widelimb) 1) << 119)
359 - (((widelimb) 1) << 71);
360 static const widelimb two127m95m71 = (((widelimb) 1) << 127)
361 - (((widelimb) 1) << 95)
362 - (((widelimb) 1) << 71);
363 unsigned int i;
364
365 out[0] += two127;
366 out[1] += two127m71;
367 out[2] += two127m71;
368 out[3] += two127m71;
369 out[4] += two127m71;
370 out[5] += two127m71;
371 out[6] += two127p111m79m71;
372 out[7] += two127m119m71;
373 out[8] += two127m95m71;
374 out[9] += two127m71;
375 out[10] += two127m71;
376 out[11] += two127m71;
377 out[12] += two127m71;
378
379 for (i = 0; i < 2*NLIMBS-1; i++)
380 out[i] -= in[i];
381 }
382 #endif /* ECP_NISTP384_ASM */
383
felem_square_ref(widefelem out,const felem in)384 static void felem_square_ref(widefelem out, const felem in)
385 {
386 felem inx2;
387 felem_scalar(inx2, in, 2);
388
389 out[0] = ((uint128_t) in[0]) * in[0];
390
391 out[1] = ((uint128_t) in[0]) * inx2[1];
392
393 out[2] = ((uint128_t) in[0]) * inx2[2]
394 + ((uint128_t) in[1]) * in[1];
395
396 out[3] = ((uint128_t) in[0]) * inx2[3]
397 + ((uint128_t) in[1]) * inx2[2];
398
399 out[4] = ((uint128_t) in[0]) * inx2[4]
400 + ((uint128_t) in[1]) * inx2[3]
401 + ((uint128_t) in[2]) * in[2];
402
403 out[5] = ((uint128_t) in[0]) * inx2[5]
404 + ((uint128_t) in[1]) * inx2[4]
405 + ((uint128_t) in[2]) * inx2[3];
406
407 out[6] = ((uint128_t) in[0]) * inx2[6]
408 + ((uint128_t) in[1]) * inx2[5]
409 + ((uint128_t) in[2]) * inx2[4]
410 + ((uint128_t) in[3]) * in[3];
411
412 out[7] = ((uint128_t) in[1]) * inx2[6]
413 + ((uint128_t) in[2]) * inx2[5]
414 + ((uint128_t) in[3]) * inx2[4];
415
416 out[8] = ((uint128_t) in[2]) * inx2[6]
417 + ((uint128_t) in[3]) * inx2[5]
418 + ((uint128_t) in[4]) * in[4];
419
420 out[9] = ((uint128_t) in[3]) * inx2[6]
421 + ((uint128_t) in[4]) * inx2[5];
422
423 out[10] = ((uint128_t) in[4]) * inx2[6]
424 + ((uint128_t) in[5]) * in[5];
425
426 out[11] = ((uint128_t) in[5]) * inx2[6];
427
428 out[12] = ((uint128_t) in[6]) * in[6];
429 }
430
felem_mul_ref(widefelem out,const felem in1,const felem in2)431 static void felem_mul_ref(widefelem out, const felem in1, const felem in2)
432 {
433 out[0] = ((uint128_t) in1[0]) * in2[0];
434
435 out[1] = ((uint128_t) in1[0]) * in2[1]
436 + ((uint128_t) in1[1]) * in2[0];
437
438 out[2] = ((uint128_t) in1[0]) * in2[2]
439 + ((uint128_t) in1[1]) * in2[1]
440 + ((uint128_t) in1[2]) * in2[0];
441
442 out[3] = ((uint128_t) in1[0]) * in2[3]
443 + ((uint128_t) in1[1]) * in2[2]
444 + ((uint128_t) in1[2]) * in2[1]
445 + ((uint128_t) in1[3]) * in2[0];
446
447 out[4] = ((uint128_t) in1[0]) * in2[4]
448 + ((uint128_t) in1[1]) * in2[3]
449 + ((uint128_t) in1[2]) * in2[2]
450 + ((uint128_t) in1[3]) * in2[1]
451 + ((uint128_t) in1[4]) * in2[0];
452
453 out[5] = ((uint128_t) in1[0]) * in2[5]
454 + ((uint128_t) in1[1]) * in2[4]
455 + ((uint128_t) in1[2]) * in2[3]
456 + ((uint128_t) in1[3]) * in2[2]
457 + ((uint128_t) in1[4]) * in2[1]
458 + ((uint128_t) in1[5]) * in2[0];
459
460 out[6] = ((uint128_t) in1[0]) * in2[6]
461 + ((uint128_t) in1[1]) * in2[5]
462 + ((uint128_t) in1[2]) * in2[4]
463 + ((uint128_t) in1[3]) * in2[3]
464 + ((uint128_t) in1[4]) * in2[2]
465 + ((uint128_t) in1[5]) * in2[1]
466 + ((uint128_t) in1[6]) * in2[0];
467
468 out[7] = ((uint128_t) in1[1]) * in2[6]
469 + ((uint128_t) in1[2]) * in2[5]
470 + ((uint128_t) in1[3]) * in2[4]
471 + ((uint128_t) in1[4]) * in2[3]
472 + ((uint128_t) in1[5]) * in2[2]
473 + ((uint128_t) in1[6]) * in2[1];
474
475 out[8] = ((uint128_t) in1[2]) * in2[6]
476 + ((uint128_t) in1[3]) * in2[5]
477 + ((uint128_t) in1[4]) * in2[4]
478 + ((uint128_t) in1[5]) * in2[3]
479 + ((uint128_t) in1[6]) * in2[2];
480
481 out[9] = ((uint128_t) in1[3]) * in2[6]
482 + ((uint128_t) in1[4]) * in2[5]
483 + ((uint128_t) in1[5]) * in2[4]
484 + ((uint128_t) in1[6]) * in2[3];
485
486 out[10] = ((uint128_t) in1[4]) * in2[6]
487 + ((uint128_t) in1[5]) * in2[5]
488 + ((uint128_t) in1[6]) * in2[4];
489
490 out[11] = ((uint128_t) in1[5]) * in2[6]
491 + ((uint128_t) in1[6]) * in2[5];
492
493 out[12] = ((uint128_t) in1[6]) * in2[6];
494 }
495
496 /*-
497 * Reduce thirteen 128-bit coefficients to seven 64-bit coefficients.
498 * in[i] < 2^128 - 2^125
499 * out[i] < 2^56 for i < 6,
500 * out[6] <= 2^48
501 *
502 * The technique in use here stems from the format of the prime modulus:
503 * P384 = 2^384 - delta
504 *
505 * Thus we can reduce numbers of the form (X + 2^384 * Y) by substituting
506 * them with (X + delta Y), with delta = 2^128 + 2^96 + (-2^32 + 1). These
507 * coefficients are still quite large, and so we repeatedly apply this
508 * technique on high-order bits in order to guarantee the desired bounds on
509 * the size of our output.
510 *
511 * The three phases of elimination are as follows:
512 * [1]: Y = 2^120 (in[12] | in[11] | in[10] | in[9])
513 * [2]: Y = 2^8 (acc[8] | acc[7])
514 * [3]: Y = 2^48 (acc[6] >> 48)
515 * (Where a | b | c | d = (2^56)^3 a + (2^56)^2 b + (2^56) c + d)
516 */
felem_reduce_ref(felem out,const widefelem in)517 static void felem_reduce_ref(felem out, const widefelem in)
518 {
519 /*
520 * In order to prevent underflow, we add a multiple of p before subtracting.
521 * Use telescopic sums to represent 2^76 * p redundantly with each limb
522 * of the form 2^124 + ...
523 */
524 static const widelimb two124m68 = (((widelimb) 1) << 124)
525 - (((widelimb) 1) << 68);
526 static const widelimb two124m116m68 = (((widelimb) 1) << 124)
527 - (((widelimb) 1) << 116)
528 - (((widelimb) 1) << 68);
529 static const widelimb two124p108m76 = (((widelimb) 1) << 124)
530 + (((widelimb) 1) << 108)
531 - (((widelimb) 1) << 76);
532 static const widelimb two124m92m68 = (((widelimb) 1) << 124)
533 - (((widelimb) 1) << 92)
534 - (((widelimb) 1) << 68);
535 widelimb temp, acc[9];
536 unsigned int i;
537
538 memcpy(acc, in, sizeof(widelimb) * 9);
539
540 acc[0] += two124p108m76;
541 acc[1] += two124m116m68;
542 acc[2] += two124m92m68;
543 acc[3] += two124m68;
544 acc[4] += two124m68;
545 acc[5] += two124m68;
546 acc[6] += two124m68;
547
548 /* [1]: Eliminate in[9], ..., in[12] */
549 acc[8] += in[12] >> 32;
550 acc[7] += (in[12] & 0xffffffff) << 24;
551 acc[7] += in[12] >> 8;
552 acc[6] += (in[12] & 0xff) << 48;
553 acc[6] -= in[12] >> 16;
554 acc[5] -= (in[12] & 0xffff) << 40;
555 acc[6] += in[12] >> 48;
556 acc[5] += (in[12] & 0xffffffffffff) << 8;
557
558 acc[7] += in[11] >> 32;
559 acc[6] += (in[11] & 0xffffffff) << 24;
560 acc[6] += in[11] >> 8;
561 acc[5] += (in[11] & 0xff) << 48;
562 acc[5] -= in[11] >> 16;
563 acc[4] -= (in[11] & 0xffff) << 40;
564 acc[5] += in[11] >> 48;
565 acc[4] += (in[11] & 0xffffffffffff) << 8;
566
567 acc[6] += in[10] >> 32;
568 acc[5] += (in[10] & 0xffffffff) << 24;
569 acc[5] += in[10] >> 8;
570 acc[4] += (in[10] & 0xff) << 48;
571 acc[4] -= in[10] >> 16;
572 acc[3] -= (in[10] & 0xffff) << 40;
573 acc[4] += in[10] >> 48;
574 acc[3] += (in[10] & 0xffffffffffff) << 8;
575
576 acc[5] += in[9] >> 32;
577 acc[4] += (in[9] & 0xffffffff) << 24;
578 acc[4] += in[9] >> 8;
579 acc[3] += (in[9] & 0xff) << 48;
580 acc[3] -= in[9] >> 16;
581 acc[2] -= (in[9] & 0xffff) << 40;
582 acc[3] += in[9] >> 48;
583 acc[2] += (in[9] & 0xffffffffffff) << 8;
584
585 /*
586 * [2]: Eliminate acc[7], acc[8], that is the 7 and eighth limbs, as
587 * well as the contributions made from eliminating higher limbs.
588 * acc[7] < in[7] + 2^120 + 2^56 < in[7] + 2^121
589 * acc[8] < in[8] + 2^96
590 */
591 acc[4] += acc[8] >> 32;
592 acc[3] += (acc[8] & 0xffffffff) << 24;
593 acc[3] += acc[8] >> 8;
594 acc[2] += (acc[8] & 0xff) << 48;
595 acc[2] -= acc[8] >> 16;
596 acc[1] -= (acc[8] & 0xffff) << 40;
597 acc[2] += acc[8] >> 48;
598 acc[1] += (acc[8] & 0xffffffffffff) << 8;
599
600 acc[3] += acc[7] >> 32;
601 acc[2] += (acc[7] & 0xffffffff) << 24;
602 acc[2] += acc[7] >> 8;
603 acc[1] += (acc[7] & 0xff) << 48;
604 acc[1] -= acc[7] >> 16;
605 acc[0] -= (acc[7] & 0xffff) << 40;
606 acc[1] += acc[7] >> 48;
607 acc[0] += (acc[7] & 0xffffffffffff) << 8;
608
609 /*-
610 * acc[k] < in[k] + 2^124 + 2^121
611 * < in[k] + 2^125
612 * < 2^128, for k <= 6
613 */
614
615 /*
616 * Carry 4 -> 5 -> 6
617 * This has the effect of ensuring that these more significant limbs
618 * will be small in value after eliminating high bits from acc[6].
619 */
620 acc[5] += acc[4] >> 56;
621 acc[4] &= 0x00ffffffffffffff;
622
623 acc[6] += acc[5] >> 56;
624 acc[5] &= 0x00ffffffffffffff;
625
626 /*-
627 * acc[6] < in[6] + 2^124 + 2^121 + 2^72 + 2^16
628 * < in[6] + 2^125
629 * < 2^128
630 */
631
632 /* [3]: Eliminate high bits of acc[6] */
633 temp = acc[6] >> 48;
634 acc[6] &= 0x0000ffffffffffff;
635
636 /* temp < 2^80 */
637
638 acc[3] += temp >> 40;
639 acc[2] += (temp & 0xffffffffff) << 16;
640 acc[2] += temp >> 16;
641 acc[1] += (temp & 0xffff) << 40;
642 acc[1] -= temp >> 24;
643 acc[0] -= (temp & 0xffffff) << 32;
644 acc[0] += temp;
645
646 /*-
647 * acc[k] < acc_old[k] + 2^64 + 2^56
648 * < in[k] + 2^124 + 2^121 + 2^72 + 2^64 + 2^56 + 2^16 , k < 4
649 */
650
651 /* Carry 0 -> 1 -> 2 -> 3 -> 4 -> 5 -> 6 */
652 acc[1] += acc[0] >> 56; /* acc[1] < acc_old[1] + 2^72 */
653 acc[0] &= 0x00ffffffffffffff;
654
655 acc[2] += acc[1] >> 56; /* acc[2] < acc_old[2] + 2^72 + 2^16 */
656 acc[1] &= 0x00ffffffffffffff;
657
658 acc[3] += acc[2] >> 56; /* acc[3] < acc_old[3] + 2^72 + 2^16 */
659 acc[2] &= 0x00ffffffffffffff;
660
661 /*-
662 * acc[k] < acc_old[k] + 2^72 + 2^16
663 * < in[k] + 2^124 + 2^121 + 2^73 + 2^64 + 2^56 + 2^17
664 * < in[k] + 2^125
665 * < 2^128 , k < 4
666 */
667
668 acc[4] += acc[3] >> 56; /*-
669 * acc[4] < acc_old[4] + 2^72 + 2^16
670 * < 2^72 + 2^56 + 2^16
671 */
672 acc[3] &= 0x00ffffffffffffff;
673
674 acc[5] += acc[4] >> 56; /*-
675 * acc[5] < acc_old[5] + 2^16 + 1
676 * < 2^56 + 2^16 + 1
677 */
678 acc[4] &= 0x00ffffffffffffff;
679
680 acc[6] += acc[5] >> 56; /* acc[6] < 2^48 + 1 <= 2^48 */
681 acc[5] &= 0x00ffffffffffffff;
682
683 for (i = 0; i < NLIMBS; i++)
684 out[i] = acc[i];
685 }
686
felem_square_reduce_ref(felem out,const felem in)687 static ossl_inline void felem_square_reduce_ref(felem out, const felem in)
688 {
689 widefelem tmp;
690
691 felem_square_ref(tmp, in);
692 felem_reduce_ref(out, tmp);
693 }
694
felem_mul_reduce_ref(felem out,const felem in1,const felem in2)695 static ossl_inline void felem_mul_reduce_ref(felem out, const felem in1, const felem in2)
696 {
697 widefelem tmp;
698
699 felem_mul_ref(tmp, in1, in2);
700 felem_reduce_ref(out, tmp);
701 }
702
703 #if defined(ECP_NISTP384_ASM)
704 static void felem_square_wrapper(widefelem out, const felem in);
705 static void felem_mul_wrapper(widefelem out, const felem in1, const felem in2);
706
707 static void (*felem_square_p)(widefelem out, const felem in) =
708 felem_square_wrapper;
709 static void (*felem_mul_p)(widefelem out, const felem in1, const felem in2) =
710 felem_mul_wrapper;
711
712 static void (*felem_reduce_p)(felem out, const widefelem in) = felem_reduce_ref;
713
714 static void (*felem_square_reduce_p)(felem out, const felem in) =
715 felem_square_reduce_ref;
716 static void (*felem_mul_reduce_p)(felem out, const felem in1, const felem in2) =
717 felem_mul_reduce_ref;
718
719 void p384_felem_square(widefelem out, const felem in);
720 void p384_felem_mul(widefelem out, const felem in1, const felem in2);
721 void p384_felem_reduce(felem out, const widefelem in);
722
723 void p384_felem_square_reduce(felem out, const felem in);
724 void p384_felem_mul_reduce(felem out, const felem in1, const felem in2);
725
726 # if defined(_ARCH_PPC64)
727 # include "crypto/ppc_arch.h"
728 # endif
729
felem_select(void)730 static void felem_select(void)
731 {
732 # if defined(_ARCH_PPC64)
733 if ((OPENSSL_ppccap_P & PPC_MADD300) && (OPENSSL_ppccap_P & PPC_ALTIVEC)) {
734 felem_square_p = p384_felem_square;
735 felem_mul_p = p384_felem_mul;
736 felem_reduce_p = p384_felem_reduce;
737 felem_square_reduce_p = p384_felem_square_reduce;
738 felem_mul_reduce_p = p384_felem_mul_reduce;
739
740 return;
741 }
742 # endif
743
744 /* Default */
745 felem_square_p = felem_square_ref;
746 felem_mul_p = felem_mul_ref;
747 felem_reduce_p = felem_reduce_ref;
748 felem_square_reduce_p = felem_square_reduce_ref;
749 felem_mul_reduce_p = felem_mul_reduce_ref;
750 }
751
felem_square_wrapper(widefelem out,const felem in)752 static void felem_square_wrapper(widefelem out, const felem in)
753 {
754 felem_select();
755 felem_square_p(out, in);
756 }
757
felem_mul_wrapper(widefelem out,const felem in1,const felem in2)758 static void felem_mul_wrapper(widefelem out, const felem in1, const felem in2)
759 {
760 felem_select();
761 felem_mul_p(out, in1, in2);
762 }
763
764 # define felem_square felem_square_p
765 # define felem_mul felem_mul_p
766 # define felem_reduce felem_reduce_p
767
768 # define felem_square_reduce felem_square_reduce_p
769 # define felem_mul_reduce felem_mul_reduce_p
770 #else
771 # define felem_square felem_square_ref
772 # define felem_mul felem_mul_ref
773 # define felem_reduce felem_reduce_ref
774
775 # define felem_square_reduce felem_square_reduce_ref
776 # define felem_mul_reduce felem_mul_reduce_ref
777 #endif
778
779 /*-
780 * felem_inv calculates |out| = |in|^{-1}
781 *
782 * Based on Fermat's Little Theorem:
783 * a^p = a (mod p)
784 * a^{p-1} = 1 (mod p)
785 * a^{p-2} = a^{-1} (mod p)
786 */
felem_inv(felem out,const felem in)787 static void felem_inv(felem out, const felem in)
788 {
789 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6;
790 unsigned int i = 0;
791
792 felem_square_reduce(ftmp, in); /* 2^1 */
793 felem_mul_reduce(ftmp, ftmp, in); /* 2^1 + 2^0 */
794 felem_assign(ftmp2, ftmp);
795
796 felem_square_reduce(ftmp, ftmp); /* 2^2 + 2^1 */
797 felem_mul_reduce(ftmp, ftmp, in); /* 2^2 + 2^1 * 2^0 */
798 felem_assign(ftmp3, ftmp);
799
800 for (i = 0; i < 3; i++)
801 felem_square_reduce(ftmp, ftmp); /* 2^5 + 2^4 + 2^3 */
802 felem_mul_reduce(ftmp, ftmp3, ftmp); /* 2^5 + 2^4 + 2^3 + 2^2 + 2^1 + 2^0 */
803 felem_assign(ftmp4, ftmp);
804
805 for (i = 0; i < 6; i++)
806 felem_square_reduce(ftmp, ftmp); /* 2^11 + ... + 2^6 */
807 felem_mul_reduce(ftmp, ftmp4, ftmp); /* 2^11 + ... + 2^0 */
808
809 for (i = 0; i < 3; i++)
810 felem_square_reduce(ftmp, ftmp); /* 2^14 + ... + 2^3 */
811 felem_mul_reduce(ftmp, ftmp3, ftmp); /* 2^14 + ... + 2^0 */
812 felem_assign(ftmp5, ftmp);
813
814 for (i = 0; i < 15; i++)
815 felem_square_reduce(ftmp, ftmp); /* 2^29 + ... + 2^15 */
816 felem_mul_reduce(ftmp, ftmp5, ftmp); /* 2^29 + ... + 2^0 */
817 felem_assign(ftmp6, ftmp);
818
819 for (i = 0; i < 30; i++)
820 felem_square_reduce(ftmp, ftmp); /* 2^59 + ... + 2^30 */
821 felem_mul_reduce(ftmp, ftmp6, ftmp); /* 2^59 + ... + 2^0 */
822 felem_assign(ftmp4, ftmp);
823
824 for (i = 0; i < 60; i++)
825 felem_square_reduce(ftmp, ftmp); /* 2^119 + ... + 2^60 */
826 felem_mul_reduce(ftmp, ftmp4, ftmp); /* 2^119 + ... + 2^0 */
827 felem_assign(ftmp4, ftmp);
828
829 for (i = 0; i < 120; i++)
830 felem_square_reduce(ftmp, ftmp); /* 2^239 + ... + 2^120 */
831 felem_mul_reduce(ftmp, ftmp4, ftmp); /* 2^239 + ... + 2^0 */
832
833 for (i = 0; i < 15; i++)
834 felem_square_reduce(ftmp, ftmp); /* 2^254 + ... + 2^15 */
835 felem_mul_reduce(ftmp, ftmp5, ftmp); /* 2^254 + ... + 2^0 */
836
837 for (i = 0; i < 31; i++)
838 felem_square_reduce(ftmp, ftmp); /* 2^285 + ... + 2^31 */
839 felem_mul_reduce(ftmp, ftmp6, ftmp); /* 2^285 + ... + 2^31 + 2^29 + ... + 2^0 */
840
841 for (i = 0; i < 2; i++)
842 felem_square_reduce(ftmp, ftmp); /* 2^287 + ... + 2^33 + 2^31 + ... + 2^2 */
843 felem_mul_reduce(ftmp, ftmp2, ftmp); /* 2^287 + ... + 2^33 + 2^31 + ... + 2^0 */
844
845 for (i = 0; i < 94; i++)
846 felem_square_reduce(ftmp, ftmp); /* 2^381 + ... + 2^127 + 2^125 + ... + 2^94 */
847 felem_mul_reduce(ftmp, ftmp6, ftmp); /* 2^381 + ... + 2^127 + 2^125 + ... + 2^94 + 2^29 + ... + 2^0 */
848
849 for (i = 0; i < 2; i++)
850 felem_square_reduce(ftmp, ftmp); /* 2^383 + ... + 2^129 + 2^127 + ... + 2^96 + 2^31 + ... + 2^2 */
851 felem_mul_reduce(ftmp, in, ftmp); /* 2^383 + ... + 2^129 + 2^127 + ... + 2^96 + 2^31 + ... + 2^2 + 2^0 */
852
853 memcpy(out, ftmp, sizeof(felem));
854 }
855
856 /*
857 * Zero-check: returns a limb with all bits set if |in| == 0 (mod p)
858 * and 0 otherwise. We know that field elements are reduced to
859 * 0 < in < 2p, so we only need to check two cases:
860 * 0 and 2^384 - 2^128 - 2^96 + 2^32 - 1
861 * in[k] < 2^56, k < 6
862 * in[6] <= 2^48
863 */
felem_is_zero(const felem in)864 static limb felem_is_zero(const felem in)
865 {
866 limb zero, p384;
867
868 zero = in[0] | in[1] | in[2] | in[3] | in[4] | in[5] | in[6];
869 zero = ((int64_t) (zero) - 1) >> 63;
870 p384 = (in[0] ^ 0x000000ffffffff) | (in[1] ^ 0xffff0000000000)
871 | (in[2] ^ 0xfffffffffeffff) | (in[3] ^ 0xffffffffffffff)
872 | (in[4] ^ 0xffffffffffffff) | (in[5] ^ 0xffffffffffffff)
873 | (in[6] ^ 0xffffffffffff);
874 p384 = ((int64_t) (p384) - 1) >> 63;
875
876 return (zero | p384);
877 }
878
felem_is_zero_int(const void * in)879 static int felem_is_zero_int(const void *in)
880 {
881 return (int)(felem_is_zero(in) & ((limb) 1));
882 }
883
884 /*-
885 * felem_contract converts |in| to its unique, minimal representation.
886 * Assume we've removed all redundant bits.
887 * On entry:
888 * in[k] < 2^56, k < 6
889 * in[6] <= 2^48
890 */
felem_contract(felem out,const felem in)891 static void felem_contract(felem out, const felem in)
892 {
893 static const int64_t two56 = ((limb) 1) << 56;
894
895 /*
896 * We know for a fact that 0 <= |in| < 2*p, for p = 2^384 - 2^128 - 2^96 + 2^32 - 1
897 * Perform two successive, idempotent subtractions to reduce if |in| >= p.
898 */
899
900 int64_t tmp[NLIMBS], cond[5], a;
901 unsigned int i;
902
903 memcpy(tmp, in, sizeof(felem));
904
905 /* Case 1: a = 1 iff |in| >= 2^384 */
906 a = (in[6] >> 48);
907 tmp[0] += a;
908 tmp[0] -= a << 32;
909 tmp[1] += a << 40;
910 tmp[2] += a << 16;
911 tmp[6] &= 0x0000ffffffffffff;
912
913 /*
914 * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
915 * non-zero, so we only need one step
916 */
917
918 a = tmp[0] >> 63;
919 tmp[0] += a & two56;
920 tmp[1] -= a & 1;
921
922 /* Carry 1 -> 2 -> 3 -> 4 -> 5 -> 6 */
923 tmp[2] += tmp[1] >> 56;
924 tmp[1] &= 0x00ffffffffffffff;
925
926 tmp[3] += tmp[2] >> 56;
927 tmp[2] &= 0x00ffffffffffffff;
928
929 tmp[4] += tmp[3] >> 56;
930 tmp[3] &= 0x00ffffffffffffff;
931
932 tmp[5] += tmp[4] >> 56;
933 tmp[4] &= 0x00ffffffffffffff;
934
935 tmp[6] += tmp[5] >> 56; /* tmp[6] < 2^48 */
936 tmp[5] &= 0x00ffffffffffffff;
937
938 /*
939 * Case 2: a = all ones if p <= |in| < 2^384, 0 otherwise
940 */
941
942 /* 0 iff (2^129..2^383) are all one */
943 cond[0] = ((tmp[6] | 0xff000000000000) & tmp[5] & tmp[4] & tmp[3] & (tmp[2] | 0x0000000001ffff)) + 1;
944 /* 0 iff 2^128 bit is one */
945 cond[1] = (tmp[2] | ~0x00000000010000) + 1;
946 /* 0 iff (2^96..2^127) bits are all one */
947 cond[2] = ((tmp[2] | 0xffffffffff0000) & (tmp[1] | 0x0000ffffffffff)) + 1;
948 /* 0 iff (2^32..2^95) bits are all zero */
949 cond[3] = (tmp[1] & ~0xffff0000000000) | (tmp[0] & ~((int64_t) 0x000000ffffffff));
950 /* 0 iff (2^0..2^31) bits are all one */
951 cond[4] = (tmp[0] | 0xffffff00000000) + 1;
952
953 /*
954 * In effect, invert our conditions, so that 0 values become all 1's,
955 * any non-zero value in the low-order 56 bits becomes all 0's
956 */
957 for (i = 0; i < 5; i++)
958 cond[i] = ((cond[i] & 0x00ffffffffffffff) - 1) >> 63;
959
960 /*
961 * The condition for determining whether in is greater than our
962 * prime is given by the following condition.
963 */
964
965 /* First subtract 2^384 - 2^129 cheaply */
966 a = cond[0] & (cond[1] | (cond[2] & (~cond[3] | cond[4])));
967 tmp[6] &= ~a;
968 tmp[5] &= ~a;
969 tmp[4] &= ~a;
970 tmp[3] &= ~a;
971 tmp[2] &= ~a | 0x0000000001ffff;
972
973 /*
974 * Subtract 2^128 - 2^96 by
975 * means of disjoint cases.
976 */
977
978 /* subtract 2^128 if that bit is present, and add 2^96 */
979 a = cond[0] & cond[1];
980 tmp[2] &= ~a | 0xfffffffffeffff;
981 tmp[1] += a & ((int64_t) 1 << 40);
982
983 /* otherwise, clear bits 2^127 .. 2^96 */
984 a = cond[0] & ~cond[1] & (cond[2] & (~cond[3] | cond[4]));
985 tmp[2] &= ~a | 0xffffffffff0000;
986 tmp[1] &= ~a | 0x0000ffffffffff;
987
988 /* finally, subtract the last 2^32 - 1 */
989 a = cond[0] & (cond[1] | (cond[2] & (~cond[3] | cond[4])));
990 tmp[0] += a & (-((int64_t) 1 << 32) + 1);
991
992 /*
993 * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
994 * non-zero, so we only need one step
995 */
996 a = tmp[0] >> 63;
997 tmp[0] += a & two56;
998 tmp[1] -= a & 1;
999
1000 /* Carry 1 -> 2 -> 3 -> 4 -> 5 -> 6 */
1001 tmp[2] += tmp[1] >> 56;
1002 tmp[1] &= 0x00ffffffffffffff;
1003
1004 tmp[3] += tmp[2] >> 56;
1005 tmp[2] &= 0x00ffffffffffffff;
1006
1007 tmp[4] += tmp[3] >> 56;
1008 tmp[3] &= 0x00ffffffffffffff;
1009
1010 tmp[5] += tmp[4] >> 56;
1011 tmp[4] &= 0x00ffffffffffffff;
1012
1013 tmp[6] += tmp[5] >> 56;
1014 tmp[5] &= 0x00ffffffffffffff;
1015
1016 memcpy(out, tmp, sizeof(felem));
1017 }
1018
1019 /*-
1020 * Group operations
1021 * ----------------
1022 *
1023 * Building on top of the field operations we have the operations on the
1024 * elliptic curve group itself. Points on the curve are represented in Jacobian
1025 * coordinates
1026 */
1027
1028 /*-
1029 * point_double calculates 2*(x_in, y_in, z_in)
1030 *
1031 * The method is taken from:
1032 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1033 *
1034 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
1035 * while x_out == y_in is not (maybe this works, but it's not tested).
1036 */
1037 static void
point_double(felem x_out,felem y_out,felem z_out,const felem x_in,const felem y_in,const felem z_in)1038 point_double(felem x_out, felem y_out, felem z_out,
1039 const felem x_in, const felem y_in, const felem z_in)
1040 {
1041 widefelem tmp, tmp2;
1042 felem delta, gamma, beta, alpha, ftmp, ftmp2;
1043
1044 felem_assign(ftmp, x_in);
1045 felem_assign(ftmp2, x_in);
1046
1047 /* delta = z^2 */
1048 felem_square_reduce(delta, z_in); /* delta[i] < 2^56 */
1049
1050 /* gamma = y^2 */
1051 felem_square_reduce(gamma, y_in); /* gamma[i] < 2^56 */
1052
1053 /* beta = x*gamma */
1054 felem_mul_reduce(beta, x_in, gamma); /* beta[i] < 2^56 */
1055
1056 /* alpha = 3*(x-delta)*(x+delta) */
1057 felem_diff64(ftmp, delta); /* ftmp[i] < 2^60 + 2^58 + 2^44 */
1058 felem_sum64(ftmp2, delta); /* ftmp2[i] < 2^59 */
1059 felem_scalar64(ftmp2, 3); /* ftmp2[i] < 2^61 */
1060 felem_mul_reduce(alpha, ftmp, ftmp2); /* alpha[i] < 2^56 */
1061
1062 /* x' = alpha^2 - 8*beta */
1063 felem_square(tmp, alpha); /* tmp[i] < 2^115 */
1064 felem_assign(ftmp, beta); /* ftmp[i] < 2^56 */
1065 felem_scalar64(ftmp, 8); /* ftmp[i] < 2^59 */
1066 felem_diff_128_64(tmp, ftmp); /* tmp[i] < 2^115 + 2^64 + 2^48 */
1067 felem_reduce(x_out, tmp); /* x_out[i] < 2^56 */
1068
1069 /* z' = (y + z)^2 - gamma - delta */
1070 felem_sum64(delta, gamma); /* delta[i] < 2^57 */
1071 felem_assign(ftmp, y_in); /* ftmp[i] < 2^56 */
1072 felem_sum64(ftmp, z_in); /* ftmp[i] < 2^56 */
1073 felem_square(tmp, ftmp); /* tmp[i] < 2^115 */
1074 felem_diff_128_64(tmp, delta); /* tmp[i] < 2^115 + 2^64 + 2^48 */
1075 felem_reduce(z_out, tmp); /* z_out[i] < 2^56 */
1076
1077 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1078 felem_scalar64(beta, 4); /* beta[i] < 2^58 */
1079 felem_diff64(beta, x_out); /* beta[i] < 2^60 + 2^58 + 2^44 */
1080 felem_mul(tmp, alpha, beta); /* tmp[i] < 2^119 */
1081 felem_square(tmp2, gamma); /* tmp2[i] < 2^115 */
1082 felem_scalar128(tmp2, 8); /* tmp2[i] < 2^118 */
1083 felem_diff128(tmp, tmp2); /* tmp[i] < 2^127 + 2^119 + 2^111 */
1084 felem_reduce(y_out, tmp); /* tmp[i] < 2^56 */
1085 }
1086
1087 /* copy_conditional copies in to out iff mask is all ones. */
copy_conditional(felem out,const felem in,limb mask)1088 static void copy_conditional(felem out, const felem in, limb mask)
1089 {
1090 unsigned int i;
1091
1092 for (i = 0; i < NLIMBS; i++)
1093 out[i] ^= mask & (in[i] ^ out[i]);
1094 }
1095
1096 /*-
1097 * point_add calculates (x1, y1, z1) + (x2, y2, z2)
1098 *
1099 * The method is taken from
1100 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1101 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1102 *
1103 * This function includes a branch for checking whether the two input points
1104 * are equal (while not equal to the point at infinity). See comment below
1105 * on constant-time.
1106 */
point_add(felem x3,felem y3,felem z3,const felem x1,const felem y1,const felem z1,const int mixed,const felem x2,const felem y2,const felem z2)1107 static void point_add(felem x3, felem y3, felem z3,
1108 const felem x1, const felem y1, const felem z1,
1109 const int mixed, const felem x2, const felem y2,
1110 const felem z2)
1111 {
1112 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1113 widefelem tmp, tmp2;
1114 limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1115 limb points_equal;
1116
1117 z1_is_zero = felem_is_zero(z1);
1118 z2_is_zero = felem_is_zero(z2);
1119
1120 /* ftmp = z1z1 = z1**2 */
1121 felem_square_reduce(ftmp, z1); /* ftmp[i] < 2^56 */
1122
1123 if (!mixed) {
1124 /* ftmp2 = z2z2 = z2**2 */
1125 felem_square_reduce(ftmp2, z2); /* ftmp2[i] < 2^56 */
1126
1127 /* u1 = ftmp3 = x1*z2z2 */
1128 felem_mul_reduce(ftmp3, x1, ftmp2); /* ftmp3[i] < 2^56 */
1129
1130 /* ftmp5 = z1 + z2 */
1131 felem_assign(ftmp5, z1); /* ftmp5[i] < 2^56 */
1132 felem_sum64(ftmp5, z2); /* ftmp5[i] < 2^57 */
1133
1134 /* ftmp5 = (z1 + z2)**2 - z1z1 - z2z2 = 2*z1z2 */
1135 felem_square(tmp, ftmp5); /* tmp[i] < 2^117 */
1136 felem_diff_128_64(tmp, ftmp); /* tmp[i] < 2^117 + 2^64 + 2^48 */
1137 felem_diff_128_64(tmp, ftmp2); /* tmp[i] < 2^117 + 2^65 + 2^49 */
1138 felem_reduce(ftmp5, tmp); /* ftmp5[i] < 2^56 */
1139
1140 /* ftmp2 = z2 * z2z2 */
1141 felem_mul_reduce(ftmp2, ftmp2, z2); /* ftmp2[i] < 2^56 */
1142
1143 /* s1 = ftmp6 = y1 * z2**3 */
1144 felem_mul_reduce(ftmp6, y1, ftmp2); /* ftmp6[i] < 2^56 */
1145 } else {
1146 /*
1147 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
1148 */
1149
1150 /* u1 = ftmp3 = x1*z2z2 */
1151 felem_assign(ftmp3, x1); /* ftmp3[i] < 2^56 */
1152
1153 /* ftmp5 = 2*z1z2 */
1154 felem_scalar(ftmp5, z1, 2); /* ftmp5[i] < 2^57 */
1155
1156 /* s1 = ftmp6 = y1 * z2**3 */
1157 felem_assign(ftmp6, y1); /* ftmp6[i] < 2^56 */
1158 }
1159 /* ftmp3[i] < 2^56, ftmp5[i] < 2^57, ftmp6[i] < 2^56 */
1160
1161 /* u2 = x2*z1z1 */
1162 felem_mul(tmp, x2, ftmp); /* tmp[i] < 2^115 */
1163
1164 /* h = ftmp4 = u2 - u1 */
1165 felem_diff_128_64(tmp, ftmp3); /* tmp[i] < 2^115 + 2^64 + 2^48 */
1166 felem_reduce(ftmp4, tmp); /* ftmp[4] < 2^56 */
1167
1168 x_equal = felem_is_zero(ftmp4);
1169
1170 /* z_out = ftmp5 * h */
1171 felem_mul_reduce(z_out, ftmp5, ftmp4); /* z_out[i] < 2^56 */
1172
1173 /* ftmp = z1 * z1z1 */
1174 felem_mul_reduce(ftmp, ftmp, z1); /* ftmp[i] < 2^56 */
1175
1176 /* s2 = tmp = y2 * z1**3 */
1177 felem_mul(tmp, y2, ftmp); /* tmp[i] < 2^115 */
1178
1179 /* r = ftmp5 = (s2 - s1)*2 */
1180 felem_diff_128_64(tmp, ftmp6); /* tmp[i] < 2^115 + 2^64 + 2^48 */
1181 felem_reduce(ftmp5, tmp); /* ftmp5[i] < 2^56 */
1182 y_equal = felem_is_zero(ftmp5);
1183 felem_scalar64(ftmp5, 2); /* ftmp5[i] < 2^57 */
1184
1185 /*
1186 * The formulae are incorrect if the points are equal, in affine coordinates
1187 * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
1188 * happens.
1189 *
1190 * We use bitwise operations to avoid potential side-channels introduced by
1191 * the short-circuiting behaviour of boolean operators.
1192 *
1193 * The special case of either point being the point at infinity (z1 and/or
1194 * z2 are zero), is handled separately later on in this function, so we
1195 * avoid jumping to point_double here in those special cases.
1196 *
1197 * Notice the comment below on the implications of this branching for timing
1198 * leaks and why it is considered practically irrelevant.
1199 */
1200 points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero));
1201
1202 if (points_equal) {
1203 /*
1204 * This is obviously not constant-time but it will almost-never happen
1205 * for ECDH / ECDSA.
1206 */
1207 point_double(x3, y3, z3, x1, y1, z1);
1208 return;
1209 }
1210
1211 /* I = ftmp = (2h)**2 */
1212 felem_assign(ftmp, ftmp4); /* ftmp[i] < 2^56 */
1213 felem_scalar64(ftmp, 2); /* ftmp[i] < 2^57 */
1214 felem_square_reduce(ftmp, ftmp); /* ftmp[i] < 2^56 */
1215
1216 /* J = ftmp2 = h * I */
1217 felem_mul_reduce(ftmp2, ftmp4, ftmp); /* ftmp2[i] < 2^56 */
1218
1219 /* V = ftmp4 = U1 * I */
1220 felem_mul_reduce(ftmp4, ftmp3, ftmp); /* ftmp4[i] < 2^56 */
1221
1222 /* x_out = r**2 - J - 2V */
1223 felem_square(tmp, ftmp5); /* tmp[i] < 2^117 */
1224 felem_diff_128_64(tmp, ftmp2); /* tmp[i] < 2^117 + 2^64 + 2^48 */
1225 felem_assign(ftmp3, ftmp4); /* ftmp3[i] < 2^56 */
1226 felem_scalar64(ftmp4, 2); /* ftmp4[i] < 2^57 */
1227 felem_diff_128_64(tmp, ftmp4); /* tmp[i] < 2^117 + 2^65 + 2^49 */
1228 felem_reduce(x_out, tmp); /* x_out[i] < 2^56 */
1229
1230 /* y_out = r(V-x_out) - 2 * s1 * J */
1231 felem_diff64(ftmp3, x_out); /* ftmp3[i] < 2^60 + 2^56 + 2^44 */
1232 felem_mul(tmp, ftmp5, ftmp3); /* tmp[i] < 2^116 */
1233 felem_mul(tmp2, ftmp6, ftmp2); /* tmp2[i] < 2^115 */
1234 felem_scalar128(tmp2, 2); /* tmp2[i] < 2^116 */
1235 felem_diff128(tmp, tmp2); /* tmp[i] < 2^127 + 2^116 + 2^111 */
1236 felem_reduce(y_out, tmp); /* y_out[i] < 2^56 */
1237
1238 copy_conditional(x_out, x2, z1_is_zero);
1239 copy_conditional(x_out, x1, z2_is_zero);
1240 copy_conditional(y_out, y2, z1_is_zero);
1241 copy_conditional(y_out, y1, z2_is_zero);
1242 copy_conditional(z_out, z2, z1_is_zero);
1243 copy_conditional(z_out, z1, z2_is_zero);
1244 felem_assign(x3, x_out);
1245 felem_assign(y3, y_out);
1246 felem_assign(z3, z_out);
1247 }
1248
1249 /*-
1250 * Base point pre computation
1251 * --------------------------
1252 *
1253 * Two different sorts of precomputed tables are used in the following code.
1254 * Each contain various points on the curve, where each point is three field
1255 * elements (x, y, z).
1256 *
1257 * For the base point table, z is usually 1 (0 for the point at infinity).
1258 * This table has 16 elements:
1259 * index | bits | point
1260 * ------+---------+------------------------------
1261 * 0 | 0 0 0 0 | 0G
1262 * 1 | 0 0 0 1 | 1G
1263 * 2 | 0 0 1 0 | 2^95G
1264 * 3 | 0 0 1 1 | (2^95 + 1)G
1265 * 4 | 0 1 0 0 | 2^190G
1266 * 5 | 0 1 0 1 | (2^190 + 1)G
1267 * 6 | 0 1 1 0 | (2^190 + 2^95)G
1268 * 7 | 0 1 1 1 | (2^190 + 2^95 + 1)G
1269 * 8 | 1 0 0 0 | 2^285G
1270 * 9 | 1 0 0 1 | (2^285 + 1)G
1271 * 10 | 1 0 1 0 | (2^285 + 2^95)G
1272 * 11 | 1 0 1 1 | (2^285 + 2^95 + 1)G
1273 * 12 | 1 1 0 0 | (2^285 + 2^190)G
1274 * 13 | 1 1 0 1 | (2^285 + 2^190 + 1)G
1275 * 14 | 1 1 1 0 | (2^285 + 2^190 + 2^95)G
1276 * 15 | 1 1 1 1 | (2^285 + 2^190 + 2^95 + 1)G
1277 *
1278 * The reason for this is so that we can clock bits into four different
1279 * locations when doing simple scalar multiplies against the base point.
1280 *
1281 * Tables for other points have table[i] = iG for i in 0 .. 16.
1282 */
1283
1284 /* gmul is the table of precomputed base points */
1285 static const felem gmul[16][3] = {
1286 {{0, 0, 0, 0, 0, 0, 0},
1287 {0, 0, 0, 0, 0, 0, 0},
1288 {0, 0, 0, 0, 0, 0, 0}},
1289 {{0x00545e3872760ab7, 0x00f25dbf55296c3a, 0x00e082542a385502, 0x008ba79b9859f741,
1290 0x0020ad746e1d3b62, 0x0005378eb1c71ef3, 0x0000aa87ca22be8b},
1291 {0x00431d7c90ea0e5f, 0x00b1ce1d7e819d7a, 0x0013b5f0b8c00a60, 0x00289a147ce9da31,
1292 0x0092dc29f8f41dbd, 0x002c6f5d9e98bf92, 0x00003617de4a9626},
1293 {1, 0, 0, 0, 0, 0, 0}},
1294 {{0x00024711cc902a90, 0x00acb2e579ab4fe1, 0x00af818a4b4d57b1, 0x00a17c7bec49c3de,
1295 0x004280482d726a8b, 0x00128dd0f0a90f3b, 0x00004387c1c3fa3c},
1296 {0x002ce76543cf5c3a, 0x00de6cee5ef58f0a, 0x00403e42fa561ca6, 0x00bc54d6f9cb9731,
1297 0x007155f925fb4ff1, 0x004a9ce731b7b9bc, 0x00002609076bd7b2},
1298 {1, 0, 0, 0, 0, 0, 0}},
1299 {{0x00e74c9182f0251d, 0x0039bf54bb111974, 0x00b9d2f2eec511d2, 0x0036b1594eb3a6a4,
1300 0x00ac3bb82d9d564b, 0x00f9313f4615a100, 0x00006716a9a91b10},
1301 {0x0046698116e2f15c, 0x00f34347067d3d33, 0x008de4ccfdebd002, 0x00e838c6b8e8c97b,
1302 0x006faf0798def346, 0x007349794a57563c, 0x00002629e7e6ad84},
1303 {1, 0, 0, 0, 0, 0, 0}},
1304 {{0x0075300e34fd163b, 0x0092e9db4e8d0ad3, 0x00254be9f625f760, 0x00512c518c72ae68,
1305 0x009bfcf162bede5a, 0x00bf9341566ce311, 0x0000cd6175bd41cf},
1306 {0x007dfe52af4ac70f, 0x0002159d2d5c4880, 0x00b504d16f0af8d0, 0x0014585e11f5e64c,
1307 0x0089c6388e030967, 0x00ffb270cbfa5f71, 0x00009a15d92c3947},
1308 {1, 0, 0, 0, 0, 0, 0}},
1309 {{0x0033fc1278dc4fe5, 0x00d53088c2caa043, 0x0085558827e2db66, 0x00c192bef387b736,
1310 0x00df6405a2225f2c, 0x0075205aa90fd91a, 0x0000137e3f12349d},
1311 {0x00ce5b115efcb07e, 0x00abc3308410deeb, 0x005dc6fc1de39904, 0x00907c1c496f36b4,
1312 0x0008e6ad3926cbe1, 0x00110747b787928c, 0x0000021b9162eb7e},
1313 {1, 0, 0, 0, 0, 0, 0}},
1314 {{0x008180042cfa26e1, 0x007b826a96254967, 0x0082473694d6b194, 0x007bd6880a45b589,
1315 0x00c0a5097072d1a3, 0x0019186555e18b4e, 0x000020278190e5ca},
1316 {0x00b4bef17de61ac0, 0x009535e3c38ed348, 0x002d4aa8e468ceab, 0x00ef40b431036ad3,
1317 0x00defd52f4542857, 0x0086edbf98234266, 0x00002025b3a7814d},
1318 {1, 0, 0, 0, 0, 0, 0}},
1319 {{0x00b238aa97b886be, 0x00ef3192d6dd3a32, 0x0079f9e01fd62df8, 0x00742e890daba6c5,
1320 0x008e5289144408ce, 0x0073bbcc8e0171a5, 0x0000c4fd329d3b52},
1321 {0x00c6f64a15ee23e7, 0x00dcfb7b171cad8b, 0x00039f6cbd805867, 0x00de024e428d4562,
1322 0x00be6a594d7c64c5, 0x0078467b70dbcd64, 0x0000251f2ed7079b},
1323 {1, 0, 0, 0, 0, 0, 0}},
1324 {{0x000e5cc25fc4b872, 0x005ebf10d31ef4e1, 0x0061e0ebd11e8256, 0x0076e026096f5a27,
1325 0x0013e6fc44662e9a, 0x0042b00289d3597e, 0x000024f089170d88},
1326 {0x001604d7e0effbe6, 0x0048d77cba64ec2c, 0x008166b16da19e36, 0x006b0d1a0f28c088,
1327 0x000259fcd47754fd, 0x00cc643e4d725f9a, 0x00007b10f3c79c14},
1328 {1, 0, 0, 0, 0, 0, 0}},
1329 {{0x00430155e3b908af, 0x00b801e4fec25226, 0x00b0d4bcfe806d26, 0x009fc4014eb13d37,
1330 0x0066c94e44ec07e8, 0x00d16adc03874ba2, 0x000030c917a0d2a7},
1331 {0x00edac9e21eb891c, 0x00ef0fb768102eff, 0x00c088cef272a5f3, 0x00cbf782134e2964,
1332 0x0001044a7ba9a0e3, 0x00e363f5b194cf3c, 0x00009ce85249e372},
1333 {1, 0, 0, 0, 0, 0, 0}},
1334 {{0x001dd492dda5a7eb, 0x008fd577be539fd1, 0x002ff4b25a5fc3f1, 0x0074a8a1b64df72f,
1335 0x002ba3d8c204a76c, 0x009d5cff95c8235a, 0x0000e014b9406e0f},
1336 {0x008c2e4dbfc98aba, 0x00f30bb89f1a1436, 0x00b46f7aea3e259c, 0x009224454ac02f54,
1337 0x00906401f5645fa2, 0x003a1d1940eabc77, 0x00007c9351d680e6},
1338 {1, 0, 0, 0, 0, 0, 0}},
1339 {{0x005a35d872ef967c, 0x0049f1b7884e1987, 0x0059d46d7e31f552, 0x00ceb4869d2d0fb6,
1340 0x00e8e89eee56802a, 0x0049d806a774aaf2, 0x0000147e2af0ae24},
1341 {0x005fd1bd852c6e5e, 0x00b674b7b3de6885, 0x003b9ea5eb9b6c08, 0x005c9f03babf3ef7,
1342 0x00605337fecab3c7, 0x009a3f85b11bbcc8, 0x0000455470f330ec},
1343 {1, 0, 0, 0, 0, 0, 0}},
1344 {{0x002197ff4d55498d, 0x00383e8916c2d8af, 0x00eb203f34d1c6d2, 0x0080367cbd11b542,
1345 0x00769b3be864e4f5, 0x0081a8458521c7bb, 0x0000c531b34d3539},
1346 {0x00e2a3d775fa2e13, 0x00534fc379573844, 0x00ff237d2a8db54a, 0x00d301b2335a8882,
1347 0x000f75ea96103a80, 0x0018fecb3cdd96fa, 0x0000304bf61e94eb},
1348 {1, 0, 0, 0, 0, 0, 0}},
1349 {{0x00b2afc332a73dbd, 0x0029a0d5bb007bc5, 0x002d628eb210f577, 0x009f59a36dd05f50,
1350 0x006d339de4eca613, 0x00c75a71addc86bc, 0x000060384c5ea93c},
1351 {0x00aa9641c32a30b4, 0x00cc73ae8cce565d, 0x00ec911a4df07f61, 0x00aa4b762ea4b264,
1352 0x0096d395bb393629, 0x004efacfb7632fe0, 0x00006f252f46fa3f},
1353 {1, 0, 0, 0, 0, 0, 0}},
1354 {{0x00567eec597c7af6, 0x0059ba6795204413, 0x00816d4e6f01196f, 0x004ae6b3eb57951d,
1355 0x00420f5abdda2108, 0x003401d1f57ca9d9, 0x0000cf5837b0b67a},
1356 {0x00eaa64b8aeeabf9, 0x00246ddf16bcb4de, 0x000e7e3c3aecd751, 0x0008449f04fed72e,
1357 0x00307b67ccf09183, 0x0017108c3556b7b1, 0x0000229b2483b3bf},
1358 {1, 0, 0, 0, 0, 0, 0}},
1359 {{0x00e7c491a7bb78a1, 0x00eafddd1d3049ab, 0x00352c05e2bc7c98, 0x003d6880c165fa5c,
1360 0x00b6ac61cc11c97d, 0x00beeb54fcf90ce5, 0x0000dc1f0b455edc},
1361 {0x002db2e7aee34d60, 0x0073b5f415a2d8c0, 0x00dd84e4193e9a0c, 0x00d02d873467c572,
1362 0x0018baaeda60aee5, 0x0013fb11d697c61e, 0x000083aafcc3a973},
1363 {1, 0, 0, 0, 0, 0, 0}}
1364 };
1365
1366 /*
1367 * select_point selects the |idx|th point from a precomputation table and
1368 * copies it to out.
1369 *
1370 * pre_comp below is of the size provided in |size|.
1371 */
select_point(const limb idx,unsigned int size,const felem pre_comp[][3],felem out[3])1372 static void select_point(const limb idx, unsigned int size,
1373 const felem pre_comp[][3], felem out[3])
1374 {
1375 unsigned int i, j;
1376 limb *outlimbs = &out[0][0];
1377
1378 memset(out, 0, sizeof(*out) * 3);
1379
1380 for (i = 0; i < size; i++) {
1381 const limb *inlimbs = &pre_comp[i][0][0];
1382 limb mask = i ^ idx;
1383
1384 mask |= mask >> 4;
1385 mask |= mask >> 2;
1386 mask |= mask >> 1;
1387 mask &= 1;
1388 mask--;
1389 for (j = 0; j < NLIMBS * 3; j++)
1390 outlimbs[j] |= inlimbs[j] & mask;
1391 }
1392 }
1393
1394 /* get_bit returns the |i|th bit in |in| */
get_bit(const felem_bytearray in,int i)1395 static char get_bit(const felem_bytearray in, int i)
1396 {
1397 if (i < 0 || i >= 384)
1398 return 0;
1399 return (in[i >> 3] >> (i & 7)) & 1;
1400 }
1401
1402 /*
1403 * Interleaved point multiplication using precomputed point multiples: The
1404 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1405 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1406 * generator, using certain (large) precomputed multiples in g_pre_comp.
1407 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1408 */
batch_mul(felem x_out,felem y_out,felem z_out,const felem_bytearray scalars[],const unsigned int num_points,const u8 * g_scalar,const int mixed,const felem pre_comp[][17][3],const felem g_pre_comp[16][3])1409 static void batch_mul(felem x_out, felem y_out, felem z_out,
1410 const felem_bytearray scalars[],
1411 const unsigned int num_points, const u8 *g_scalar,
1412 const int mixed, const felem pre_comp[][17][3],
1413 const felem g_pre_comp[16][3])
1414 {
1415 int i, skip;
1416 unsigned int num, gen_mul = (g_scalar != NULL);
1417 felem nq[3], tmp[4];
1418 limb bits;
1419 u8 sign, digit;
1420
1421 /* set nq to the point at infinity */
1422 memset(nq, 0, sizeof(nq));
1423
1424 /*
1425 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1426 * of the generator (last quarter of rounds) and additions of other
1427 * points multiples (every 5th round).
1428 */
1429 skip = 1; /* save two point operations in the first
1430 * round */
1431 for (i = (num_points ? 380 : 98); i >= 0; --i) {
1432 /* double */
1433 if (!skip)
1434 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1435
1436 /* add multiples of the generator */
1437 if (gen_mul && (i <= 98)) {
1438 bits = get_bit(g_scalar, i + 285) << 3;
1439 if (i < 95) {
1440 bits |= get_bit(g_scalar, i + 190) << 2;
1441 bits |= get_bit(g_scalar, i + 95) << 1;
1442 bits |= get_bit(g_scalar, i);
1443 }
1444 /* select the point to add, in constant time */
1445 select_point(bits, 16, g_pre_comp, tmp);
1446 if (!skip) {
1447 /* The 1 argument below is for "mixed" */
1448 point_add(nq[0], nq[1], nq[2],
1449 nq[0], nq[1], nq[2], 1,
1450 tmp[0], tmp[1], tmp[2]);
1451 } else {
1452 memcpy(nq, tmp, 3 * sizeof(felem));
1453 skip = 0;
1454 }
1455 }
1456
1457 /* do other additions every 5 doublings */
1458 if (num_points && (i % 5 == 0)) {
1459 /* loop over all scalars */
1460 for (num = 0; num < num_points; ++num) {
1461 bits = get_bit(scalars[num], i + 4) << 5;
1462 bits |= get_bit(scalars[num], i + 3) << 4;
1463 bits |= get_bit(scalars[num], i + 2) << 3;
1464 bits |= get_bit(scalars[num], i + 1) << 2;
1465 bits |= get_bit(scalars[num], i) << 1;
1466 bits |= get_bit(scalars[num], i - 1);
1467 ossl_ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1468
1469 /*
1470 * select the point to add or subtract, in constant time
1471 */
1472 select_point(digit, 17, pre_comp[num], tmp);
1473 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1474 * point */
1475 copy_conditional(tmp[1], tmp[3], (-(limb) sign));
1476
1477 if (!skip) {
1478 point_add(nq[0], nq[1], nq[2],
1479 nq[0], nq[1], nq[2], mixed,
1480 tmp[0], tmp[1], tmp[2]);
1481 } else {
1482 memcpy(nq, tmp, 3 * sizeof(felem));
1483 skip = 0;
1484 }
1485 }
1486 }
1487 }
1488 felem_assign(x_out, nq[0]);
1489 felem_assign(y_out, nq[1]);
1490 felem_assign(z_out, nq[2]);
1491 }
1492
1493 /* Precomputation for the group generator. */
1494 struct nistp384_pre_comp_st {
1495 felem g_pre_comp[16][3];
1496 CRYPTO_REF_COUNT references;
1497 };
1498
ossl_ec_GFp_nistp384_method(void)1499 const EC_METHOD *ossl_ec_GFp_nistp384_method(void)
1500 {
1501 static const EC_METHOD ret = {
1502 EC_FLAGS_DEFAULT_OCT,
1503 NID_X9_62_prime_field,
1504 ossl_ec_GFp_nistp384_group_init,
1505 ossl_ec_GFp_simple_group_finish,
1506 ossl_ec_GFp_simple_group_clear_finish,
1507 ossl_ec_GFp_nist_group_copy,
1508 ossl_ec_GFp_nistp384_group_set_curve,
1509 ossl_ec_GFp_simple_group_get_curve,
1510 ossl_ec_GFp_simple_group_get_degree,
1511 ossl_ec_group_simple_order_bits,
1512 ossl_ec_GFp_simple_group_check_discriminant,
1513 ossl_ec_GFp_simple_point_init,
1514 ossl_ec_GFp_simple_point_finish,
1515 ossl_ec_GFp_simple_point_clear_finish,
1516 ossl_ec_GFp_simple_point_copy,
1517 ossl_ec_GFp_simple_point_set_to_infinity,
1518 ossl_ec_GFp_simple_point_set_affine_coordinates,
1519 ossl_ec_GFp_nistp384_point_get_affine_coordinates,
1520 0, /* point_set_compressed_coordinates */
1521 0, /* point2oct */
1522 0, /* oct2point */
1523 ossl_ec_GFp_simple_add,
1524 ossl_ec_GFp_simple_dbl,
1525 ossl_ec_GFp_simple_invert,
1526 ossl_ec_GFp_simple_is_at_infinity,
1527 ossl_ec_GFp_simple_is_on_curve,
1528 ossl_ec_GFp_simple_cmp,
1529 ossl_ec_GFp_simple_make_affine,
1530 ossl_ec_GFp_simple_points_make_affine,
1531 ossl_ec_GFp_nistp384_points_mul,
1532 ossl_ec_GFp_nistp384_precompute_mult,
1533 ossl_ec_GFp_nistp384_have_precompute_mult,
1534 ossl_ec_GFp_nist_field_mul,
1535 ossl_ec_GFp_nist_field_sqr,
1536 0, /* field_div */
1537 ossl_ec_GFp_simple_field_inv,
1538 0, /* field_encode */
1539 0, /* field_decode */
1540 0, /* field_set_to_one */
1541 ossl_ec_key_simple_priv2oct,
1542 ossl_ec_key_simple_oct2priv,
1543 0, /* set private */
1544 ossl_ec_key_simple_generate_key,
1545 ossl_ec_key_simple_check_key,
1546 ossl_ec_key_simple_generate_public_key,
1547 0, /* keycopy */
1548 0, /* keyfinish */
1549 ossl_ecdh_simple_compute_key,
1550 ossl_ecdsa_simple_sign_setup,
1551 ossl_ecdsa_simple_sign_sig,
1552 ossl_ecdsa_simple_verify_sig,
1553 0, /* field_inverse_mod_ord */
1554 0, /* blind_coordinates */
1555 0, /* ladder_pre */
1556 0, /* ladder_step */
1557 0 /* ladder_post */
1558 };
1559
1560 return &ret;
1561 }
1562
1563 /******************************************************************************/
1564 /*
1565 * FUNCTIONS TO MANAGE PRECOMPUTATION
1566 */
1567
nistp384_pre_comp_new(void)1568 static NISTP384_PRE_COMP *nistp384_pre_comp_new(void)
1569 {
1570 NISTP384_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1571
1572 if (ret == NULL)
1573 return ret;
1574
1575 if (!CRYPTO_NEW_REF(&ret->references, 1)) {
1576 OPENSSL_free(ret);
1577 return NULL;
1578 }
1579 return ret;
1580 }
1581
ossl_ec_nistp384_pre_comp_dup(NISTP384_PRE_COMP * p)1582 NISTP384_PRE_COMP *ossl_ec_nistp384_pre_comp_dup(NISTP384_PRE_COMP *p)
1583 {
1584 int i;
1585
1586 if (p != NULL)
1587 CRYPTO_UP_REF(&p->references, &i);
1588 return p;
1589 }
1590
ossl_ec_nistp384_pre_comp_free(NISTP384_PRE_COMP * p)1591 void ossl_ec_nistp384_pre_comp_free(NISTP384_PRE_COMP *p)
1592 {
1593 int i;
1594
1595 if (p == NULL)
1596 return;
1597
1598 CRYPTO_DOWN_REF(&p->references, &i);
1599 REF_PRINT_COUNT("ossl_ec_nistp384", i, p);
1600 if (i > 0)
1601 return;
1602 REF_ASSERT_ISNT(i < 0);
1603
1604 CRYPTO_FREE_REF(&p->references);
1605 OPENSSL_free(p);
1606 }
1607
1608 /******************************************************************************/
1609 /*
1610 * OPENSSL EC_METHOD FUNCTIONS
1611 */
1612
ossl_ec_GFp_nistp384_group_init(EC_GROUP * group)1613 int ossl_ec_GFp_nistp384_group_init(EC_GROUP *group)
1614 {
1615 int ret;
1616
1617 ret = ossl_ec_GFp_simple_group_init(group);
1618 group->a_is_minus3 = 1;
1619 return ret;
1620 }
1621
ossl_ec_GFp_nistp384_group_set_curve(EC_GROUP * group,const BIGNUM * p,const BIGNUM * a,const BIGNUM * b,BN_CTX * ctx)1622 int ossl_ec_GFp_nistp384_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1623 const BIGNUM *a, const BIGNUM *b,
1624 BN_CTX *ctx)
1625 {
1626 int ret = 0;
1627 BIGNUM *curve_p, *curve_a, *curve_b;
1628 #ifndef FIPS_MODULE
1629 BN_CTX *new_ctx = NULL;
1630
1631 if (ctx == NULL)
1632 ctx = new_ctx = BN_CTX_new();
1633 #endif
1634 if (ctx == NULL)
1635 return 0;
1636
1637 BN_CTX_start(ctx);
1638 curve_p = BN_CTX_get(ctx);
1639 curve_a = BN_CTX_get(ctx);
1640 curve_b = BN_CTX_get(ctx);
1641 if (curve_b == NULL)
1642 goto err;
1643 BN_bin2bn(nistp384_curve_params[0], sizeof(felem_bytearray), curve_p);
1644 BN_bin2bn(nistp384_curve_params[1], sizeof(felem_bytearray), curve_a);
1645 BN_bin2bn(nistp384_curve_params[2], sizeof(felem_bytearray), curve_b);
1646 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1647 ERR_raise(ERR_LIB_EC, EC_R_WRONG_CURVE_PARAMETERS);
1648 goto err;
1649 }
1650 group->field_mod_func = BN_nist_mod_384;
1651 ret = ossl_ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1652 err:
1653 BN_CTX_end(ctx);
1654 #ifndef FIPS_MODULE
1655 BN_CTX_free(new_ctx);
1656 #endif
1657 return ret;
1658 }
1659
1660 /*
1661 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1662 * (X/Z^2, Y/Z^3)
1663 */
ossl_ec_GFp_nistp384_point_get_affine_coordinates(const EC_GROUP * group,const EC_POINT * point,BIGNUM * x,BIGNUM * y,BN_CTX * ctx)1664 int ossl_ec_GFp_nistp384_point_get_affine_coordinates(const EC_GROUP *group,
1665 const EC_POINT *point,
1666 BIGNUM *x, BIGNUM *y,
1667 BN_CTX *ctx)
1668 {
1669 felem z1, z2, x_in, y_in, x_out, y_out;
1670 widefelem tmp;
1671
1672 if (EC_POINT_is_at_infinity(group, point)) {
1673 ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY);
1674 return 0;
1675 }
1676 if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1677 (!BN_to_felem(z1, point->Z)))
1678 return 0;
1679 felem_inv(z2, z1);
1680 felem_square(tmp, z2);
1681 felem_reduce(z1, tmp);
1682 felem_mul(tmp, x_in, z1);
1683 felem_reduce(x_in, tmp);
1684 felem_contract(x_out, x_in);
1685 if (x != NULL) {
1686 if (!felem_to_BN(x, x_out)) {
1687 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1688 return 0;
1689 }
1690 }
1691 felem_mul(tmp, z1, z2);
1692 felem_reduce(z1, tmp);
1693 felem_mul(tmp, y_in, z1);
1694 felem_reduce(y_in, tmp);
1695 felem_contract(y_out, y_in);
1696 if (y != NULL) {
1697 if (!felem_to_BN(y, y_out)) {
1698 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1699 return 0;
1700 }
1701 }
1702 return 1;
1703 }
1704
1705 /* points below is of size |num|, and tmp_felems is of size |num+1/ */
make_points_affine(size_t num,felem points[][3],felem tmp_felems[])1706 static void make_points_affine(size_t num, felem points[][3],
1707 felem tmp_felems[])
1708 {
1709 /*
1710 * Runs in constant time, unless an input is the point at infinity (which
1711 * normally shouldn't happen).
1712 */
1713 ossl_ec_GFp_nistp_points_make_affine_internal(num,
1714 points,
1715 sizeof(felem),
1716 tmp_felems,
1717 (void (*)(void *))felem_one,
1718 felem_is_zero_int,
1719 (void (*)(void *, const void *))
1720 felem_assign,
1721 (void (*)(void *, const void *))
1722 felem_square_reduce,
1723 (void (*)(void *, const void *, const void*))
1724 felem_mul_reduce,
1725 (void (*)(void *, const void *))
1726 felem_inv,
1727 (void (*)(void *, const void *))
1728 felem_contract);
1729 }
1730
1731 /*
1732 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1733 * values Result is stored in r (r can equal one of the inputs).
1734 */
ossl_ec_GFp_nistp384_points_mul(const EC_GROUP * group,EC_POINT * r,const BIGNUM * scalar,size_t num,const EC_POINT * points[],const BIGNUM * scalars[],BN_CTX * ctx)1735 int ossl_ec_GFp_nistp384_points_mul(const EC_GROUP *group, EC_POINT *r,
1736 const BIGNUM *scalar, size_t num,
1737 const EC_POINT *points[],
1738 const BIGNUM *scalars[], BN_CTX *ctx)
1739 {
1740 int ret = 0;
1741 int j;
1742 int mixed = 0;
1743 BIGNUM *x, *y, *z, *tmp_scalar;
1744 felem_bytearray g_secret;
1745 felem_bytearray *secrets = NULL;
1746 felem (*pre_comp)[17][3] = NULL;
1747 felem *tmp_felems = NULL;
1748 unsigned int i;
1749 int num_bytes;
1750 int have_pre_comp = 0;
1751 size_t num_points = num;
1752 felem x_in, y_in, z_in, x_out, y_out, z_out;
1753 NISTP384_PRE_COMP *pre = NULL;
1754 felem(*g_pre_comp)[3] = NULL;
1755 EC_POINT *generator = NULL;
1756 const EC_POINT *p = NULL;
1757 const BIGNUM *p_scalar = NULL;
1758
1759 BN_CTX_start(ctx);
1760 x = BN_CTX_get(ctx);
1761 y = BN_CTX_get(ctx);
1762 z = BN_CTX_get(ctx);
1763 tmp_scalar = BN_CTX_get(ctx);
1764 if (tmp_scalar == NULL)
1765 goto err;
1766
1767 if (scalar != NULL) {
1768 pre = group->pre_comp.nistp384;
1769 if (pre)
1770 /* we have precomputation, try to use it */
1771 g_pre_comp = &pre->g_pre_comp[0];
1772 else
1773 /* try to use the standard precomputation */
1774 g_pre_comp = (felem(*)[3]) gmul;
1775 generator = EC_POINT_new(group);
1776 if (generator == NULL)
1777 goto err;
1778 /* get the generator from precomputation */
1779 if (!felem_to_BN(x, g_pre_comp[1][0]) ||
1780 !felem_to_BN(y, g_pre_comp[1][1]) ||
1781 !felem_to_BN(z, g_pre_comp[1][2])) {
1782 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1783 goto err;
1784 }
1785 if (!ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group,
1786 generator,
1787 x, y, z, ctx))
1788 goto err;
1789 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1790 /* precomputation matches generator */
1791 have_pre_comp = 1;
1792 else
1793 /*
1794 * we don't have valid precomputation: treat the generator as a
1795 * random point
1796 */
1797 num_points++;
1798 }
1799
1800 if (num_points > 0) {
1801 if (num_points >= 2) {
1802 /*
1803 * unless we precompute multiples for just one point, converting
1804 * those into affine form is time well spent
1805 */
1806 mixed = 1;
1807 }
1808 secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1809 pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1810 if (mixed)
1811 tmp_felems =
1812 OPENSSL_malloc(sizeof(*tmp_felems) * (num_points * 17 + 1));
1813 if ((secrets == NULL) || (pre_comp == NULL)
1814 || (mixed && (tmp_felems == NULL)))
1815 goto err;
1816
1817 /*
1818 * we treat NULL scalars as 0, and NULL points as points at infinity,
1819 * i.e., they contribute nothing to the linear combination
1820 */
1821 for (i = 0; i < num_points; ++i) {
1822 if (i == num) {
1823 /*
1824 * we didn't have a valid precomputation, so we pick the
1825 * generator
1826 */
1827 p = EC_GROUP_get0_generator(group);
1828 p_scalar = scalar;
1829 } else {
1830 /* the i^th point */
1831 p = points[i];
1832 p_scalar = scalars[i];
1833 }
1834 if (p_scalar != NULL && p != NULL) {
1835 /* reduce scalar to 0 <= scalar < 2^384 */
1836 if ((BN_num_bits(p_scalar) > 384)
1837 || (BN_is_negative(p_scalar))) {
1838 /*
1839 * this is an unusual input, and we don't guarantee
1840 * constant-timeness
1841 */
1842 if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1843 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1844 goto err;
1845 }
1846 num_bytes = BN_bn2lebinpad(tmp_scalar,
1847 secrets[i], sizeof(secrets[i]));
1848 } else {
1849 num_bytes = BN_bn2lebinpad(p_scalar,
1850 secrets[i], sizeof(secrets[i]));
1851 }
1852 if (num_bytes < 0) {
1853 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1854 goto err;
1855 }
1856 /* precompute multiples */
1857 if ((!BN_to_felem(x_out, p->X)) ||
1858 (!BN_to_felem(y_out, p->Y)) ||
1859 (!BN_to_felem(z_out, p->Z)))
1860 goto err;
1861 memcpy(pre_comp[i][1][0], x_out, sizeof(felem));
1862 memcpy(pre_comp[i][1][1], y_out, sizeof(felem));
1863 memcpy(pre_comp[i][1][2], z_out, sizeof(felem));
1864 for (j = 2; j <= 16; ++j) {
1865 if (j & 1) {
1866 point_add(pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1867 pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2], 0,
1868 pre_comp[i][j - 1][0], pre_comp[i][j - 1][1], pre_comp[i][j - 1][2]);
1869 } else {
1870 point_double(pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1871 pre_comp[i][j / 2][0], pre_comp[i][j / 2][1], pre_comp[i][j / 2][2]);
1872 }
1873 }
1874 }
1875 }
1876 if (mixed)
1877 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1878 }
1879
1880 /* the scalar for the generator */
1881 if (scalar != NULL && have_pre_comp) {
1882 memset(g_secret, 0, sizeof(g_secret));
1883 /* reduce scalar to 0 <= scalar < 2^384 */
1884 if ((BN_num_bits(scalar) > 384) || (BN_is_negative(scalar))) {
1885 /*
1886 * this is an unusual input, and we don't guarantee
1887 * constant-timeness
1888 */
1889 if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1890 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1891 goto err;
1892 }
1893 num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1894 } else {
1895 num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1896 }
1897 /* do the multiplication with generator precomputation */
1898 batch_mul(x_out, y_out, z_out,
1899 (const felem_bytearray(*))secrets, num_points,
1900 g_secret,
1901 mixed, (const felem(*)[17][3])pre_comp,
1902 (const felem(*)[3])g_pre_comp);
1903 } else {
1904 /* do the multiplication without generator precomputation */
1905 batch_mul(x_out, y_out, z_out,
1906 (const felem_bytearray(*))secrets, num_points,
1907 NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1908 }
1909 /* reduce the output to its unique minimal representation */
1910 felem_contract(x_in, x_out);
1911 felem_contract(y_in, y_out);
1912 felem_contract(z_in, z_out);
1913 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1914 (!felem_to_BN(z, z_in))) {
1915 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1916 goto err;
1917 }
1918 ret = ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z,
1919 ctx);
1920
1921 err:
1922 BN_CTX_end(ctx);
1923 EC_POINT_free(generator);
1924 OPENSSL_free(secrets);
1925 OPENSSL_free(pre_comp);
1926 OPENSSL_free(tmp_felems);
1927 return ret;
1928 }
1929
ossl_ec_GFp_nistp384_precompute_mult(EC_GROUP * group,BN_CTX * ctx)1930 int ossl_ec_GFp_nistp384_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1931 {
1932 int ret = 0;
1933 NISTP384_PRE_COMP *pre = NULL;
1934 int i, j;
1935 BIGNUM *x, *y;
1936 EC_POINT *generator = NULL;
1937 felem tmp_felems[16];
1938 #ifndef FIPS_MODULE
1939 BN_CTX *new_ctx = NULL;
1940 #endif
1941
1942 /* throw away old precomputation */
1943 EC_pre_comp_free(group);
1944
1945 #ifndef FIPS_MODULE
1946 if (ctx == NULL)
1947 ctx = new_ctx = BN_CTX_new();
1948 #endif
1949 if (ctx == NULL)
1950 return 0;
1951
1952 BN_CTX_start(ctx);
1953 x = BN_CTX_get(ctx);
1954 y = BN_CTX_get(ctx);
1955 if (y == NULL)
1956 goto err;
1957 /* get the generator */
1958 if (group->generator == NULL)
1959 goto err;
1960 generator = EC_POINT_new(group);
1961 if (generator == NULL)
1962 goto err;
1963 BN_bin2bn(nistp384_curve_params[3], sizeof(felem_bytearray), x);
1964 BN_bin2bn(nistp384_curve_params[4], sizeof(felem_bytearray), y);
1965 if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1966 goto err;
1967 if ((pre = nistp384_pre_comp_new()) == NULL)
1968 goto err;
1969 /*
1970 * if the generator is the standard one, use built-in precomputation
1971 */
1972 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1973 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1974 goto done;
1975 }
1976 if ((!BN_to_felem(pre->g_pre_comp[1][0], group->generator->X)) ||
1977 (!BN_to_felem(pre->g_pre_comp[1][1], group->generator->Y)) ||
1978 (!BN_to_felem(pre->g_pre_comp[1][2], group->generator->Z)))
1979 goto err;
1980 /* compute 2^95*G, 2^190*G, 2^285*G */
1981 for (i = 1; i <= 4; i <<= 1) {
1982 point_double(pre->g_pre_comp[2 * i][0], pre->g_pre_comp[2 * i][1], pre->g_pre_comp[2 * i][2],
1983 pre->g_pre_comp[i][0], pre->g_pre_comp[i][1], pre->g_pre_comp[i][2]);
1984 for (j = 0; j < 94; ++j) {
1985 point_double(pre->g_pre_comp[2 * i][0], pre->g_pre_comp[2 * i][1], pre->g_pre_comp[2 * i][2],
1986 pre->g_pre_comp[2 * i][0], pre->g_pre_comp[2 * i][1], pre->g_pre_comp[2 * i][2]);
1987 }
1988 }
1989 /* g_pre_comp[0] is the point at infinity */
1990 memset(pre->g_pre_comp[0], 0, sizeof(pre->g_pre_comp[0]));
1991 /* the remaining multiples */
1992 /* 2^95*G + 2^190*G */
1993 point_add(pre->g_pre_comp[6][0], pre->g_pre_comp[6][1], pre->g_pre_comp[6][2],
1994 pre->g_pre_comp[4][0], pre->g_pre_comp[4][1], pre->g_pre_comp[4][2], 0,
1995 pre->g_pre_comp[2][0], pre->g_pre_comp[2][1], pre->g_pre_comp[2][2]);
1996 /* 2^95*G + 2^285*G */
1997 point_add(pre->g_pre_comp[10][0], pre->g_pre_comp[10][1], pre->g_pre_comp[10][2],
1998 pre->g_pre_comp[8][0], pre->g_pre_comp[8][1], pre->g_pre_comp[8][2], 0,
1999 pre->g_pre_comp[2][0], pre->g_pre_comp[2][1], pre->g_pre_comp[2][2]);
2000 /* 2^190*G + 2^285*G */
2001 point_add(pre->g_pre_comp[12][0], pre->g_pre_comp[12][1], pre->g_pre_comp[12][2],
2002 pre->g_pre_comp[8][0], pre->g_pre_comp[8][1], pre->g_pre_comp[8][2], 0,
2003 pre->g_pre_comp[4][0], pre->g_pre_comp[4][1], pre->g_pre_comp[4][2]);
2004 /* 2^95*G + 2^190*G + 2^285*G */
2005 point_add(pre->g_pre_comp[14][0], pre->g_pre_comp[14][1], pre->g_pre_comp[14][2],
2006 pre->g_pre_comp[12][0], pre->g_pre_comp[12][1], pre->g_pre_comp[12][2], 0,
2007 pre->g_pre_comp[2][0], pre->g_pre_comp[2][1], pre->g_pre_comp[2][2]);
2008 for (i = 1; i < 8; ++i) {
2009 /* odd multiples: add G */
2010 point_add(pre->g_pre_comp[2 * i + 1][0], pre->g_pre_comp[2 * i + 1][1], pre->g_pre_comp[2 * i + 1][2],
2011 pre->g_pre_comp[2 * i][0], pre->g_pre_comp[2 * i][1], pre->g_pre_comp[2 * i][2], 0,
2012 pre->g_pre_comp[1][0], pre->g_pre_comp[1][1], pre->g_pre_comp[1][2]);
2013 }
2014 make_points_affine(15, &(pre->g_pre_comp[1]), tmp_felems);
2015
2016 done:
2017 SETPRECOMP(group, nistp384, pre);
2018 ret = 1;
2019 pre = NULL;
2020 err:
2021 BN_CTX_end(ctx);
2022 EC_POINT_free(generator);
2023 #ifndef FIPS_MODULE
2024 BN_CTX_free(new_ctx);
2025 #endif
2026 ossl_ec_nistp384_pre_comp_free(pre);
2027 return ret;
2028 }
2029
ossl_ec_GFp_nistp384_have_precompute_mult(const EC_GROUP * group)2030 int ossl_ec_GFp_nistp384_have_precompute_mult(const EC_GROUP *group)
2031 {
2032 return HAVEPRECOMP(group, nistp384);
2033 }
2034