xref: /freebsd/contrib/bearssl/src/ec/ec_prime_i15.c (revision 86dc8398c9ca2283c5d6984992b7a585257b5adb)
1 /*
2  * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining
5  * a copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sublicense, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be
13  * included in all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #include "inner.h"
26 
27 /*
28  * Parameters for supported curves:
29  *   - field modulus p
30  *   - R^2 mod p (R = 2^(15k) for the smallest k such that R >= p)
31  *   - b*R mod p (b is the second curve equation parameter)
32  */
33 
34 static const uint16_t P256_P[] = {
35 	0x0111,
36 	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x003F, 0x0000,
37 	0x0000, 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x4000, 0x7FFF,
38 	0x7FFF, 0x0001
39 };
40 
41 static const uint16_t P256_R2[] = {
42 	0x0111,
43 	0x0000, 0x6000, 0x0000, 0x0000, 0x0000, 0x0000, 0x7FFC, 0x7FFF,
44 	0x7FBF, 0x7FFF, 0x7FBF, 0x7FFF, 0x7FFF, 0x7FFF, 0x77FF, 0x7FFF,
45 	0x4FFF, 0x0000
46 };
47 
48 static const uint16_t P256_B[] = {
49 	0x0111,
50 	0x770C, 0x5EEF, 0x29C4, 0x3EC4, 0x6273, 0x0486, 0x4543, 0x3993,
51 	0x3C01, 0x6B56, 0x212E, 0x57EE, 0x4882, 0x204B, 0x7483, 0x3C16,
52 	0x0187, 0x0000
53 };
54 
55 static const uint16_t P384_P[] = {
56 	0x0199,
57 	0x7FFF, 0x7FFF, 0x0003, 0x0000, 0x0000, 0x0000, 0x7FC0, 0x7FFF,
58 	0x7EFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
59 	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
60 	0x7FFF, 0x01FF
61 };
62 
63 static const uint16_t P384_R2[] = {
64 	0x0199,
65 	0x1000, 0x0000, 0x0000, 0x7FFF, 0x7FFF, 0x0001, 0x0000, 0x0010,
66 	0x0000, 0x0000, 0x0000, 0x7F00, 0x7FFF, 0x01FF, 0x0000, 0x1000,
67 	0x0000, 0x2000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
68 	0x0000, 0x0000
69 };
70 
71 static const uint16_t P384_B[] = {
72 	0x0199,
73 	0x7333, 0x2096, 0x70D1, 0x2310, 0x3020, 0x6197, 0x1464, 0x35BB,
74 	0x70CA, 0x0117, 0x1920, 0x4136, 0x5FC8, 0x5713, 0x4938, 0x7DD2,
75 	0x4DD2, 0x4A71, 0x0220, 0x683E, 0x2C87, 0x4DB1, 0x7BFF, 0x6C09,
76 	0x0452, 0x0084
77 };
78 
79 static const uint16_t P521_P[] = {
80 	0x022B,
81 	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
82 	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
83 	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
84 	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
85 	0x7FFF, 0x7FFF, 0x07FF
86 };
87 
88 static const uint16_t P521_R2[] = {
89 	0x022B,
90 	0x0100, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
91 	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
92 	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
93 	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
94 	0x0000, 0x0000, 0x0000
95 };
96 
97 static const uint16_t P521_B[] = {
98 	0x022B,
99 	0x7002, 0x6A07, 0x751A, 0x228F, 0x71EF, 0x5869, 0x20F4, 0x1EFC,
100 	0x7357, 0x37E0, 0x4EEC, 0x605E, 0x1652, 0x26F6, 0x31FA, 0x4A8F,
101 	0x6193, 0x3C2A, 0x3C42, 0x48C7, 0x3489, 0x6771, 0x4C57, 0x5CCD,
102 	0x2725, 0x545B, 0x503B, 0x5B42, 0x21A0, 0x2534, 0x687E, 0x70E4,
103 	0x1618, 0x27D7, 0x0465
104 };
105 
106 typedef struct {
107 	const uint16_t *p;
108 	const uint16_t *b;
109 	const uint16_t *R2;
110 	uint16_t p0i;
111 	size_t point_len;
112 } curve_params;
113 
114 static inline const curve_params *
115 id_to_curve(int curve)
116 {
117 	static const curve_params pp[] = {
118 		{ P256_P, P256_B, P256_R2, 0x0001,  65 },
119 		{ P384_P, P384_B, P384_R2, 0x0001,  97 },
120 		{ P521_P, P521_B, P521_R2, 0x0001, 133 }
121 	};
122 
123 	return &pp[curve - BR_EC_secp256r1];
124 }
125 
126 #define I15_LEN   ((BR_MAX_EC_SIZE + 29) / 15)
127 
128 /*
129  * Type for a point in Jacobian coordinates:
130  * -- three values, x, y and z, in Montgomery representation
131  * -- affine coordinates are X = x / z^2 and Y = y / z^3
132  * -- for the point at infinity, z = 0
133  */
134 typedef struct {
135 	uint16_t c[3][I15_LEN];
136 } jacobian;
137 
138 /*
139  * We use a custom interpreter that uses a dozen registers, and
140  * only six operations:
141  *    MSET(d, a)       copy a into d
142  *    MADD(d, a)       d = d+a (modular)
143  *    MSUB(d, a)       d = d-a (modular)
144  *    MMUL(d, a, b)    d = a*b (Montgomery multiplication)
145  *    MINV(d, a, b)    invert d modulo p; a and b are used as scratch registers
146  *    MTZ(d)           clear return value if d = 0
147  * Destination of MMUL (d) must be distinct from operands (a and b).
148  * There is no such constraint for MSUB and MADD.
149  *
150  * Registers include the operand coordinates, and temporaries.
151  */
152 #define MSET(d, a)      (0x0000 + ((d) << 8) + ((a) << 4))
153 #define MADD(d, a)      (0x1000 + ((d) << 8) + ((a) << 4))
154 #define MSUB(d, a)      (0x2000 + ((d) << 8) + ((a) << 4))
155 #define MMUL(d, a, b)   (0x3000 + ((d) << 8) + ((a) << 4) + (b))
156 #define MINV(d, a, b)   (0x4000 + ((d) << 8) + ((a) << 4) + (b))
157 #define MTZ(d)          (0x5000 + ((d) << 8))
158 #define ENDCODE         0
159 
160 /*
161  * Registers for the input operands.
162  */
163 #define P1x    0
164 #define P1y    1
165 #define P1z    2
166 #define P2x    3
167 #define P2y    4
168 #define P2z    5
169 
170 /*
171  * Alternate names for the first input operand.
172  */
173 #define Px     0
174 #define Py     1
175 #define Pz     2
176 
177 /*
178  * Temporaries.
179  */
180 #define t1     6
181 #define t2     7
182 #define t3     8
183 #define t4     9
184 #define t5    10
185 #define t6    11
186 #define t7    12
187 
188 /*
189  * Extra scratch registers available when there is no second operand (e.g.
190  * for "double" and "affine").
191  */
192 #define t8     3
193 #define t9     4
194 #define t10    5
195 
196 /*
197  * Doubling formulas are:
198  *
199  *   s = 4*x*y^2
200  *   m = 3*(x + z^2)*(x - z^2)
201  *   x' = m^2 - 2*s
202  *   y' = m*(s - x') - 8*y^4
203  *   z' = 2*y*z
204  *
205  * If y = 0 (P has order 2) then this yields infinity (z' = 0), as it
206  * should. This case should not happen anyway, because our curves have
207  * prime order, and thus do not contain any point of order 2.
208  *
209  * If P is infinity (z = 0), then again the formulas yield infinity,
210  * which is correct. Thus, this code works for all points.
211  *
212  * Cost: 8 multiplications
213  */
214 static const uint16_t code_double[] = {
215 	/*
216 	 * Compute z^2 (in t1).
217 	 */
218 	MMUL(t1, Pz, Pz),
219 
220 	/*
221 	 * Compute x-z^2 (in t2) and then x+z^2 (in t1).
222 	 */
223 	MSET(t2, Px),
224 	MSUB(t2, t1),
225 	MADD(t1, Px),
226 
227 	/*
228 	 * Compute m = 3*(x+z^2)*(x-z^2) (in t1).
229 	 */
230 	MMUL(t3, t1, t2),
231 	MSET(t1, t3),
232 	MADD(t1, t3),
233 	MADD(t1, t3),
234 
235 	/*
236 	 * Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3).
237 	 */
238 	MMUL(t3, Py, Py),
239 	MADD(t3, t3),
240 	MMUL(t2, Px, t3),
241 	MADD(t2, t2),
242 
243 	/*
244 	 * Compute x' = m^2 - 2*s.
245 	 */
246 	MMUL(Px, t1, t1),
247 	MSUB(Px, t2),
248 	MSUB(Px, t2),
249 
250 	/*
251 	 * Compute z' = 2*y*z.
252 	 */
253 	MMUL(t4, Py, Pz),
254 	MSET(Pz, t4),
255 	MADD(Pz, t4),
256 
257 	/*
258 	 * Compute y' = m*(s - x') - 8*y^4. Note that we already have
259 	 * 2*y^2 in t3.
260 	 */
261 	MSUB(t2, Px),
262 	MMUL(Py, t1, t2),
263 	MMUL(t4, t3, t3),
264 	MSUB(Py, t4),
265 	MSUB(Py, t4),
266 
267 	ENDCODE
268 };
269 
270 /*
271  * Addtions formulas are:
272  *
273  *   u1 = x1 * z2^2
274  *   u2 = x2 * z1^2
275  *   s1 = y1 * z2^3
276  *   s2 = y2 * z1^3
277  *   h = u2 - u1
278  *   r = s2 - s1
279  *   x3 = r^2 - h^3 - 2 * u1 * h^2
280  *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
281  *   z3 = h * z1 * z2
282  *
283  * If both P1 and P2 are infinity, then z1 == 0 and z2 == 0, implying that
284  * z3 == 0, so the result is correct.
285  * If either of P1 or P2 is infinity, but not both, then z3 == 0, which is
286  * not correct.
287  * h == 0 only if u1 == u2; this happens in two cases:
288  * -- if s1 == s2 then P1 and/or P2 is infinity, or P1 == P2
289  * -- if s1 != s2 then P1 + P2 == infinity (but neither P1 or P2 is infinity)
290  *
291  * Thus, the following situations are not handled correctly:
292  * -- P1 = 0 and P2 != 0
293  * -- P1 != 0 and P2 = 0
294  * -- P1 = P2
295  * All other cases are properly computed. However, even in "incorrect"
296  * situations, the three coordinates still are properly formed field
297  * elements.
298  *
299  * The returned flag is cleared if r == 0. This happens in the following
300  * cases:
301  * -- Both points are on the same horizontal line (same Y coordinate).
302  * -- Both points are infinity.
303  * -- One point is infinity and the other is on line Y = 0.
304  * The third case cannot happen with our curves (there is no valid point
305  * on line Y = 0 since that would be a point of order 2). If the two
306  * source points are non-infinity, then remains only the case where the
307  * two points are on the same horizontal line.
308  *
309  * This allows us to detect the "P1 == P2" case, assuming that P1 != 0 and
310  * P2 != 0:
311  * -- If the returned value is not the point at infinity, then it was properly
312  * computed.
313  * -- Otherwise, if the returned flag is 1, then P1+P2 = 0, and the result
314  * is indeed the point at infinity.
315  * -- Otherwise (result is infinity, flag is 0), then P1 = P2 and we should
316  * use the 'double' code.
317  *
318  * Cost: 16 multiplications
319  */
320 static const uint16_t code_add[] = {
321 	/*
322 	 * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
323 	 */
324 	MMUL(t3, P2z, P2z),
325 	MMUL(t1, P1x, t3),
326 	MMUL(t4, P2z, t3),
327 	MMUL(t3, P1y, t4),
328 
329 	/*
330 	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
331 	 */
332 	MMUL(t4, P1z, P1z),
333 	MMUL(t2, P2x, t4),
334 	MMUL(t5, P1z, t4),
335 	MMUL(t4, P2y, t5),
336 
337 	/*
338 	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
339 	 */
340 	MSUB(t2, t1),
341 	MSUB(t4, t3),
342 
343 	/*
344 	 * Report cases where r = 0 through the returned flag.
345 	 */
346 	MTZ(t4),
347 
348 	/*
349 	 * Compute u1*h^2 (in t6) and h^3 (in t5).
350 	 */
351 	MMUL(t7, t2, t2),
352 	MMUL(t6, t1, t7),
353 	MMUL(t5, t7, t2),
354 
355 	/*
356 	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
357 	 * t1 and t7 can be used as scratch registers.
358 	 */
359 	MMUL(P1x, t4, t4),
360 	MSUB(P1x, t5),
361 	MSUB(P1x, t6),
362 	MSUB(P1x, t6),
363 
364 	/*
365 	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
366 	 */
367 	MSUB(t6, P1x),
368 	MMUL(P1y, t4, t6),
369 	MMUL(t1, t5, t3),
370 	MSUB(P1y, t1),
371 
372 	/*
373 	 * Compute z3 = h*z1*z2.
374 	 */
375 	MMUL(t1, P1z, P2z),
376 	MMUL(P1z, t1, t2),
377 
378 	ENDCODE
379 };
380 
381 /*
382  * Check that the point is on the curve. This code snippet assumes the
383  * following conventions:
384  * -- Coordinates x and y have been freshly decoded in P1 (but not
385  * converted to Montgomery coordinates yet).
386  * -- P2x, P2y and P2z are set to, respectively, R^2, b*R and 1.
387  */
388 static const uint16_t code_check[] = {
389 
390 	/* Convert x and y to Montgomery representation. */
391 	MMUL(t1, P1x, P2x),
392 	MMUL(t2, P1y, P2x),
393 	MSET(P1x, t1),
394 	MSET(P1y, t2),
395 
396 	/* Compute x^3 in t1. */
397 	MMUL(t2, P1x, P1x),
398 	MMUL(t1, P1x, t2),
399 
400 	/* Subtract 3*x from t1. */
401 	MSUB(t1, P1x),
402 	MSUB(t1, P1x),
403 	MSUB(t1, P1x),
404 
405 	/* Add b. */
406 	MADD(t1, P2y),
407 
408 	/* Compute y^2 in t2. */
409 	MMUL(t2, P1y, P1y),
410 
411 	/* Compare y^2 with x^3 - 3*x + b; they must match. */
412 	MSUB(t1, t2),
413 	MTZ(t1),
414 
415 	/* Set z to 1 (in Montgomery representation). */
416 	MMUL(P1z, P2x, P2z),
417 
418 	ENDCODE
419 };
420 
421 /*
422  * Conversion back to affine coordinates. This code snippet assumes that
423  * the z coordinate of P2 is set to 1 (not in Montgomery representation).
424  */
425 static const uint16_t code_affine[] = {
426 
427 	/* Save z*R in t1. */
428 	MSET(t1, P1z),
429 
430 	/* Compute z^3 in t2. */
431 	MMUL(t2, P1z, P1z),
432 	MMUL(t3, P1z, t2),
433 	MMUL(t2, t3, P2z),
434 
435 	/* Invert to (1/z^3) in t2. */
436 	MINV(t2, t3, t4),
437 
438 	/* Compute y. */
439 	MSET(t3, P1y),
440 	MMUL(P1y, t2, t3),
441 
442 	/* Compute (1/z^2) in t3. */
443 	MMUL(t3, t2, t1),
444 
445 	/* Compute x. */
446 	MSET(t2, P1x),
447 	MMUL(P1x, t2, t3),
448 
449 	ENDCODE
450 };
451 
452 static uint32_t
453 run_code(jacobian *P1, const jacobian *P2,
454 	const curve_params *cc, const uint16_t *code)
455 {
456 	uint32_t r;
457 	uint16_t t[13][I15_LEN];
458 	size_t u;
459 
460 	r = 1;
461 
462 	/*
463 	 * Copy the two operands in the dedicated registers.
464 	 */
465 	memcpy(t[P1x], P1->c, 3 * I15_LEN * sizeof(uint16_t));
466 	memcpy(t[P2x], P2->c, 3 * I15_LEN * sizeof(uint16_t));
467 
468 	/*
469 	 * Run formulas.
470 	 */
471 	for (u = 0;; u ++) {
472 		unsigned op, d, a, b;
473 
474 		op = code[u];
475 		if (op == 0) {
476 			break;
477 		}
478 		d = (op >> 8) & 0x0F;
479 		a = (op >> 4) & 0x0F;
480 		b = op & 0x0F;
481 		op >>= 12;
482 		switch (op) {
483 			uint32_t ctl;
484 			size_t plen;
485 			unsigned char tp[(BR_MAX_EC_SIZE + 7) >> 3];
486 
487 		case 0:
488 			memcpy(t[d], t[a], I15_LEN * sizeof(uint16_t));
489 			break;
490 		case 1:
491 			ctl = br_i15_add(t[d], t[a], 1);
492 			ctl |= NOT(br_i15_sub(t[d], cc->p, 0));
493 			br_i15_sub(t[d], cc->p, ctl);
494 			break;
495 		case 2:
496 			br_i15_add(t[d], cc->p, br_i15_sub(t[d], t[a], 1));
497 			break;
498 		case 3:
499 			br_i15_montymul(t[d], t[a], t[b], cc->p, cc->p0i);
500 			break;
501 		case 4:
502 			plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;
503 			br_i15_encode(tp, plen, cc->p);
504 			tp[plen - 1] -= 2;
505 			br_i15_modpow(t[d], tp, plen,
506 				cc->p, cc->p0i, t[a], t[b]);
507 			break;
508 		default:
509 			r &= ~br_i15_iszero(t[d]);
510 			break;
511 		}
512 	}
513 
514 	/*
515 	 * Copy back result.
516 	 */
517 	memcpy(P1->c, t[P1x], 3 * I15_LEN * sizeof(uint16_t));
518 	return r;
519 }
520 
521 static void
522 set_one(uint16_t *x, const uint16_t *p)
523 {
524 	size_t plen;
525 
526 	plen = (p[0] + 31) >> 4;
527 	memset(x, 0, plen * sizeof *x);
528 	x[0] = p[0];
529 	x[1] = 0x0001;
530 }
531 
532 static void
533 point_zero(jacobian *P, const curve_params *cc)
534 {
535 	memset(P, 0, sizeof *P);
536 	P->c[0][0] = P->c[1][0] = P->c[2][0] = cc->p[0];
537 }
538 
539 static inline void
540 point_double(jacobian *P, const curve_params *cc)
541 {
542 	run_code(P, P, cc, code_double);
543 }
544 
545 static inline uint32_t
546 point_add(jacobian *P1, const jacobian *P2, const curve_params *cc)
547 {
548 	return run_code(P1, P2, cc, code_add);
549 }
550 
551 static void
552 point_mul(jacobian *P, const unsigned char *x, size_t xlen,
553 	const curve_params *cc)
554 {
555 	/*
556 	 * We do a simple double-and-add ladder with a 2-bit window
557 	 * to make only one add every two doublings. We thus first
558 	 * precompute 2P and 3P in some local buffers.
559 	 *
560 	 * We always perform two doublings and one addition; the
561 	 * addition is with P, 2P and 3P and is done in a temporary
562 	 * array.
563 	 *
564 	 * The addition code cannot handle cases where one of the
565 	 * operands is infinity, which is the case at the start of the
566 	 * ladder. We therefore need to maintain a flag that controls
567 	 * this situation.
568 	 */
569 	uint32_t qz;
570 	jacobian P2, P3, Q, T, U;
571 
572 	memcpy(&P2, P, sizeof P2);
573 	point_double(&P2, cc);
574 	memcpy(&P3, P, sizeof P3);
575 	point_add(&P3, &P2, cc);
576 
577 	point_zero(&Q, cc);
578 	qz = 1;
579 	while (xlen -- > 0) {
580 		int k;
581 
582 		for (k = 6; k >= 0; k -= 2) {
583 			uint32_t bits;
584 			uint32_t bnz;
585 
586 			point_double(&Q, cc);
587 			point_double(&Q, cc);
588 			memcpy(&T, P, sizeof T);
589 			memcpy(&U, &Q, sizeof U);
590 			bits = (*x >> k) & (uint32_t)3;
591 			bnz = NEQ(bits, 0);
592 			CCOPY(EQ(bits, 2), &T, &P2, sizeof T);
593 			CCOPY(EQ(bits, 3), &T, &P3, sizeof T);
594 			point_add(&U, &T, cc);
595 			CCOPY(bnz & qz, &Q, &T, sizeof Q);
596 			CCOPY(bnz & ~qz, &Q, &U, sizeof Q);
597 			qz &= ~bnz;
598 		}
599 		x ++;
600 	}
601 	memcpy(P, &Q, sizeof Q);
602 }
603 
604 /*
605  * Decode point into Jacobian coordinates. This function does not support
606  * the point at infinity. If the point is invalid then this returns 0, but
607  * the coordinates are still set to properly formed field elements.
608  */
609 static uint32_t
610 point_decode(jacobian *P, const void *src, size_t len, const curve_params *cc)
611 {
612 	/*
613 	 * Points must use uncompressed format:
614 	 * -- first byte is 0x04;
615 	 * -- coordinates X and Y use unsigned big-endian, with the same
616 	 *    length as the field modulus.
617 	 *
618 	 * We don't support hybrid format (uncompressed, but first byte
619 	 * has value 0x06 or 0x07, depending on the least significant bit
620 	 * of Y) because it is rather useless, and explicitly forbidden
621 	 * by PKIX (RFC 5480, section 2.2).
622 	 *
623 	 * We don't support compressed format either, because it is not
624 	 * much used in practice (there are or were patent-related
625 	 * concerns about point compression, which explains the lack of
626 	 * generalised support). Also, point compression support would
627 	 * need a bit more code.
628 	 */
629 	const unsigned char *buf;
630 	size_t plen, zlen;
631 	uint32_t r;
632 	jacobian Q;
633 
634 	buf = src;
635 	point_zero(P, cc);
636 	plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;
637 	if (len != 1 + (plen << 1)) {
638 		return 0;
639 	}
640 	r = br_i15_decode_mod(P->c[0], buf + 1, plen, cc->p);
641 	r &= br_i15_decode_mod(P->c[1], buf + 1 + plen, plen, cc->p);
642 
643 	/*
644 	 * Check first byte.
645 	 */
646 	r &= EQ(buf[0], 0x04);
647 	/* obsolete
648 	r &= EQ(buf[0], 0x04) | (EQ(buf[0] & 0xFE, 0x06)
649 		& ~(uint32_t)(buf[0] ^ buf[plen << 1]));
650 	*/
651 
652 	/*
653 	 * Convert coordinates and check that the point is valid.
654 	 */
655 	zlen = ((cc->p[0] + 31) >> 4) * sizeof(uint16_t);
656 	memcpy(Q.c[0], cc->R2, zlen);
657 	memcpy(Q.c[1], cc->b, zlen);
658 	set_one(Q.c[2], cc->p);
659 	r &= ~run_code(P, &Q, cc, code_check);
660 	return r;
661 }
662 
663 /*
664  * Encode a point. This method assumes that the point is correct and is
665  * not the point at infinity. Encoded size is always 1+2*plen, where
666  * plen is the field modulus length, in bytes.
667  */
668 static void
669 point_encode(void *dst, const jacobian *P, const curve_params *cc)
670 {
671 	unsigned char *buf;
672 	size_t plen;
673 	jacobian Q, T;
674 
675 	buf = dst;
676 	plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;
677 	buf[0] = 0x04;
678 	memcpy(&Q, P, sizeof *P);
679 	set_one(T.c[2], cc->p);
680 	run_code(&Q, &T, cc, code_affine);
681 	br_i15_encode(buf + 1, plen, Q.c[0]);
682 	br_i15_encode(buf + 1 + plen, plen, Q.c[1]);
683 }
684 
685 static const br_ec_curve_def *
686 id_to_curve_def(int curve)
687 {
688 	switch (curve) {
689 	case BR_EC_secp256r1:
690 		return &br_secp256r1;
691 	case BR_EC_secp384r1:
692 		return &br_secp384r1;
693 	case BR_EC_secp521r1:
694 		return &br_secp521r1;
695 	}
696 	return NULL;
697 }
698 
699 static const unsigned char *
700 api_generator(int curve, size_t *len)
701 {
702 	const br_ec_curve_def *cd;
703 
704 	cd = id_to_curve_def(curve);
705 	*len = cd->generator_len;
706 	return cd->generator;
707 }
708 
709 static const unsigned char *
710 api_order(int curve, size_t *len)
711 {
712 	const br_ec_curve_def *cd;
713 
714 	cd = id_to_curve_def(curve);
715 	*len = cd->order_len;
716 	return cd->order;
717 }
718 
719 static size_t
720 api_xoff(int curve, size_t *len)
721 {
722 	api_generator(curve, len);
723 	*len >>= 1;
724 	return 1;
725 }
726 
727 static uint32_t
728 api_mul(unsigned char *G, size_t Glen,
729 	const unsigned char *x, size_t xlen, int curve)
730 {
731 	uint32_t r;
732 	const curve_params *cc;
733 	jacobian P;
734 
735 	cc = id_to_curve(curve);
736 	if (Glen != cc->point_len) {
737 		return 0;
738 	}
739 	r = point_decode(&P, G, Glen, cc);
740 	point_mul(&P, x, xlen, cc);
741 	point_encode(G, &P, cc);
742 	return r;
743 }
744 
745 static size_t
746 api_mulgen(unsigned char *R,
747 	const unsigned char *x, size_t xlen, int curve)
748 {
749 	const unsigned char *G;
750 	size_t Glen;
751 
752 	G = api_generator(curve, &Glen);
753 	memcpy(R, G, Glen);
754 	api_mul(R, Glen, x, xlen, curve);
755 	return Glen;
756 }
757 
758 static uint32_t
759 api_muladd(unsigned char *A, const unsigned char *B, size_t len,
760 	const unsigned char *x, size_t xlen,
761 	const unsigned char *y, size_t ylen, int curve)
762 {
763 	uint32_t r, t, z;
764 	const curve_params *cc;
765 	jacobian P, Q;
766 
767 	/*
768 	 * TODO: see about merging the two ladders. Right now, we do
769 	 * two independent point multiplications, which is a bit
770 	 * wasteful of CPU resources (but yields short code).
771 	 */
772 
773 	cc = id_to_curve(curve);
774 	if (len != cc->point_len) {
775 		return 0;
776 	}
777 	r = point_decode(&P, A, len, cc);
778 	if (B == NULL) {
779 		size_t Glen;
780 
781 		B = api_generator(curve, &Glen);
782 	}
783 	r &= point_decode(&Q, B, len, cc);
784 	point_mul(&P, x, xlen, cc);
785 	point_mul(&Q, y, ylen, cc);
786 
787 	/*
788 	 * We want to compute P+Q. Since the base points A and B are distinct
789 	 * from infinity, and the multipliers are non-zero and lower than the
790 	 * curve order, then we know that P and Q are non-infinity. This
791 	 * leaves two special situations to test for:
792 	 * -- If P = Q then we must use point_double().
793 	 * -- If P+Q = 0 then we must report an error.
794 	 */
795 	t = point_add(&P, &Q, cc);
796 	point_double(&Q, cc);
797 	z = br_i15_iszero(P.c[2]);
798 
799 	/*
800 	 * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
801 	 * have the following:
802 	 *
803 	 *   z = 0, t = 0   return P (normal addition)
804 	 *   z = 0, t = 1   return P (normal addition)
805 	 *   z = 1, t = 0   return Q (a 'double' case)
806 	 *   z = 1, t = 1   report an error (P+Q = 0)
807 	 */
808 	CCOPY(z & ~t, &P, &Q, sizeof Q);
809 	point_encode(A, &P, cc);
810 	r &= ~(z & t);
811 
812 	return r;
813 }
814 
815 /* see bearssl_ec.h */
816 const br_ec_impl br_ec_prime_i15 = {
817 	(uint32_t)0x03800000,
818 	&api_generator,
819 	&api_order,
820 	&api_xoff,
821 	&api_mul,
822 	&api_mulgen,
823 	&api_muladd
824 };
825